1/* 2 * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. 3 * Copyright (c) 2002-2006 Mellanox Technologies LTD. All rights reserved. 4 * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. 5 * Copyright (c) 2008 Xsigo Systems Inc. All rights reserved. 6 * 7 * This software is available to you under a choice of one of two 8 * licenses. You may choose to be licensed under the terms of the GNU 9 * General Public License (GPL) Version 2, available from the file 10 * COPYING in the main directory of this source tree, or the 11 * OpenIB.org BSD license below: 12 * 13 * Redistribution and use in source and binary forms, with or 14 * without modification, are permitted provided that the following 15 * conditions are met: 16 * 17 * - Redistributions of source code must retain the above 18 * copyright notice, this list of conditions and the following 19 * disclaimer. 20 * 21 * - Redistributions in binary form must reproduce the above 22 * copyright notice, this list of conditions and the following 23 * disclaimer in the documentation and/or other materials 24 * provided with the distribution. 25 * 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 * SOFTWARE. 34 * 35 */ 36 37/* 38 * Abstract: 39 * Implementation of osm_pr_rcv_t. 40 * This object represents the PathRecord Receiver object. 41 * This object is part of the opensm family of objects. 
42 */ 43 44#if HAVE_CONFIG_H 45# include <config.h> 46#endif /* HAVE_CONFIG_H */ 47 48#include <string.h> 49#include <arpa/inet.h> 50#include <iba/ib_types.h> 51#include <complib/cl_qmap.h> 52#include <complib/cl_passivelock.h> 53#include <complib/cl_debug.h> 54#include <complib/cl_qlist.h> 55#include <vendor/osm_vendor_api.h> 56#include <opensm/osm_base.h> 57#include <opensm/osm_port.h> 58#include <opensm/osm_node.h> 59#include <opensm/osm_switch.h> 60#include <opensm/osm_helper.h> 61#include <opensm/osm_pkey.h> 62#include <opensm/osm_multicast.h> 63#include <opensm/osm_partition.h> 64#include <opensm/osm_opensm.h> 65#include <opensm/osm_qos_policy.h> 66#include <opensm/osm_sa.h> 67#include <opensm/osm_router.h> 68#include <opensm/osm_prefix_route.h> 69 70#include <sys/socket.h> 71 72extern uint8_t osm_get_lash_sl(osm_opensm_t * p_osm, 73 const osm_port_t * p_src_port, 74 const osm_port_t * p_dst_port); 75 76typedef struct osm_pr_item { 77 cl_list_item_t list_item; 78 ib_path_rec_t path_rec; 79} osm_pr_item_t; 80 81typedef struct osm_path_parms { 82 ib_net16_t pkey; 83 uint8_t mtu; 84 uint8_t rate; 85 uint8_t sl; 86 uint8_t pkt_life; 87 boolean_t reversible; 88} osm_path_parms_t; 89 90static const ib_gid_t zero_gid = { {0x00, 0x00, 0x00, 0x00, 91 0x00, 0x00, 0x00, 0x00, 92 0x00, 0x00, 0x00, 0x00, 93 0x00, 0x00, 0x00, 0x00}, 94}; 95 96/********************************************************************** 97 **********************************************************************/ 98static inline boolean_t 99__osm_sa_path_rec_is_tavor_port(IN const osm_port_t * const p_port) 100{ 101 osm_node_t const *p_node; 102 ib_net32_t vend_id; 103 104 p_node = p_port->p_node; 105 vend_id = ib_node_info_get_vendor_id(&p_node->node_info); 106 107 return ((p_node->node_info.device_id == CL_HTON16(23108)) && 108 ((vend_id == CL_HTON32(OSM_VENDOR_ID_MELLANOX)) || 109 (vend_id == CL_HTON32(OSM_VENDOR_ID_TOPSPIN)) || 110 (vend_id == CL_HTON32(OSM_VENDOR_ID_SILVERSTORM)) || 111 
(vend_id == CL_HTON32(OSM_VENDOR_ID_VOLTAIRE)))); 112} 113 114/********************************************************************** 115 **********************************************************************/ 116static boolean_t 117__osm_sa_path_rec_apply_tavor_mtu_limit(IN const ib_path_rec_t * const p_pr, 118 IN const osm_port_t * const p_src_port, 119 IN const osm_port_t * const p_dest_port, 120 IN const ib_net64_t comp_mask) 121{ 122 uint8_t required_mtu; 123 124 /* only if at least one of the ports is a Tavor device */ 125 if (!__osm_sa_path_rec_is_tavor_port(p_src_port) && 126 !__osm_sa_path_rec_is_tavor_port(p_dest_port)) 127 return (FALSE); 128 129 /* 130 we can apply the patch if either: 131 1. No MTU required 132 2. Required MTU < 133 3. Required MTU = 1K or 512 or 256 134 4. Required MTU > 256 or 512 135 */ 136 required_mtu = ib_path_rec_mtu(p_pr); 137 if ((comp_mask & IB_PR_COMPMASK_MTUSELEC) && 138 (comp_mask & IB_PR_COMPMASK_MTU)) { 139 switch (ib_path_rec_mtu_sel(p_pr)) { 140 case 0: /* must be greater than */ 141 case 2: /* exact match */ 142 if (IB_MTU_LEN_1024 < required_mtu) 143 return (FALSE); 144 break; 145 146 case 1: /* must be less than */ 147 /* can't be disqualified by this one */ 148 break; 149 150 case 3: /* largest available */ 151 /* the ULP intentionally requested */ 152 /* the largest MTU possible */ 153 return (FALSE); 154 break; 155 156 default: 157 /* if we're here, there's a bug in ib_path_rec_mtu_sel() */ 158 CL_ASSERT(FALSE); 159 break; 160 } 161 } 162 163 return (TRUE); 164} 165 166/********************************************************************** 167 **********************************************************************/ 168static ib_api_status_t 169__osm_pr_rcv_get_path_parms(IN osm_sa_t * sa, 170 IN const ib_path_rec_t * const p_pr, 171 IN const osm_port_t * const p_src_port, 172 IN const osm_port_t * const p_dest_port, 173 IN const uint16_t dest_lid_ho, 174 IN const ib_net64_t comp_mask, 175 OUT osm_path_parms_t * 
const p_parms) 176{ 177 const osm_node_t *p_node; 178 const osm_physp_t *p_physp; 179 const osm_physp_t *p_src_physp; 180 const osm_physp_t *p_dest_physp; 181 const osm_prtn_t *p_prtn = NULL; 182 osm_opensm_t *p_osm; 183 const ib_port_info_t *p_pi; 184 ib_api_status_t status = IB_SUCCESS; 185 ib_net16_t pkey; 186 uint8_t mtu; 187 uint8_t rate; 188 uint8_t pkt_life; 189 uint8_t required_mtu; 190 uint8_t required_rate; 191 uint8_t required_pkt_life; 192 uint8_t sl; 193 uint8_t in_port_num; 194 ib_net16_t dest_lid; 195 uint8_t i; 196 ib_slvl_table_t *p_slvl_tbl = NULL; 197 osm_qos_level_t *p_qos_level = NULL; 198 uint16_t valid_sl_mask = 0xffff; 199 int is_lash; 200 201 OSM_LOG_ENTER(sa->p_log); 202 203 dest_lid = cl_hton16(dest_lid_ho); 204 205 p_dest_physp = p_dest_port->p_physp; 206 p_physp = p_src_port->p_physp; 207 p_src_physp = p_physp; 208 p_pi = &p_physp->port_info; 209 p_osm = sa->p_subn->p_osm; 210 211 mtu = ib_port_info_get_mtu_cap(p_pi); 212 rate = ib_port_info_compute_rate(p_pi); 213 214 /* 215 Mellanox Tavor device performance is better using 1K MTU. 216 If required MTU and MTU selector are such that 1K is OK 217 and at least one end of the path is Tavor we override the 218 port MTU with 1K. 219 */ 220 if (sa->p_subn->opt.enable_quirks && 221 __osm_sa_path_rec_apply_tavor_mtu_limit(p_pr, p_src_port, 222 p_dest_port, comp_mask)) 223 if (mtu > IB_MTU_LEN_1024) { 224 mtu = IB_MTU_LEN_1024; 225 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, 226 "Optimized Path MTU to 1K for Mellanox Tavor device\n"); 227 } 228 229 /* 230 Walk the subnet object from source to destination, 231 tracking the most restrictive rate and mtu values along the way... 232 233 If source port node is a switch, then p_physp should 234 point to the port that routes the destination lid 235 */ 236 237 p_node = osm_physp_get_node_ptr(p_physp); 238 239 if (p_node->sw) { 240 /* 241 * Source node is a switch. 
242 * Make sure that p_physp points to the out port of the 243 * switch that routes to the destination lid (dest_lid_ho) 244 */ 245 p_physp = osm_switch_get_route_by_lid(p_node->sw, dest_lid); 246 if (p_physp == 0) { 247 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F02: " 248 "Cannot find routing to LID %u from switch for GUID 0x%016" 249 PRIx64 "\n", dest_lid_ho, 250 cl_ntoh64(osm_node_get_node_guid(p_node))); 251 status = IB_NOT_FOUND; 252 goto Exit; 253 } 254 } 255 256 if (sa->p_subn->opt.qos) { 257 258 /* 259 * Whether this node is switch or CA, the IN port for 260 * the sl2vl table is 0, because this is a source node. 261 */ 262 p_slvl_tbl = osm_physp_get_slvl_tbl(p_physp, 0); 263 264 /* update valid SLs that still exist on this route */ 265 for (i = 0; i < IB_MAX_NUM_VLS; i++) { 266 if (valid_sl_mask & (1 << i) && 267 ib_slvl_table_get(p_slvl_tbl, i) == IB_DROP_VL) 268 valid_sl_mask &= ~(1 << i); 269 } 270 if (!valid_sl_mask) { 271 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, 272 "All the SLs lead to VL15 on this path\n"); 273 status = IB_NOT_FOUND; 274 goto Exit; 275 } 276 } 277 278 /* 279 * Same as above 280 */ 281 p_node = osm_physp_get_node_ptr(p_dest_physp); 282 283 if (p_node->sw) { 284 /* 285 * if destination is switch, we want p_dest_physp to point to port 0 286 */ 287 p_dest_physp = osm_switch_get_route_by_lid(p_node->sw, dest_lid); 288 289 if (p_dest_physp == 0) { 290 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F03: " 291 "Cannot find routing to LID %u from switch for GUID 0x%016" 292 PRIx64 "\n", dest_lid_ho, 293 cl_ntoh64(osm_node_get_node_guid(p_node))); 294 status = IB_NOT_FOUND; 295 goto Exit; 296 } 297 298 } 299 300 /* 301 * Now go through the path step by step 302 */ 303 304 while (p_physp != p_dest_physp) { 305 306 p_node = osm_physp_get_node_ptr(p_physp); 307 p_physp = osm_physp_get_remote(p_physp); 308 309 if (p_physp == 0) { 310 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F05: " 311 "Cannot find remote phys port when routing to LID %u from node GUID 0x%016" 312 
PRIx64 "\n", dest_lid_ho, 313 cl_ntoh64(osm_node_get_node_guid(p_node))); 314 status = IB_ERROR; 315 goto Exit; 316 } 317 318 in_port_num = osm_physp_get_port_num(p_physp); 319 320 /* 321 This is point to point case (no switch in between) 322 */ 323 if (p_physp == p_dest_physp) 324 break; 325 326 p_node = osm_physp_get_node_ptr(p_physp); 327 328 if (!p_node->sw) { 329 /* 330 There is some sort of problem in the subnet object! 331 If this isn't a switch, we should have reached 332 the destination by now! 333 */ 334 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F06: " 335 "Internal error, bad path\n"); 336 status = IB_ERROR; 337 goto Exit; 338 } 339 340 /* 341 Check parameters for the ingress port in this switch. 342 */ 343 p_pi = &p_physp->port_info; 344 345 if (mtu > ib_port_info_get_mtu_cap(p_pi)) 346 mtu = ib_port_info_get_mtu_cap(p_pi); 347 348 if (rate > ib_port_info_compute_rate(p_pi)) 349 rate = ib_port_info_compute_rate(p_pi); 350 351 /* 352 Continue with the egress port on this switch. 
353 */ 354 p_physp = osm_switch_get_route_by_lid(p_node->sw, dest_lid); 355 if (p_physp == 0) { 356 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F07: " 357 "Dead end on path to LID %u from switch for GUID 0x%016" 358 PRIx64 "\n", dest_lid_ho, 359 cl_ntoh64(osm_node_get_node_guid(p_node))); 360 status = IB_ERROR; 361 goto Exit; 362 } 363 364 p_pi = &p_physp->port_info; 365 366 if (mtu > ib_port_info_get_mtu_cap(p_pi)) 367 mtu = ib_port_info_get_mtu_cap(p_pi); 368 369 if (rate > ib_port_info_compute_rate(p_pi)) 370 rate = ib_port_info_compute_rate(p_pi); 371 372 if (sa->p_subn->opt.qos) { 373 /* 374 * Check SL2VL table of the switch and update valid SLs 375 */ 376 p_slvl_tbl = osm_physp_get_slvl_tbl(p_physp, in_port_num); 377 for (i = 0; i < IB_MAX_NUM_VLS; i++) { 378 if (valid_sl_mask & (1 << i) && 379 ib_slvl_table_get(p_slvl_tbl, i) == IB_DROP_VL) 380 valid_sl_mask &= ~(1 << i); 381 } 382 if (!valid_sl_mask) { 383 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "All the SLs " 384 "lead to VL15 on this path\n"); 385 status = IB_NOT_FOUND; 386 goto Exit; 387 } 388 } 389 } 390 391 /* 392 p_physp now points to the destination 393 */ 394 p_pi = &p_physp->port_info; 395 396 if (mtu > ib_port_info_get_mtu_cap(p_pi)) 397 mtu = ib_port_info_get_mtu_cap(p_pi); 398 399 if (rate > ib_port_info_compute_rate(p_pi)) 400 rate = ib_port_info_compute_rate(p_pi); 401 402 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, 403 "Path min MTU = %u, min rate = %u\n", mtu, rate); 404 405 /* 406 * Get QoS Level object according to the path request 407 * and adjust path parameters according to QoS settings 408 */ 409 if (sa->p_subn->opt.qos && 410 sa->p_subn->p_qos_policy && 411 (p_qos_level = 412 osm_qos_policy_get_qos_level_by_pr(sa->p_subn->p_qos_policy, 413 p_pr, p_src_physp, p_dest_physp, 414 comp_mask))) { 415 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, 416 "PathRecord request matches QoS Level '%s' (%s)\n", 417 p_qos_level->name, p_qos_level->use ? 
418 p_qos_level->use : "no description"); 419 420 if (p_qos_level->mtu_limit_set 421 && (mtu > p_qos_level->mtu_limit)) 422 mtu = p_qos_level->mtu_limit; 423 424 if (p_qos_level->rate_limit_set 425 && (rate > p_qos_level->rate_limit)) 426 rate = p_qos_level->rate_limit; 427 428 if (p_qos_level->sl_set) { 429 sl = p_qos_level->sl; 430 if (!(valid_sl_mask & (1 << sl))) { 431 status = IB_NOT_FOUND; 432 goto Exit; 433 } 434 } 435 } 436 437 /* 438 * Set packet lifetime. 439 * According to spec definition IBA 1.2 Table 205 440 * PacketLifeTime description, for loopback paths, 441 * packetLifeTime shall be zero. 442 */ 443 if (p_src_port == p_dest_port) 444 pkt_life = 0; 445 else if (p_qos_level && p_qos_level->pkt_life_set) 446 pkt_life = p_qos_level->pkt_life; 447 else 448 pkt_life = sa->p_subn->opt.subnet_timeout; 449 450 /* 451 Determine if these values meet the user criteria 452 and adjust appropriately 453 */ 454 455 /* we silently ignore cases where only the MTU selector is defined */ 456 if ((comp_mask & IB_PR_COMPMASK_MTUSELEC) && 457 (comp_mask & IB_PR_COMPMASK_MTU)) { 458 required_mtu = ib_path_rec_mtu(p_pr); 459 switch (ib_path_rec_mtu_sel(p_pr)) { 460 case 0: /* must be greater than */ 461 if (mtu <= required_mtu) 462 status = IB_NOT_FOUND; 463 break; 464 465 case 1: /* must be less than */ 466 if (mtu >= required_mtu) { 467 /* adjust to use the highest mtu 468 lower then the required one */ 469 if (required_mtu > 1) 470 mtu = required_mtu - 1; 471 else 472 status = IB_NOT_FOUND; 473 } 474 break; 475 476 case 2: /* exact match */ 477 if (mtu < required_mtu) 478 status = IB_NOT_FOUND; 479 else 480 mtu = required_mtu; 481 break; 482 483 case 3: /* largest available */ 484 /* can't be disqualified by this one */ 485 break; 486 487 default: 488 /* if we're here, there's a bug in ib_path_rec_mtu_sel() */ 489 CL_ASSERT(FALSE); 490 status = IB_ERROR; 491 break; 492 } 493 } 494 if (status != IB_SUCCESS) 495 goto Exit; 496 497 /* we silently ignore cases where only 
the Rate selector is defined */ 498 if ((comp_mask & IB_PR_COMPMASK_RATESELEC) && 499 (comp_mask & IB_PR_COMPMASK_RATE)) { 500 required_rate = ib_path_rec_rate(p_pr); 501 switch (ib_path_rec_rate_sel(p_pr)) { 502 case 0: /* must be greater than */ 503 if (rate <= required_rate) 504 status = IB_NOT_FOUND; 505 break; 506 507 case 1: /* must be less than */ 508 if (rate >= required_rate) { 509 /* adjust the rate to use the highest rate 510 lower then the required one */ 511 if (required_rate > 2) 512 rate = required_rate - 1; 513 else 514 status = IB_NOT_FOUND; 515 } 516 break; 517 518 case 2: /* exact match */ 519 if (rate < required_rate) 520 status = IB_NOT_FOUND; 521 else 522 rate = required_rate; 523 break; 524 525 case 3: /* largest available */ 526 /* can't be disqualified by this one */ 527 break; 528 529 default: 530 /* if we're here, there's a bug in ib_path_rec_mtu_sel() */ 531 CL_ASSERT(FALSE); 532 status = IB_ERROR; 533 break; 534 } 535 } 536 if (status != IB_SUCCESS) 537 goto Exit; 538 539 /* we silently ignore cases where only the PktLife selector is defined */ 540 if ((comp_mask & IB_PR_COMPMASK_PKTLIFETIMESELEC) && 541 (comp_mask & IB_PR_COMPMASK_PKTLIFETIME)) { 542 required_pkt_life = ib_path_rec_pkt_life(p_pr); 543 switch (ib_path_rec_pkt_life_sel(p_pr)) { 544 case 0: /* must be greater than */ 545 if (pkt_life <= required_pkt_life) 546 status = IB_NOT_FOUND; 547 break; 548 549 case 1: /* must be less than */ 550 if (pkt_life >= required_pkt_life) { 551 /* adjust the lifetime to use the highest possible 552 lower then the required one */ 553 if (required_pkt_life > 1) 554 pkt_life = required_pkt_life - 1; 555 else 556 status = IB_NOT_FOUND; 557 } 558 break; 559 560 case 2: /* exact match */ 561 if (pkt_life < required_pkt_life) 562 status = IB_NOT_FOUND; 563 else 564 pkt_life = required_pkt_life; 565 break; 566 567 case 3: /* smallest available */ 568 /* can't be disqualified by this one */ 569 break; 570 571 default: 572 /* if we're here, there's a 
bug in ib_path_rec_pkt_life_sel() */ 573 CL_ASSERT(FALSE); 574 status = IB_ERROR; 575 break; 576 } 577 } 578 579 if (status != IB_SUCCESS) 580 goto Exit; 581 582 /* 583 * set Pkey for this path record request 584 */ 585 586 if ((comp_mask & IB_PR_COMPMASK_RAWTRAFFIC) && 587 (cl_ntoh32(p_pr->hop_flow_raw) & (1 << 31))) 588 pkey = osm_physp_find_common_pkey(p_src_physp, p_dest_physp); 589 590 else if (comp_mask & IB_PR_COMPMASK_PKEY) { 591 /* 592 * PR request has a specific pkey: 593 * Check that source and destination share this pkey. 594 * If QoS level has pkeys, check that this pkey exists 595 * in the QoS level pkeys. 596 * PR returned pkey is the requested pkey. 597 */ 598 pkey = p_pr->pkey; 599 if (!osm_physp_share_this_pkey(p_src_physp, p_dest_physp, pkey)) { 600 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F1A: " 601 "Ports 0x%016" PRIx64 " 0x%016" PRIx64 602 " do not share specified PKey 0x%04x\n", 603 cl_ntoh64(osm_physp_get_port_guid(p_src_physp)), 604 cl_ntoh64(osm_physp_get_port_guid(p_dest_physp)), 605 cl_ntoh16(pkey)); 606 status = IB_NOT_FOUND; 607 goto Exit; 608 } 609 if (p_qos_level && p_qos_level->pkey_range_len && 610 !osm_qos_level_has_pkey(p_qos_level, pkey)) { 611 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F1D: " 612 "Ports do not share PKeys defined by QoS level\n"); 613 status = IB_NOT_FOUND; 614 goto Exit; 615 } 616 617 } else if (p_qos_level && p_qos_level->pkey_range_len) { 618 /* 619 * PR request doesn't have a specific pkey, but QoS level 620 * has pkeys - get shared pkey from QoS level pkeys 621 */ 622 pkey = osm_qos_level_get_shared_pkey(p_qos_level, 623 p_src_physp, p_dest_physp); 624 if (!pkey) { 625 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F1E: " 626 "Ports 0x%016" PRIx64 " 0x%016" PRIx64 627 " do not share PKeys defined by QoS level\n", 628 cl_ntoh64(osm_physp_get_port_guid(p_src_physp)), 629 cl_ntoh64(osm_physp_get_port_guid(p_dest_physp))); 630 status = IB_NOT_FOUND; 631 goto Exit; 632 } 633 } else { 634 /* 635 * Neither PR request nor 
QoS level have pkey. 636 * Just get any shared pkey. 637 */ 638 pkey = osm_physp_find_common_pkey(p_src_physp, p_dest_physp); 639 if (!pkey) { 640 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F1B: " 641 "Ports 0x%016" PRIx64 " 0x%016" PRIx64 642 " do not have any shared PKeys\n", 643 cl_ntoh64(osm_physp_get_port_guid(p_src_physp)), 644 cl_ntoh64(osm_physp_get_port_guid(p_dest_physp))); 645 status = IB_NOT_FOUND; 646 goto Exit; 647 } 648 } 649 650 if (pkey) { 651 p_prtn = 652 (osm_prtn_t *) cl_qmap_get(&sa->p_subn->prtn_pkey_tbl, 653 pkey & cl_hton16((uint16_t) ~ 654 0x8000)); 655 if (p_prtn == 656 (osm_prtn_t *) cl_qmap_end(&sa->p_subn->prtn_pkey_tbl)) 657 p_prtn = NULL; 658 } 659 660 /* 661 * Set PathRecord SL. 662 */ 663 664 is_lash = (p_osm->routing_engine_used == OSM_ROUTING_ENGINE_TYPE_LASH); 665 666 if (comp_mask & IB_PR_COMPMASK_SL) { 667 /* 668 * Specific SL was requested 669 */ 670 sl = ib_path_rec_sl(p_pr); 671 672 if (p_qos_level && p_qos_level->sl_set 673 && (p_qos_level->sl != sl)) { 674 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F1F: " 675 "QoS constaraints: required PathRecord SL (%u) " 676 "doesn't match QoS policy SL (%u)\n", sl, 677 p_qos_level->sl); 678 status = IB_NOT_FOUND; 679 goto Exit; 680 } 681 682 if (is_lash 683 && osm_get_lash_sl(p_osm, p_src_port, p_dest_port) != sl) { 684 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F23: " 685 "Required PathRecord SL (%u) doesn't " 686 "match LASH SL\n", sl); 687 status = IB_NOT_FOUND; 688 goto Exit; 689 } 690 691 } else if (is_lash) { 692 /* 693 * No specific SL in PathRecord request. 694 * If it's LASH routing - use its SL. 695 * slid and dest_lid are stored in network in lash. 696 */ 697 sl = osm_get_lash_sl(p_osm, p_src_port, p_dest_port); 698 } else if (p_qos_level && p_qos_level->sl_set) { 699 /* 700 * No specific SL was requested, and we're not in 701 * LASH routing, but there is an SL in QoS level. 
702 */ 703 sl = p_qos_level->sl; 704 705 if (pkey && p_prtn && p_prtn->sl != p_qos_level->sl) 706 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, 707 "QoS level SL (%u) overrides partition SL (%u)\n", 708 p_qos_level->sl, p_prtn->sl); 709 710 } else if (pkey) { 711 /* 712 * No specific SL in request or in QoS level - use partition SL 713 */ 714 if (!p_prtn) { 715 sl = OSM_DEFAULT_SL; 716 /* this may be possible when pkey tables are created somehow in 717 previous runs or things are going wrong here */ 718 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F1C: " 719 "No partition found for PKey 0x%04x - using default SL %d\n", 720 cl_ntoh16(pkey), sl); 721 } else 722 sl = p_prtn->sl; 723 } else if (sa->p_subn->opt.qos) { 724 if (valid_sl_mask & (1 << OSM_DEFAULT_SL)) 725 sl = OSM_DEFAULT_SL; 726 else { 727 for (i = 0; i < IB_MAX_NUM_VLS; i++) 728 if (valid_sl_mask & (1 << i)) 729 break; 730 sl = i; 731 } 732 } else 733 sl = OSM_DEFAULT_SL; 734 735 if (sa->p_subn->opt.qos && !(valid_sl_mask & (1 << sl))) { 736 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F24: " 737 "Selected SL (%u) leads to VL15\n", sl); 738 status = IB_NOT_FOUND; 739 goto Exit; 740 } 741 742 /* reset pkey when raw traffic */ 743 if (comp_mask & IB_PR_COMPMASK_RAWTRAFFIC && 744 cl_ntoh32(p_pr->hop_flow_raw) & (1 << 31)) 745 pkey = 0; 746 747 p_parms->mtu = mtu; 748 p_parms->rate = rate; 749 p_parms->pkt_life = pkt_life; 750 p_parms->pkey = pkey; 751 p_parms->sl = sl; 752 753 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Path params: mtu = %u, rate = %u," 754 " packet lifetime = %u, pkey = 0x%04X, sl = %u\n", 755 mtu, rate, pkt_life, cl_ntoh16(pkey), sl); 756Exit: 757 OSM_LOG_EXIT(sa->p_log); 758 return (status); 759} 760 761/********************************************************************** 762 **********************************************************************/ 763static void 764__osm_pr_rcv_build_pr(IN osm_sa_t * sa, 765 IN const osm_port_t * const p_src_port, 766 IN const osm_port_t * const p_dest_port, 767 IN const ib_gid_t * 
const p_dgid, 768 IN const uint16_t src_lid_ho, 769 IN const uint16_t dest_lid_ho, 770 IN const uint8_t preference, 771 IN const osm_path_parms_t * const p_parms, 772 OUT ib_path_rec_t * const p_pr) 773{ 774 const osm_physp_t *p_src_physp; 775 const osm_physp_t *p_dest_physp; 776 boolean_t is_nonzero_gid = 0; 777 778 OSM_LOG_ENTER(sa->p_log); 779 780 p_src_physp = p_src_port->p_physp; 781 782 if (p_dgid) { 783 if (memcmp(p_dgid, &zero_gid, sizeof(*p_dgid))) 784 is_nonzero_gid = 1; 785 } 786 787 if (is_nonzero_gid) 788 p_pr->dgid = *p_dgid; 789 else { 790 p_dest_physp = p_dest_port->p_physp; 791 792 p_pr->dgid.unicast.prefix = 793 osm_physp_get_subnet_prefix(p_dest_physp); 794 p_pr->dgid.unicast.interface_id = 795 osm_physp_get_port_guid(p_dest_physp); 796 } 797 798 p_pr->sgid.unicast.prefix = osm_physp_get_subnet_prefix(p_src_physp); 799 p_pr->sgid.unicast.interface_id = osm_physp_get_port_guid(p_src_physp); 800 801 p_pr->dlid = cl_hton16(dest_lid_ho); 802 p_pr->slid = cl_hton16(src_lid_ho); 803 804 p_pr->hop_flow_raw &= cl_hton32(1 << 31); 805 806 /* Only set HopLimit if going through a router */ 807 if (is_nonzero_gid) 808 p_pr->hop_flow_raw |= cl_hton32(IB_HOPLIMIT_MAX); 809 810 p_pr->pkey = p_parms->pkey; 811 ib_path_rec_set_sl(p_pr, p_parms->sl); 812 ib_path_rec_set_qos_class(p_pr, 0); 813 p_pr->mtu = (uint8_t) (p_parms->mtu | 0x80); 814 p_pr->rate = (uint8_t) (p_parms->rate | 0x80); 815 816 /* According to 1.2 spec definition Table 205 PacketLifeTime description, 817 for loopback paths, packetLifeTime shall be zero. 
*/ 818 if (p_src_port == p_dest_port) 819 p_pr->pkt_life = 0x80; /* loopback */ 820 else 821 p_pr->pkt_life = (uint8_t) (p_parms->pkt_life | 0x80); 822 823 p_pr->preference = preference; 824 825 /* always return num_path = 0 so this is only the reversible component */ 826 if (p_parms->reversible) 827 p_pr->num_path = 0x80; 828 829 OSM_LOG_EXIT(sa->p_log); 830} 831 832/********************************************************************** 833 **********************************************************************/ 834static osm_pr_item_t * 835__osm_pr_rcv_get_lid_pair_path(IN osm_sa_t * sa, 836 IN const ib_path_rec_t * const p_pr, 837 IN const osm_port_t * const p_src_port, 838 IN const osm_port_t * const p_dest_port, 839 IN const ib_gid_t * const p_dgid, 840 IN const uint16_t src_lid_ho, 841 IN const uint16_t dest_lid_ho, 842 IN const ib_net64_t comp_mask, 843 IN const uint8_t preference) 844{ 845 osm_path_parms_t path_parms; 846 osm_path_parms_t rev_path_parms; 847 osm_pr_item_t *p_pr_item; 848 ib_api_status_t status, rev_path_status; 849 850 OSM_LOG_ENTER(sa->p_log); 851 852 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Src LID %u, Dest LID %u\n", 853 src_lid_ho, dest_lid_ho); 854 855 p_pr_item = malloc(sizeof(*p_pr_item)); 856 if (p_pr_item == NULL) { 857 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F01: " 858 "Unable to allocate path record\n"); 859 goto Exit; 860 } 861 memset(p_pr_item, 0, sizeof(*p_pr_item)); 862 863 status = __osm_pr_rcv_get_path_parms(sa, p_pr, p_src_port, 864 p_dest_port, dest_lid_ho, 865 comp_mask, &path_parms); 866 867 if (status != IB_SUCCESS) { 868 free(p_pr_item); 869 p_pr_item = NULL; 870 goto Exit; 871 } 872 873 /* now try the reversible path */ 874 rev_path_status = __osm_pr_rcv_get_path_parms(sa, p_pr, p_dest_port, 875 p_src_port, src_lid_ho, 876 comp_mask, 877 &rev_path_parms); 878 path_parms.reversible = (rev_path_status == IB_SUCCESS); 879 880 /* did we get a Reversible Path compmask ? 
*/ 881 /* 882 NOTE that if the reversible component = 0, it is a don't care 883 rather then requiring non-reversible paths ... 884 see Vol1 Ver1.2 p900 l16 885 */ 886 if (comp_mask & IB_PR_COMPMASK_REVERSIBLE) { 887 if ((!path_parms.reversible && (p_pr->num_path & 0x80))) { 888 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, 889 "Requested reversible path but failed to get one\n"); 890 891 free(p_pr_item); 892 p_pr_item = NULL; 893 goto Exit; 894 } 895 } 896 897 __osm_pr_rcv_build_pr(sa, p_src_port, p_dest_port, p_dgid, 898 src_lid_ho, dest_lid_ho, preference, &path_parms, 899 &p_pr_item->path_rec); 900 901Exit: 902 OSM_LOG_EXIT(sa->p_log); 903 return (p_pr_item); 904} 905 906/********************************************************************** 907 **********************************************************************/ 908static void 909__osm_pr_rcv_get_port_pair_paths(IN osm_sa_t * sa, 910 IN const osm_madw_t * const p_madw, 911 IN const osm_port_t * const p_req_port, 912 IN const osm_port_t * const p_src_port, 913 IN const osm_port_t * const p_dest_port, 914 IN const ib_gid_t * const p_dgid, 915 IN const ib_net64_t comp_mask, 916 IN cl_qlist_t * const p_list) 917{ 918 const ib_path_rec_t *p_pr; 919 const ib_sa_mad_t *p_sa_mad; 920 osm_pr_item_t *p_pr_item; 921 uint16_t src_lid_min_ho; 922 uint16_t src_lid_max_ho; 923 uint16_t dest_lid_min_ho; 924 uint16_t dest_lid_max_ho; 925 uint16_t src_lid_ho; 926 uint16_t dest_lid_ho; 927 uint32_t path_num; 928 uint8_t preference; 929 uintn_t iterations; 930 uintn_t src_offset; 931 uintn_t dest_offset; 932 933 OSM_LOG_ENTER(sa->p_log); 934 935 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, 936 "Src port 0x%016" PRIx64 ", Dst port 0x%016" PRIx64 "\n", 937 cl_ntoh64(osm_port_get_guid(p_src_port)), 938 cl_ntoh64(osm_port_get_guid(p_dest_port))); 939 940 /* Check that the req_port, src_port and dest_port all share a 941 pkey. The check is done on the default physical port of the ports. 
*/ 942 if (osm_port_share_pkey(sa->p_log, p_req_port, p_src_port) == FALSE 943 || osm_port_share_pkey(sa->p_log, p_req_port, 944 p_dest_port) == FALSE 945 || osm_port_share_pkey(sa->p_log, p_src_port, 946 p_dest_port) == FALSE) 947 /* One of the pairs doesn't share a pkey so the path is disqualified. */ 948 goto Exit; 949 950 p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw); 951 p_pr = (ib_path_rec_t *) ib_sa_mad_get_payload_ptr(p_sa_mad); 952 953 /* 954 We shouldn't be here if the paths are disqualified in some way... 955 Thus, we assume every possible connection is valid. 956 957 We desire to return high-quality paths first. 958 In OpenSM, higher quality means least overlap with other paths. 959 This is acheived in practice by returning paths with 960 different LID value on each end, which means these 961 paths are more redundant that paths with the same LID repeated 962 on one side. For example, in OpenSM the paths between two 963 endpoints with LMC = 1 might be as follows: 964 965 Port A, LID 1 <-> Port B, LID 3 966 Port A, LID 1 <-> Port B, LID 4 967 Port A, LID 2 <-> Port B, LID 3 968 Port A, LID 2 <-> Port B, LID 4 969 970 The OpenSM unicast routing algorithms attempt to disperse each path 971 to as varied a physical path as is reasonable. 1<->3 and 1<->4 have 972 more physical overlap (hence less redundancy) than 1<->3 and 2<->4. 973 974 OpenSM ranks paths in three preference groups: 975 976 Preference Value Description 977 ---------------- ------------------------------------------- 978 0 Redundant in both directions with other 979 pref value = 0 paths 980 981 1 Redundant in one direction with other 982 pref value = 0 and pref value = 1 paths 983 984 2 Not redundant in either direction with 985 other paths 986 987 3-FF Unused 988 989 SA clients don't need to know these details, only that the lower 990 preference paths are preferred, as stated in the spec. 
The paths 991 may not actually be physically redundant depending on the topology 992 of the subnet, but the point of LMC > 0 is to offer redundancy, 993 so it is assumed that the subnet is physically appropriate for the 994 specified LMC value. A more advanced implementation would inspect for 995 physical redundancy, but I'm not going to bother with that now. 996 */ 997 998 /* 999 Refine our search if the client specified end-point LIDs 1000 */ 1001 if (comp_mask & IB_PR_COMPMASK_DLID) { 1002 dest_lid_min_ho = cl_ntoh16(p_pr->dlid); 1003 dest_lid_max_ho = cl_ntoh16(p_pr->dlid); 1004 } else 1005 osm_port_get_lid_range_ho(p_dest_port, &dest_lid_min_ho, 1006 &dest_lid_max_ho); 1007 1008 if (comp_mask & IB_PR_COMPMASK_SLID) { 1009 src_lid_min_ho = cl_ntoh16(p_pr->slid); 1010 src_lid_max_ho = cl_ntoh16(p_pr->slid); 1011 } else 1012 osm_port_get_lid_range_ho(p_src_port, &src_lid_min_ho, 1013 &src_lid_max_ho); 1014 1015 if (src_lid_min_ho == 0) { 1016 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F20:" 1017 "Obtained source LID of 0. No such LID possible\n"); 1018 goto Exit; 1019 } 1020 1021 if (dest_lid_min_ho == 0) { 1022 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F21:" 1023 "Obtained destination LID of 0. 
No such LID possible\n"); 1024 goto Exit; 1025 } 1026 1027 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, 1028 "Src LIDs [%u-%u], Dest LIDs [%u-%u]\n", 1029 src_lid_min_ho, src_lid_max_ho, 1030 dest_lid_min_ho, dest_lid_max_ho); 1031 1032 src_lid_ho = src_lid_min_ho; 1033 dest_lid_ho = dest_lid_min_ho; 1034 1035 /* 1036 Preferred paths come first in OpenSM 1037 */ 1038 preference = 0; 1039 path_num = 0; 1040 1041 /* If SubnAdmGet, assume NumbPaths 1 (1.2 erratum) */ 1042 if (p_sa_mad->method != IB_MAD_METHOD_GET) 1043 if (comp_mask & IB_PR_COMPMASK_NUMBPATH) 1044 iterations = ib_path_rec_num_path(p_pr); 1045 else 1046 iterations = (uintn_t) (-1); 1047 else 1048 iterations = 1; 1049 1050 while (path_num < iterations) { 1051 /* 1052 These paths are "fully redundant" 1053 */ 1054 1055 p_pr_item = __osm_pr_rcv_get_lid_pair_path(sa, p_pr, 1056 p_src_port, 1057 p_dest_port, p_dgid, 1058 src_lid_ho, 1059 dest_lid_ho, 1060 comp_mask, 1061 preference); 1062 1063 if (p_pr_item) { 1064 cl_qlist_insert_tail(p_list, &p_pr_item->list_item); 1065 ++path_num; 1066 } 1067 1068 if (++src_lid_ho > src_lid_max_ho) 1069 break; 1070 1071 if (++dest_lid_ho > dest_lid_max_ho) 1072 break; 1073 } 1074 1075 /* 1076 Check if we've accumulated all the paths that the user cares to see 1077 */ 1078 if (path_num == iterations) 1079 goto Exit; 1080 1081 /* 1082 Don't bother reporting preference 1 paths for now. 1083 It's more trouble than it's worth and can only occur 1084 if ports have different LMC values, which isn't supported 1085 by OpenSM right now anyway. 
1086 */ 1087 preference = 2; 1088 src_lid_ho = src_lid_min_ho; 1089 dest_lid_ho = dest_lid_min_ho; 1090 src_offset = 0; 1091 dest_offset = 0; 1092 1093 /* 1094 Iterate over the remaining paths 1095 */ 1096 while (path_num < iterations) { 1097 dest_offset++; 1098 dest_lid_ho++; 1099 1100 if (dest_lid_ho > dest_lid_max_ho) { 1101 src_offset++; 1102 src_lid_ho++; 1103 1104 if (src_lid_ho > src_lid_max_ho) 1105 break; /* done */ 1106 1107 dest_offset = 0; 1108 dest_lid_ho = dest_lid_min_ho; 1109 } 1110 1111 /* 1112 These paths are "fully non-redundant" with paths already 1113 identified above and consequently not of much value. 1114 1115 Don't return paths we already identified above, as indicated 1116 by the offset values being equal. 1117 */ 1118 if (src_offset == dest_offset) 1119 continue; /* already reported */ 1120 1121 p_pr_item = __osm_pr_rcv_get_lid_pair_path(sa, p_pr, 1122 p_src_port, 1123 p_dest_port, p_dgid, 1124 src_lid_ho, 1125 dest_lid_ho, 1126 comp_mask, 1127 preference); 1128 1129 if (p_pr_item) { 1130 cl_qlist_insert_tail(p_list, &p_pr_item->list_item); 1131 ++path_num; 1132 } 1133 } 1134 1135Exit: 1136 OSM_LOG_EXIT(sa->p_log); 1137} 1138 1139/********************************************************************** 1140 **********************************************************************/ 1141static ib_net16_t 1142__osm_pr_rcv_get_end_points(IN osm_sa_t * sa, 1143 IN const osm_madw_t * const p_madw, 1144 OUT const osm_port_t ** const pp_src_port, 1145 OUT const osm_port_t ** const pp_dest_port, 1146 OUT ib_gid_t * const p_dgid) 1147{ 1148 const ib_path_rec_t *p_pr; 1149 const ib_sa_mad_t *p_sa_mad; 1150 ib_net64_t comp_mask; 1151 ib_net64_t dest_guid; 1152 ib_api_status_t status; 1153 ib_net16_t sa_status = IB_SA_MAD_STATUS_SUCCESS; 1154 osm_router_t *p_rtr; 1155 osm_port_t *p_rtr_port; 1156 1157 OSM_LOG_ENTER(sa->p_log); 1158 1159 /* 1160 Determine what fields are valid and then get a pointer 1161 to the source and destination port objects, if 
possible. 1162 */ 1163 1164 p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw); 1165 p_pr = (ib_path_rec_t *) ib_sa_mad_get_payload_ptr(p_sa_mad); 1166 1167 comp_mask = p_sa_mad->comp_mask; 1168 1169 /* 1170 Check a few easy disqualifying cases up front before getting 1171 into the endpoints. 1172 */ 1173 1174 if (comp_mask & IB_PR_COMPMASK_SGID) { 1175 if (!ib_gid_is_link_local(&p_pr->sgid)) { 1176 if (ib_gid_get_subnet_prefix(&p_pr->sgid) != 1177 sa->p_subn->opt.subnet_prefix) { 1178 /* 1179 This 'error' is the client's fault (bad gid) 1180 so don't enter it as an error in our own log. 1181 Return an error response to the client. 1182 */ 1183 OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, 1184 "Non local SGID subnet prefix 0x%016" 1185 PRIx64 "\n", 1186 cl_ntoh64(p_pr->sgid.unicast.prefix)); 1187 1188 sa_status = IB_SA_MAD_STATUS_INVALID_GID; 1189 goto Exit; 1190 } 1191 } 1192 1193 *pp_src_port = osm_get_port_by_guid(sa->p_subn, 1194 p_pr->sgid.unicast. 1195 interface_id); 1196 if (!*pp_src_port) { 1197 /* 1198 This 'error' is the client's fault (bad gid) so 1199 don't enter it as an error in our own log. 1200 Return an error response to the client. 1201 */ 1202 OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, 1203 "No source port with GUID 0x%016" PRIx64 "\n", 1204 cl_ntoh64(p_pr->sgid.unicast.interface_id)); 1205 1206 sa_status = IB_SA_MAD_STATUS_INVALID_GID; 1207 goto Exit; 1208 } 1209 } else { 1210 *pp_src_port = 0; 1211 if (comp_mask & IB_PR_COMPMASK_SLID) { 1212 status = cl_ptr_vector_at(&sa->p_subn->port_lid_tbl, 1213 cl_ntoh16(p_pr->slid), 1214 (void **)pp_src_port); 1215 1216 if ((status != CL_SUCCESS) || (*pp_src_port == NULL)) { 1217 /* 1218 This 'error' is the client's fault (bad lid) so 1219 don't enter it as an error in our own log. 1220 Return an error response to the client. 
1221 */ 1222 OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, 1223 "No source port with LID %u\n", 1224 cl_ntoh16(p_pr->slid)); 1225 1226 sa_status = IB_SA_MAD_STATUS_NO_RECORDS; 1227 goto Exit; 1228 } 1229 } 1230 } 1231 1232 if (p_dgid) 1233 memset(p_dgid, 0, sizeof(*p_dgid)); 1234 1235 if (comp_mask & IB_PR_COMPMASK_DGID) { 1236 dest_guid = p_pr->dgid.unicast.interface_id; 1237 if (!ib_gid_is_link_local(&p_pr->dgid)) { 1238 if (!ib_gid_is_multicast(&p_pr->dgid) && 1239 ib_gid_get_subnet_prefix(&p_pr->dgid) != 1240 sa->p_subn->opt.subnet_prefix) { 1241 OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, 1242 "Non local DGID subnet prefix 0x%016" 1243 PRIx64 "\n", 1244 cl_ntoh64(p_pr->dgid.unicast.prefix)); 1245 1246 /* Find the router port that is configured to 1247 handle this prefix, if any */ 1248 osm_prefix_route_t *route = NULL; 1249 osm_prefix_route_t *r = (osm_prefix_route_t *) 1250 cl_qlist_head(&sa->p_subn->prefix_routes_list); 1251 1252 while (r != (osm_prefix_route_t *) 1253 cl_qlist_end(&sa->p_subn->prefix_routes_list)) 1254 { 1255 if (r->prefix == p_pr->dgid.unicast.prefix || 1256 r->prefix == 0) 1257 { 1258 route = r; 1259 break; 1260 } 1261 r = (osm_prefix_route_t *) cl_qlist_next(&r->list_item); 1262 } 1263 1264 if (!route) { 1265 /* 1266 This 'error' is the client's fault (bad gid) so 1267 don't enter it as an error in our own log. 1268 Return an error response to the client. 
1269 */ 1270 sa_status = IB_SA_MAD_STATUS_INVALID_GID; 1271 goto Exit; 1272 } else if (route->guid == 0) { 1273 /* first router */ 1274 p_rtr = (osm_router_t *) 1275 cl_qmap_head(&sa-> 1276 p_subn-> 1277 rtr_guid_tbl); 1278 } else { 1279 p_rtr = (osm_router_t *) 1280 cl_qmap_get(&sa-> 1281 p_subn-> 1282 rtr_guid_tbl, 1283 route->guid); 1284 } 1285 1286 if (p_rtr == 1287 (osm_router_t *) cl_qmap_end(&sa-> 1288 p_subn-> 1289 rtr_guid_tbl)) 1290 { 1291 OSM_LOG(sa->p_log, OSM_LOG_ERROR, 1292 "ERR 1F22: " 1293 "Off subnet DGID but router not found\n"); 1294 sa_status = 1295 IB_SA_MAD_STATUS_INVALID_GID; 1296 goto Exit; 1297 } 1298 1299 p_rtr_port = osm_router_get_port_ptr(p_rtr); 1300 dest_guid = osm_port_get_guid(p_rtr_port); 1301 if (p_dgid) 1302 *p_dgid = p_pr->dgid; 1303 } 1304 } 1305 1306 *pp_dest_port = osm_get_port_by_guid(sa->p_subn, dest_guid); 1307 if (!*pp_dest_port) { 1308 /* 1309 This 'error' is the client's fault (bad gid) so 1310 don't enter it as an error in our own log. 1311 Return an error response to the client. 1312 */ 1313 OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, 1314 "No dest port with GUID 0x%016" PRIx64 "\n", 1315 cl_ntoh64(dest_guid)); 1316 1317 sa_status = IB_SA_MAD_STATUS_INVALID_GID; 1318 goto Exit; 1319 } 1320 } else { 1321 *pp_dest_port = 0; 1322 if (comp_mask & IB_PR_COMPMASK_DLID) { 1323 status = cl_ptr_vector_at(&sa->p_subn->port_lid_tbl, 1324 cl_ntoh16(p_pr->dlid), 1325 (void **)pp_dest_port); 1326 1327 if ((status != CL_SUCCESS) || (*pp_dest_port == NULL)) { 1328 /* 1329 This 'error' is the client's fault (bad lid) 1330 so don't enter it as an error in our own log. 1331 Return an error response to the client. 
1332 */ 1333 OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, 1334 "No dest port with LID %u\n", 1335 cl_ntoh16(p_pr->dlid)); 1336 1337 sa_status = IB_SA_MAD_STATUS_NO_RECORDS; 1338 goto Exit; 1339 } 1340 } 1341 } 1342 1343Exit: 1344 OSM_LOG_EXIT(sa->p_log); 1345 return (sa_status); 1346} 1347 1348/********************************************************************** 1349 **********************************************************************/ 1350static void 1351__osm_pr_rcv_process_world(IN osm_sa_t * sa, 1352 IN const osm_madw_t * const p_madw, 1353 IN const osm_port_t * const requester_port, 1354 IN const ib_gid_t * const p_dgid, 1355 IN const ib_net64_t comp_mask, 1356 IN cl_qlist_t * const p_list) 1357{ 1358 const cl_qmap_t *p_tbl; 1359 const osm_port_t *p_dest_port; 1360 const osm_port_t *p_src_port; 1361 1362 OSM_LOG_ENTER(sa->p_log); 1363 1364 /* 1365 Iterate the entire port space over itself. 1366 A path record from a port to itself is legit, so no 1367 need for a special case there. 1368 1369 We compute both A -> B and B -> A, since we don't have 1370 any check to determine the reversability of the paths. 
1371 */ 1372 p_tbl = &sa->p_subn->port_guid_tbl; 1373 1374 p_dest_port = (osm_port_t *) cl_qmap_head(p_tbl); 1375 while (p_dest_port != (osm_port_t *) cl_qmap_end(p_tbl)) { 1376 p_src_port = (osm_port_t *) cl_qmap_head(p_tbl); 1377 while (p_src_port != (osm_port_t *) cl_qmap_end(p_tbl)) { 1378 __osm_pr_rcv_get_port_pair_paths(sa, p_madw, 1379 requester_port, 1380 p_src_port, 1381 p_dest_port, p_dgid, 1382 comp_mask, p_list); 1383 1384 p_src_port = 1385 (osm_port_t *) cl_qmap_next(&p_src_port->map_item); 1386 } 1387 1388 p_dest_port = 1389 (osm_port_t *) cl_qmap_next(&p_dest_port->map_item); 1390 } 1391 1392 OSM_LOG_EXIT(sa->p_log); 1393} 1394 1395/********************************************************************** 1396 **********************************************************************/ 1397static void 1398__osm_pr_rcv_process_half(IN osm_sa_t * sa, 1399 IN const osm_madw_t * const p_madw, 1400 IN const osm_port_t * const requester_port, 1401 IN const osm_port_t * const p_src_port, 1402 IN const osm_port_t * const p_dest_port, 1403 IN const ib_gid_t * const p_dgid, 1404 IN const ib_net64_t comp_mask, 1405 IN cl_qlist_t * const p_list) 1406{ 1407 const cl_qmap_t *p_tbl; 1408 const osm_port_t *p_port; 1409 1410 OSM_LOG_ENTER(sa->p_log); 1411 1412 /* 1413 Iterate over every port, looking for matches... 1414 A path record from a port to itself is legit, so no 1415 need to special case that one. 1416 */ 1417 p_tbl = &sa->p_subn->port_guid_tbl; 1418 1419 if (p_src_port) { 1420 /* 1421 The src port if fixed, so iterate over destination ports. 1422 */ 1423 p_port = (osm_port_t *) cl_qmap_head(p_tbl); 1424 while (p_port != (osm_port_t *) cl_qmap_end(p_tbl)) { 1425 __osm_pr_rcv_get_port_pair_paths(sa, p_madw, 1426 requester_port, 1427 p_src_port, p_port, 1428 p_dgid, comp_mask, 1429 p_list); 1430 p_port = (osm_port_t *) cl_qmap_next(&p_port->map_item); 1431 } 1432 } else { 1433 /* 1434 The dest port if fixed, so iterate over source ports. 
1435 */ 1436 p_port = (osm_port_t *) cl_qmap_head(p_tbl); 1437 while (p_port != (osm_port_t *) cl_qmap_end(p_tbl)) { 1438 __osm_pr_rcv_get_port_pair_paths(sa, p_madw, 1439 requester_port, p_port, 1440 p_dest_port, p_dgid, 1441 comp_mask, p_list); 1442 p_port = (osm_port_t *) cl_qmap_next(&p_port->map_item); 1443 } 1444 } 1445 1446 OSM_LOG_EXIT(sa->p_log); 1447} 1448 1449/********************************************************************** 1450 **********************************************************************/ 1451static void 1452__osm_pr_rcv_process_pair(IN osm_sa_t * sa, 1453 IN const osm_madw_t * const p_madw, 1454 IN const osm_port_t * const requester_port, 1455 IN const osm_port_t * const p_src_port, 1456 IN const osm_port_t * const p_dest_port, 1457 IN const ib_gid_t * const p_dgid, 1458 IN const ib_net64_t comp_mask, 1459 IN cl_qlist_t * const p_list) 1460{ 1461 OSM_LOG_ENTER(sa->p_log); 1462 1463 __osm_pr_rcv_get_port_pair_paths(sa, p_madw, requester_port, 1464 p_src_port, p_dest_port, p_dgid, 1465 comp_mask, p_list); 1466 1467 OSM_LOG_EXIT(sa->p_log); 1468} 1469 1470/********************************************************************** 1471 **********************************************************************/ 1472static osm_mgrp_t *pr_get_mgrp(IN osm_sa_t * sa, 1473 IN const osm_madw_t * const p_madw) 1474{ 1475 ib_path_rec_t *p_pr; 1476 const ib_sa_mad_t *p_sa_mad; 1477 ib_net64_t comp_mask; 1478 osm_mgrp_t *mgrp = NULL; 1479 1480 p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw); 1481 p_pr = (ib_path_rec_t *) ib_sa_mad_get_payload_ptr(p_sa_mad); 1482 1483 comp_mask = p_sa_mad->comp_mask; 1484 1485 if ((comp_mask & IB_PR_COMPMASK_DGID) && 1486 !(mgrp = osm_get_mgrp_by_mgid(sa, &p_pr->dgid))) { 1487 char gid_str[INET6_ADDRSTRLEN]; 1488 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F09: " 1489 "No MC group found for PathRecord destination GID %s\n", 1490 inet_ntop(AF_INET6, p_pr->dgid.raw, gid_str, 1491 sizeof gid_str)); 1492 goto Exit; 1493 } 1494 1495 if 
(comp_mask & IB_PR_COMPMASK_DLID) { 1496 if (mgrp) { 1497 /* check that the MLID in the MC group is */ 1498 /* the same as the DLID in the PathRecord */ 1499 if (mgrp->mlid != p_pr->dlid) { 1500 /* Note: perhaps this might be better indicated as an invalid request */ 1501 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F10: " 1502 "MC group MLID 0x%x does not match " 1503 "PathRecord destination LID 0x%x\n", 1504 mgrp->mlid, p_pr->dlid); 1505 mgrp = NULL; 1506 goto Exit; 1507 } 1508 } else if (!(mgrp = osm_get_mgrp_by_mlid(sa->p_subn, p_pr->dlid))) 1509 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F11: " 1510 "No MC group found for PathRecord " 1511 "destination LID 0x%x\n", p_pr->dlid); 1512 } 1513 1514Exit: 1515 return mgrp; 1516} 1517 1518/********************************************************************** 1519 **********************************************************************/ 1520static ib_api_status_t 1521__osm_pr_match_mgrp_attributes(IN osm_sa_t * sa, 1522 IN const osm_madw_t * const p_madw, 1523 IN const osm_mgrp_t * const p_mgrp) 1524{ 1525 const ib_path_rec_t *p_pr; 1526 const ib_sa_mad_t *p_sa_mad; 1527 ib_net64_t comp_mask; 1528 ib_api_status_t status = IB_ERROR; 1529 uint32_t flow_label; 1530 uint8_t sl; 1531 uint8_t hop_limit; 1532 1533 OSM_LOG_ENTER(sa->p_log); 1534 1535 p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw); 1536 p_pr = (ib_path_rec_t *) ib_sa_mad_get_payload_ptr(p_sa_mad); 1537 1538 comp_mask = p_sa_mad->comp_mask; 1539 1540 /* If SGID and/or SLID specified, should validate as member of MC group */ 1541 /* Also, MTU, rate, packet lifetime, and raw traffic requested are not currently checked */ 1542 if (comp_mask & IB_PR_COMPMASK_PKEY) { 1543 if (p_pr->pkey != p_mgrp->mcmember_rec.pkey) 1544 goto Exit; 1545 } 1546 1547 ib_member_get_sl_flow_hop(p_mgrp->mcmember_rec.sl_flow_hop, 1548 &sl, &flow_label, &hop_limit); 1549 1550 if (comp_mask & IB_PR_COMPMASK_SL) { 1551 if (ib_path_rec_sl(p_pr) != sl) 1552 goto Exit; 1553 } 1554 1555 /* If SubnAdmGet, 
assume NumbPaths of 1 (1.2 erratum) */ 1556 if ((comp_mask & IB_PR_COMPMASK_NUMBPATH) && 1557 (p_sa_mad->method != IB_MAD_METHOD_GET)) { 1558 if (ib_path_rec_num_path(p_pr) == 0) 1559 goto Exit; 1560 } 1561 1562 if (comp_mask & IB_PR_COMPMASK_FLOWLABEL) { 1563 if (ib_path_rec_flow_lbl(p_pr) != flow_label) 1564 goto Exit; 1565 } 1566 1567 if (comp_mask & IB_PR_COMPMASK_HOPLIMIT) { 1568 if (ib_path_rec_hop_limit(p_pr) != hop_limit) 1569 goto Exit; 1570 } 1571 1572 if (comp_mask & IB_PR_COMPMASK_TCLASS) { 1573 if (p_pr->tclass != p_mgrp->mcmember_rec.tclass) 1574 goto Exit; 1575 } 1576 1577 status = IB_SUCCESS; 1578 1579Exit: 1580 OSM_LOG_EXIT(sa->p_log); 1581 return (status); 1582} 1583 1584/********************************************************************** 1585 **********************************************************************/ 1586static int 1587__osm_pr_rcv_check_mcast_dest(IN osm_sa_t * sa, 1588 IN const osm_madw_t * const p_madw) 1589{ 1590 const ib_path_rec_t *p_pr; 1591 const ib_sa_mad_t *p_sa_mad; 1592 ib_net64_t comp_mask; 1593 int is_multicast = 0; 1594 1595 OSM_LOG_ENTER(sa->p_log); 1596 1597 p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw); 1598 p_pr = (ib_path_rec_t *) ib_sa_mad_get_payload_ptr(p_sa_mad); 1599 1600 comp_mask = p_sa_mad->comp_mask; 1601 1602 if (comp_mask & IB_PR_COMPMASK_DGID) { 1603 is_multicast = ib_gid_is_multicast(&p_pr->dgid); 1604 if (!is_multicast) 1605 goto Exit; 1606 } 1607 1608 if (comp_mask & IB_PR_COMPMASK_DLID) { 1609 if (cl_ntoh16(p_pr->dlid) >= IB_LID_MCAST_START_HO && 1610 cl_ntoh16(p_pr->dlid) <= IB_LID_MCAST_END_HO) 1611 is_multicast = 1; 1612 else if (is_multicast) { 1613 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F12: " 1614 "PathRecord request indicates MGID but not MLID\n"); 1615 is_multicast = -1; 1616 } 1617 } 1618 1619Exit: 1620 OSM_LOG_EXIT(sa->p_log); 1621 return (is_multicast); 1622} 1623 1624/********************************************************************** 1625 
**********************************************************************/
/*
 * SA dispatcher callback for PathRecord queries.
 *
 * context is the osm_sa_t, data is the received MAD wrapper.
 *
 *   - Methods other than SubnAdmGet / SubnAdmGetTable are answered with
 *     IB_MAD_STATUS_UNSUP_METHOD_ATTR.
 *   - If the requester's port cannot be found the request is logged and
 *     no response is sent.
 *   - A multicast DGID combined with a non-multicast DLID is answered with
 *     IB_MAD_STATUS_INVALID_FIELD.
 *   - A multicast destination yields at most one record, built from the
 *     request and the multicast group's member record.
 *   - A unicast destination is resolved to endpoints and handed to the
 *     pair / half / world path collectors.  If endpoint resolution fails
 *     the record list stays empty.
 *
 * In the last two cases the accumulated list (possibly empty) is passed to
 * osm_sa_respond() after the subnet lock has been released.
 */
void osm_pr_rcv_process(IN void *context, IN void *data)
{
	osm_sa_t *sa = context;
	osm_madw_t *p_madw = data;
	const ib_path_rec_t *p_pr;
	const ib_sa_mad_t *p_sa_mad;
	const osm_port_t *p_src_port = NULL;
	const osm_port_t *p_dest_port = NULL;
	cl_qlist_t pr_list;
	ib_gid_t dgid;
	ib_net16_t sa_status;
	osm_port_t *requester_port;
	int ret;

	OSM_LOG_ENTER(sa->p_log);

	CL_ASSERT(p_madw);

	p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw);
	p_pr = (ib_path_rec_t *) ib_sa_mad_get_payload_ptr(p_sa_mad);

	CL_ASSERT(p_sa_mad->attr_id == IB_MAD_ATTR_PATH_RECORD);

	/* we only support SubnAdmGet and SubnAdmGetTable methods */
	if (p_sa_mad->method != IB_MAD_METHOD_GET &&
	    p_sa_mad->method != IB_MAD_METHOD_GETTABLE) {
		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F17: "
			"Unsupported Method (%s)\n",
			ib_get_sa_method_str(p_sa_mad->method));
		osm_sa_send_error(sa, p_madw, IB_MAD_STATUS_UNSUP_METHOD_ATTR);
		goto Exit;
	}

	/* update the requester physical port. */
	requester_port = osm_get_port_by_mad_addr(sa->p_log, sa->p_subn,
						  osm_madw_get_mad_addr_ptr
						  (p_madw));
	if (requester_port == NULL) {
		/* Unknown requester: drop the request without responding. */
		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F16: "
			"Cannot find requester physical port\n");
		goto Exit;
	}

	if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG))
		osm_dump_path_record(sa->p_log, p_pr, OSM_LOG_DEBUG);

	cl_qlist_init(&pr_list);

	/*
	   Most SA functions (including this one) are read-only on the
	   subnet object, so we grab the lock non-exclusively.
	 */
	cl_plock_acquire(sa->p_lock);

	/* Handle multicast destinations separately */
	ret = __osm_pr_rcv_check_mcast_dest(sa, p_madw);
	if (ret < 0) {
		/* Multicast DGID with unicast DLID */
		cl_plock_release(sa->p_lock);
		osm_sa_send_error(sa, p_madw, IB_MAD_STATUS_INVALID_FIELD);
		goto Exit;
	}

	if (ret > 0) {
		osm_mgrp_t *p_mgrp;
		ib_api_status_t status;
		osm_pr_item_t *p_pr_item;
		uint32_t flow_label;
		uint8_t sl;
		uint8_t hop_limit;

		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
			"Multicast destination requested\n");

		/* First, get the MC info */
		p_mgrp = pr_get_mgrp(sa, p_madw);
		if (!p_mgrp)
			goto Unlock;

		/* Make sure the rest of the PathRecord matches the MC group attributes */
		status = __osm_pr_match_mgrp_attributes(sa, p_madw, p_mgrp);
		if (status != IB_SUCCESS) {
			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F19: "
				"MC group attributes don't match PathRecord request\n");
			goto Unlock;
		}

		p_pr_item = malloc(sizeof(*p_pr_item));
		if (p_pr_item == NULL) {
			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F18: "
				"Unable to allocate path record for MC group\n");
			goto Unlock;
		}
		memset(p_pr_item, 0, sizeof(*p_pr_item));

		/* Copy PathRecord request into response */
		p_pr_item->path_rec = *p_pr;

		/* Now, use the MC info to cruft up the PathRecord response */
		p_pr_item->path_rec.dgid = p_mgrp->mcmember_rec.mgid;
		p_pr_item->path_rec.dlid = p_mgrp->mcmember_rec.mlid;
		p_pr_item->path_rec.tclass = p_mgrp->mcmember_rec.tclass;
		p_pr_item->path_rec.num_path = 1;
		p_pr_item->path_rec.pkey = p_mgrp->mcmember_rec.pkey;

		/* MTU, rate, and packet lifetime should be exactly */
		/* (the value 2 is placed in the top two bits of each field) */
		p_pr_item->path_rec.mtu = (2 << 6) | p_mgrp->mcmember_rec.mtu;
		p_pr_item->path_rec.rate =
		    (2 << 6) | p_mgrp->mcmember_rec.rate;
		p_pr_item->path_rec.pkt_life =
		    (2 << 6) | p_mgrp->mcmember_rec.pkt_life;

		/* SL, Hop Limit, and Flow Label */
		ib_member_get_sl_flow_hop(p_mgrp->mcmember_rec.sl_flow_hop,
					  &sl, &flow_label, &hop_limit);
		ib_path_rec_set_sl(&p_pr_item->path_rec, sl);
		ib_path_rec_set_qos_class(&p_pr_item->path_rec, 0);

		/* HopLimit is not yet set in non link local MC groups */
		/* If it were, this would not be needed */
		if (ib_mgid_get_scope(&p_mgrp->mcmember_rec.mgid) !=
		    IB_MC_SCOPE_LINK_LOCAL)
			hop_limit = IB_HOPLIMIT_MAX;

		/*
		   Assemble hop limit (low 8 bits) and flow label (next 20
		   bits) in host order first and byte-swap the whole word
		   once.  Swapping only the hop limit and OR-ing in a
		   host-order flow label corrupts the field on little-endian
		   hosts whenever the flow label is non-zero.
		 */
		p_pr_item->path_rec.hop_flow_raw =
		    cl_hton32(hop_limit | (flow_label << 8));

		cl_qlist_insert_tail(&pr_list, &p_pr_item->list_item);
	} else {
		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
			"Unicast destination requested\n");

		sa_status = __osm_pr_rcv_get_end_points(sa, p_madw,
							&p_src_port,
							&p_dest_port, &dgid);

		/*
		   On failure nothing is collected and the empty list is
		   returned to the requester below.
		 */
		if (sa_status != IB_SA_MAD_STATUS_SUCCESS)
			goto Unlock;

		/*
		   What happens next depends on the type of endpoint information
		   that was specified....
		 */
		if (p_src_port && p_dest_port)
			__osm_pr_rcv_process_pair(sa, p_madw,
						  requester_port,
						  p_src_port,
						  p_dest_port, &dgid,
						  p_sa_mad->comp_mask,
						  &pr_list);
		else if (p_src_port)
			__osm_pr_rcv_process_half(sa, p_madw,
						  requester_port,
						  p_src_port, NULL,
						  &dgid,
						  p_sa_mad->comp_mask,
						  &pr_list);
		else if (p_dest_port)
			__osm_pr_rcv_process_half(sa, p_madw,
						  requester_port, NULL,
						  p_dest_port, &dgid,
						  p_sa_mad->comp_mask,
						  &pr_list);
		else
			/*
			   Katie, bar the door!
			 */
			__osm_pr_rcv_process_world(sa, p_madw,
						   requester_port,
						   &dgid,
						   p_sa_mad->comp_mask,
						   &pr_list);
	}

Unlock:
	cl_plock_release(sa->p_lock);

	/* Now, (finally) respond to the PathRecord request */
	osm_sa_respond(sa, p_madw, sizeof(ib_path_rec_t), &pr_list);

Exit:
	OSM_LOG_EXIT(sa->p_log);
}