1/* 2 * Copyright (c) 2006-2008 Voltaire, Inc. All rights reserved. 3 * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. 4 * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. 5 * 6 * This software is available to you under a choice of one of two 7 * licenses. You may choose to be licensed under the terms of the GNU 8 * General Public License (GPL) Version 2, available from the file 9 * COPYING in the main directory of this source tree, or the 10 * OpenIB.org BSD license below: 11 * 12 * Redistribution and use in source and binary forms, with or 13 * without modification, are permitted provided that the following 14 * conditions are met: 15 * 16 * - Redistributions of source code must retain the above 17 * copyright notice, this list of conditions and the following 18 * disclaimer. 19 * 20 * - Redistributions in binary form must reproduce the above 21 * copyright notice, this list of conditions and the following 22 * disclaimer in the documentation and/or other materials 23 * provided with the distribution. 24 * 25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32 * SOFTWARE. 33 * 34 */ 35 36/* 37 * Abstract: 38 * Implementation of osm_mpr_rcv_t. 39 * This object represents the MultiPath Record Receiver object. 40 * This object is part of the opensm family of objects. 41 */ 42 43#if HAVE_CONFIG_H 44# include <config.h> 45#endif /* HAVE_CONFIG_H */ 46 47#if defined (VENDOR_RMPP_SUPPORT) && defined (DUAL_SIDED_RMPP) 48 49#include <string.h> 50#include <iba/ib_types.h> 51#include <complib/cl_qmap.h> 52#include <complib/cl_passivelock.h> 53#include <complib/cl_debug.h> 54#include <complib/cl_qlist.h> 55#include <vendor/osm_vendor_api.h> 56#include <opensm/osm_port.h> 57#include <opensm/osm_node.h> 58#include <opensm/osm_switch.h> 59#include <opensm/osm_partition.h> 60#include <opensm/osm_helper.h> 61#include <opensm/osm_qos_policy.h> 62#include <opensm/osm_sa.h> 63 64#define OSM_SA_MPR_MAX_NUM_PATH 127 65 66typedef struct osm_mpr_item { 67 cl_list_item_t list_item; 68 ib_path_rec_t path_rec; 69 const osm_port_t *p_src_port; 70 const osm_port_t *p_dest_port; 71 int hops; 72} osm_mpr_item_t; 73 74typedef struct osm_path_parms { 75 ib_net16_t pkey; 76 uint8_t mtu; 77 uint8_t rate; 78 uint8_t sl; 79 uint8_t pkt_life; 80 boolean_t reversible; 81 int hops; 82} osm_path_parms_t; 83 84/********************************************************************** 85 **********************************************************************/ 86static inline boolean_t 87__osm_sa_multipath_rec_is_tavor_port(IN const osm_port_t * const p_port) 88{ 89 osm_node_t const *p_node; 90 ib_net32_t vend_id; 91 92 p_node = p_port->p_node; 93 vend_id = ib_node_info_get_vendor_id(&p_node->node_info); 94 95 return ((p_node->node_info.device_id == CL_HTON16(23108)) && 96 ((vend_id == CL_HTON32(OSM_VENDOR_ID_MELLANOX)) || 97 (vend_id == CL_HTON32(OSM_VENDOR_ID_TOPSPIN)) || 98 (vend_id == CL_HTON32(OSM_VENDOR_ID_SILVERSTORM)) || 99 (vend_id == CL_HTON32(OSM_VENDOR_ID_VOLTAIRE)))); 100} 101 102/********************************************************************** 103 **********************************************************************/ 104static boolean_t 105__osm_sa_multipath_rec_apply_tavor_mtu_limit(IN const ib_multipath_rec_t * 106 const p_mpr, 107 IN const osm_port_t * 108 const p_src_port, 109 IN const osm_port_t * 110 const p_dest_port, 111 IN const ib_net64_t comp_mask) 112{ 113 uint8_t required_mtu; 114 115 /* only if at least one of the ports is a Tavor device */ 116 if (!__osm_sa_multipath_rec_is_tavor_port(p_src_port) && 117 !__osm_sa_multipath_rec_is_tavor_port(p_dest_port)) 118 return (FALSE); 119 120 /* 121 we can apply the patch if either: 122 1. No MTU required 123 2. Required MTU < 124 3. Required MTU = 1K or 512 or 256 125 4. Required MTU > 256 or 512 126 */ 127 required_mtu = ib_multipath_rec_mtu(p_mpr); 128 if ((comp_mask & IB_MPR_COMPMASK_MTUSELEC) && 129 (comp_mask & IB_MPR_COMPMASK_MTU)) { 130 switch (ib_multipath_rec_mtu_sel(p_mpr)) { 131 case 0: /* must be greater than */ 132 case 2: /* exact match */ 133 if (IB_MTU_LEN_1024 < required_mtu) 134 return (FALSE); 135 break; 136 137 case 1: /* must be less than */ 138 /* can't be disqualified by this one */ 139 break; 140 141 case 3: /* largest available */ 142 /* the ULP intentionally requested */ 143 /* the largest MTU possible */ 144 return (FALSE); 145 break; 146 147 default: 148 /* if we're here, there's a bug in ib_multipath_rec_mtu_sel() */ 149 CL_ASSERT(FALSE); 150 break; 151 } 152 } 153 154 return (TRUE); 155} 156 157/********************************************************************** 158 **********************************************************************/ 159static ib_api_status_t 160__osm_mpr_rcv_get_path_parms(IN osm_sa_t * sa, 161 IN const ib_multipath_rec_t * const p_mpr, 162 IN const osm_port_t * const p_src_port, 163 IN const osm_port_t * const p_dest_port, 164 IN const uint16_t dest_lid_ho, 165 IN const ib_net64_t comp_mask, 166 OUT osm_path_parms_t * const p_parms) 167{ 168 const osm_node_t *p_node; 169 const osm_physp_t *p_physp; 170 const osm_physp_t *p_src_physp; 171 const osm_physp_t *p_dest_physp; 172 const osm_prtn_t *p_prtn = NULL; 173 const ib_port_info_t *p_pi; 174 ib_slvl_table_t *p_slvl_tbl; 175 ib_api_status_t status = IB_SUCCESS; 176 uint8_t mtu; 177 uint8_t rate; 178 uint8_t pkt_life; 179 uint8_t required_mtu; 180 uint8_t required_rate; 181 ib_net16_t required_pkey; 182 uint8_t required_sl; 183 uint8_t required_pkt_life; 184 ib_net16_t dest_lid; 185 int hops = 0; 186 int in_port_num = 0; 187 uint8_t i; 188 osm_qos_level_t *p_qos_level = NULL; 189 uint16_t valid_sl_mask = 0xffff; 190 191 OSM_LOG_ENTER(sa->p_log); 192 193 dest_lid = cl_hton16(dest_lid_ho); 194 195 p_dest_physp = p_dest_port->p_physp; 196 p_physp = p_src_port->p_physp; 197 p_src_physp = p_physp; 198 p_pi = &p_physp->port_info; 199 200 mtu = ib_port_info_get_mtu_cap(p_pi); 201 rate = ib_port_info_compute_rate(p_pi); 202 203 /* 204 Mellanox Tavor device performance is better using 1K MTU. 205 If required MTU and MTU selector are such that 1K is OK 206 and at least one end of the path is Tavor we override the 207 port MTU with 1K. 208 */ 209 if (sa->p_subn->opt.enable_quirks && 210 __osm_sa_multipath_rec_apply_tavor_mtu_limit(p_mpr, p_src_port, 211 p_dest_port, 212 comp_mask)) 213 if (mtu > IB_MTU_LEN_1024) { 214 mtu = IB_MTU_LEN_1024; 215 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, 216 "Optimized Path MTU to 1K for Mellanox Tavor device\n"); 217 } 218 219 /* 220 Walk the subnet object from source to destination, 221 tracking the most restrictive rate and mtu values along the way... 222 223 If source port node is a switch, then p_physp should 224 point to the port that routes the destination lid 225 */ 226 227 p_node = osm_physp_get_node_ptr(p_physp); 228 229 if (p_node->sw) { 230 /* 231 * Source node is a switch. 232 * Make sure that p_physp points to the out port of the 233 * switch that routes to the destination lid (dest_lid_ho) 234 */ 235 p_physp = osm_switch_get_route_by_lid(p_node->sw, dest_lid); 236 if (p_physp == 0) { 237 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4514: " 238 "Can't find routing to LID %u from switch for GUID 0x%016" 239 PRIx64 "\n", dest_lid_ho, 240 cl_ntoh64(osm_node_get_node_guid(p_node))); 241 status = IB_NOT_FOUND; 242 goto Exit; 243 } 244 } 245 246 if (sa->p_subn->opt.qos) { 247 248 /* 249 * Whether this node is switch or CA, the IN port for 250 * the sl2vl table is 0, because this is a source node. 251 */ 252 p_slvl_tbl = osm_physp_get_slvl_tbl(p_physp, 0); 253 254 /* update valid SLs that still exist on this route */ 255 for (i = 0; i < IB_MAX_NUM_VLS; i++) { 256 if (valid_sl_mask & (1 << i) && 257 ib_slvl_table_get(p_slvl_tbl, i) == IB_DROP_VL) 258 valid_sl_mask &= ~(1 << i); 259 } 260 if (!valid_sl_mask) { 261 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, 262 "All the SLs lead to VL15 on this path\n"); 263 status = IB_NOT_FOUND; 264 goto Exit; 265 } 266 } 267 268 /* 269 * Same as above 270 */ 271 p_node = osm_physp_get_node_ptr(p_dest_physp); 272 273 if (p_node->sw) { 274 /* 275 * if destination is switch, we want p_dest_physp to point to port 0 276 */ 277 p_dest_physp = osm_switch_get_route_by_lid(p_node->sw, dest_lid); 278 279 if (p_dest_physp == 0) { 280 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4515: " 281 "Can't find routing to LID %u from switch for GUID 0x%016" 282 PRIx64 "\n", dest_lid_ho, 283 cl_ntoh64(osm_node_get_node_guid(p_node))); 284 status = IB_NOT_FOUND; 285 goto Exit; 286 } 287 288 } 289 290 /* 291 * Now go through the path step by step 292 */ 293 294 while (p_physp != p_dest_physp) { 295 296 p_node = osm_physp_get_node_ptr(p_physp); 297 p_physp = osm_physp_get_remote(p_physp); 298 299 if (p_physp == 0) { 300 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4505: " 301 "Can't find remote phys port when routing to LID %u from node GUID 0x%016" 302 PRIx64 "\n", dest_lid_ho, 303 cl_ntoh64(osm_node_get_node_guid(p_node))); 304 status = IB_ERROR; 305 goto Exit; 306 } 307 308 hops++; 309 in_port_num = osm_physp_get_port_num(p_physp); 310 311 /* 312 This is point to point case (no switch in between) 313 */ 314 if (p_physp == p_dest_physp) 315 break; 316 317 p_node = osm_physp_get_node_ptr(p_physp); 318 319 if (!p_node->sw) { 320 /* 321 There is some sort of problem in the subnet object! 322 If this isn't a switch, we should have reached 323 the destination by now! 324 */ 325 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4503: " 326 "Internal error, bad path\n"); 327 status = IB_ERROR; 328 goto Exit; 329 } 330 331 /* 332 Check parameters for the ingress port in this switch. 333 */ 334 p_pi = &p_physp->port_info; 335 336 if (mtu > ib_port_info_get_mtu_cap(p_pi)) 337 mtu = ib_port_info_get_mtu_cap(p_pi); 338 339 if (rate > ib_port_info_compute_rate(p_pi)) 340 rate = ib_port_info_compute_rate(p_pi); 341 342 /* 343 Continue with the egress port on this switch. 344 */ 345 p_physp = osm_switch_get_route_by_lid(p_node->sw, dest_lid); 346 if (p_physp == 0) { 347 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4516: " 348 "Dead end on path to LID %u from switch for GUID 0x%016" 349 PRIx64 "\n", dest_lid_ho, 350 cl_ntoh64(osm_node_get_node_guid(p_node))); 351 status = IB_ERROR; 352 goto Exit; 353 } 354 355 p_pi = &p_physp->port_info; 356 357 if (mtu > ib_port_info_get_mtu_cap(p_pi)) 358 mtu = ib_port_info_get_mtu_cap(p_pi); 359 360 if (rate > ib_port_info_compute_rate(p_pi)) 361 rate = ib_port_info_compute_rate(p_pi); 362 363 if (sa->p_subn->opt.qos) { 364 /* 365 * Check SL2VL table of the switch and update valid SLs 366 */ 367 p_slvl_tbl = osm_physp_get_slvl_tbl(p_physp, in_port_num); 368 for (i = 0; i < IB_MAX_NUM_VLS; i++) { 369 if (valid_sl_mask & (1 << i) && 370 ib_slvl_table_get(p_slvl_tbl, i) == IB_DROP_VL) 371 valid_sl_mask &= ~(1 << i); 372 } 373 if (!valid_sl_mask) { 374 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, 375 "All the SLs lead to VL15 " 376 "on this path\n"); 377 status = IB_NOT_FOUND; 378 goto Exit; 379 } 380 } 381 } 382 383 /* 384 p_physp now points to the destination 385 */ 386 p_pi = &p_physp->port_info; 387 388 if (mtu > ib_port_info_get_mtu_cap(p_pi)) 389 mtu = ib_port_info_get_mtu_cap(p_pi); 390 391 if (rate > ib_port_info_compute_rate(p_pi)) 392 rate = ib_port_info_compute_rate(p_pi); 393 394 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, 395 "Path min MTU = %u, min rate = %u\n", mtu, rate); 396 397 /* 398 * Get QoS Level object according to the MultiPath request 399 * and adjust MultiPath parameters according to QoS settings 400 */ 401 if (sa->p_subn->opt.qos && 402 sa->p_subn->p_qos_policy && 403 (p_qos_level = 404 osm_qos_policy_get_qos_level_by_mpr(sa->p_subn->p_qos_policy, 405 p_mpr, p_src_physp, 406 p_dest_physp, comp_mask))) { 407 408 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, 409 "MultiPathRecord request matches QoS Level '%s' (%s)\n", 410 p_qos_level->name, 411 p_qos_level->use ? p_qos_level->use : "no description"); 412 413 if (p_qos_level->mtu_limit_set 414 && (mtu > p_qos_level->mtu_limit)) 415 mtu = p_qos_level->mtu_limit; 416 417 if (p_qos_level->rate_limit_set 418 && (rate > p_qos_level->rate_limit)) 419 rate = p_qos_level->rate_limit; 420 421 if (p_qos_level->sl_set) { 422 required_sl = p_qos_level->sl; 423 if (!(valid_sl_mask & (1 << required_sl))) { 424 status = IB_NOT_FOUND; 425 goto Exit; 426 } 427 } 428 } 429 430 /* 431 Determine if these values meet the user criteria 432 */ 433 434 /* we silently ignore cases where only the MTU selector is defined */ 435 if ((comp_mask & IB_MPR_COMPMASK_MTUSELEC) && 436 (comp_mask & IB_MPR_COMPMASK_MTU)) { 437 required_mtu = ib_multipath_rec_mtu(p_mpr); 438 switch (ib_multipath_rec_mtu_sel(p_mpr)) { 439 case 0: /* must be greater than */ 440 if (mtu <= required_mtu) 441 status = IB_NOT_FOUND; 442 break; 443 444 case 1: /* must be less than */ 445 if (mtu >= required_mtu) { 446 /* adjust to use the highest mtu 447 lower then the required one */ 448 if (required_mtu > 1) 449 mtu = required_mtu - 1; 450 else 451 status = IB_NOT_FOUND; 452 } 453 break; 454 455 case 2: /* exact match */ 456 if (mtu < required_mtu) 457 status = IB_NOT_FOUND; 458 else 459 mtu = required_mtu; 460 break; 461 462 case 3: /* largest available */ 463 /* can't be disqualified by this one */ 464 break; 465 466 default: 467 /* if we're here, there's a bug in ib_multipath_rec_mtu_sel() */ 468 CL_ASSERT(FALSE); 469 status = IB_ERROR; 470 break; 471 } 472 } 473 if (status != IB_SUCCESS) 474 goto Exit; 475 476 /* we silently ignore cases where only the Rate selector is defined */ 477 if ((comp_mask & IB_MPR_COMPMASK_RATESELEC) && 478 (comp_mask & IB_MPR_COMPMASK_RATE)) { 479 required_rate = ib_multipath_rec_rate(p_mpr); 480 switch (ib_multipath_rec_rate_sel(p_mpr)) { 481 case 0: /* must be greater than */ 482 if (rate <= required_rate) 483 status = IB_NOT_FOUND; 484 break; 485 486 case 1: /* must be less than */ 487 if (rate >= required_rate) { 488 /* adjust the rate to use the highest rate 489 lower then the required one */ 490 if (required_rate > 2) 491 rate = required_rate - 1; 492 else 493 status = IB_NOT_FOUND; 494 } 495 break; 496 497 case 2: /* exact match */ 498 if (rate < required_rate) 499 status = IB_NOT_FOUND; 500 else 501 rate = required_rate; 502 break; 503 504 case 3: /* largest available */ 505 /* can't be disqualified by this one */ 506 break; 507 508 default: 509 /* if we're here, there's a bug in ib_multipath_rec_mtu_sel() */ 510 CL_ASSERT(FALSE); 511 status = IB_ERROR; 512 break; 513 } 514 } 515 if (status != IB_SUCCESS) 516 goto Exit; 517 518 /* Verify the pkt_life_time */ 519 /* According to spec definition IBA 1.2 Table 205 PacketLifeTime description, 520 for loopback paths, packetLifeTime shall be zero. */ 521 if (p_src_port == p_dest_port) 522 pkt_life = 0; /* loopback */ 523 else if (p_qos_level && p_qos_level->pkt_life_set) 524 pkt_life = p_qos_level->pkt_life; 525 else 526 pkt_life = sa->p_subn->opt.subnet_timeout; 527 528 /* we silently ignore cases where only the PktLife selector is defined */ 529 if ((comp_mask & IB_MPR_COMPMASK_PKTLIFETIMESELEC) && 530 (comp_mask & IB_MPR_COMPMASK_PKTLIFETIME)) { 531 required_pkt_life = ib_multipath_rec_pkt_life(p_mpr); 532 switch (ib_multipath_rec_pkt_life_sel(p_mpr)) { 533 case 0: /* must be greater than */ 534 if (pkt_life <= required_pkt_life) 535 status = IB_NOT_FOUND; 536 break; 537 538 case 1: /* must be less than */ 539 if (pkt_life >= required_pkt_life) { 540 /* adjust the lifetime to use the highest possible 541 lower then the required one */ 542 if (required_pkt_life > 1) 543 pkt_life = required_pkt_life - 1; 544 else 545 status = IB_NOT_FOUND; 546 } 547 break; 548 549 case 2: /* exact match */ 550 if (pkt_life < required_pkt_life) 551 status = IB_NOT_FOUND; 552 else 553 pkt_life = required_pkt_life; 554 break; 555 556 case 3: /* smallest available */ 557 /* can't be disqualified by this one */ 558 break; 559 560 default: 561 /* if we're here, there's a bug in ib_path_rec_pkt_life_sel() */ 562 CL_ASSERT(FALSE); 563 status = IB_ERROR; 564 break; 565 } 566 } 567 568 if (status != IB_SUCCESS) 569 goto Exit; 570 571 /* 572 * set Pkey for this MultiPath record request 573 */ 574 575 if (comp_mask & IB_MPR_COMPMASK_RAWTRAFFIC && 576 cl_ntoh32(p_mpr->hop_flow_raw) & (1 << 31)) 577 required_pkey = 578 osm_physp_find_common_pkey(p_src_physp, p_dest_physp); 579 580 else if (comp_mask & IB_MPR_COMPMASK_PKEY) { 581 /* 582 * MPR request has a specific pkey: 583 * Check that source and destination share this pkey. 584 * If QoS level has pkeys, check that this pkey exists 585 * in the QoS level pkeys. 586 * MPR returned pkey is the requested pkey. 587 */ 588 required_pkey = p_mpr->pkey; 589 if (!osm_physp_share_this_pkey 590 (p_src_physp, p_dest_physp, required_pkey)) { 591 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4518: " 592 "Ports do not share specified PKey 0x%04x\n" 593 "\t\tsrc %" PRIx64 " dst %" PRIx64 "\n", 594 cl_ntoh16(required_pkey), 595 cl_ntoh64(osm_physp_get_port_guid(p_src_physp)), 596 cl_ntoh64(osm_physp_get_port_guid 597 (p_dest_physp))); 598 status = IB_NOT_FOUND; 599 goto Exit; 600 } 601 if (p_qos_level && p_qos_level->pkey_range_len && 602 !osm_qos_level_has_pkey(p_qos_level, required_pkey)) { 603 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451C: " 604 "Ports do not share PKeys defined by QoS level\n"); 605 status = IB_NOT_FOUND; 606 goto Exit; 607 } 608 609 } else if (p_qos_level && p_qos_level->pkey_range_len) { 610 /* 611 * MPR request doesn't have a specific pkey, but QoS level 612 * has pkeys - get shared pkey from QoS level pkeys 613 */ 614 required_pkey = osm_qos_level_get_shared_pkey(p_qos_level, 615 p_src_physp, 616 p_dest_physp); 617 if (!required_pkey) { 618 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451D: " 619 "Ports do not share PKeys defined by QoS level\n"); 620 status = IB_NOT_FOUND; 621 goto Exit; 622 } 623 624 } else { 625 /* 626 * Neither MPR request nor QoS level have pkey. 627 * Just get any shared pkey. 628 */ 629 required_pkey = 630 osm_physp_find_common_pkey(p_src_physp, p_dest_physp); 631 if (!required_pkey) { 632 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4519: " 633 "Ports do not have any shared PKeys\n" 634 "\t\tsrc %" PRIx64 " dst %" PRIx64 "\n", 635 cl_ntoh64(osm_physp_get_port_guid(p_physp)), 636 cl_ntoh64(osm_physp_get_port_guid 637 (p_dest_physp))); 638 status = IB_NOT_FOUND; 639 goto Exit; 640 } 641 } 642 643 if (required_pkey) { 644 p_prtn = 645 (osm_prtn_t *) cl_qmap_get(&sa->p_subn->prtn_pkey_tbl, 646 required_pkey & 647 cl_ntoh16((uint16_t) ~ 0x8000)); 648 if (p_prtn == 649 (osm_prtn_t *) cl_qmap_end(&sa->p_subn->prtn_pkey_tbl)) 650 p_prtn = NULL; 651 } 652 653 /* 654 * Set MultiPathRecord SL. 655 */ 656 657 if (comp_mask & IB_MPR_COMPMASK_SL) { 658 /* 659 * Specific SL was requested 660 */ 661 required_sl = ib_multipath_rec_sl(p_mpr); 662 663 if (p_qos_level && p_qos_level->sl_set && 664 p_qos_level->sl != required_sl) { 665 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451E: " 666 "QoS constaraints: required MultiPathRecord SL (%u) " 667 "doesn't match QoS policy SL (%u)\n", 668 required_sl, p_qos_level->sl); 669 status = IB_NOT_FOUND; 670 goto Exit; 671 } 672 673 } else if (p_qos_level && p_qos_level->sl_set) { 674 /* 675 * No specific SL was requested, 676 * but there is an SL in QoS level. 677 */ 678 required_sl = p_qos_level->sl; 679 680 if (required_pkey && p_prtn && p_prtn->sl != p_qos_level->sl) 681 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, 682 "QoS level SL (%u) overrides partition SL (%u)\n", 683 p_qos_level->sl, p_prtn->sl); 684 685 } else if (required_pkey) { 686 /* 687 * No specific SL in request or in QoS level - use partition SL 688 */ 689 p_prtn = 690 (osm_prtn_t *) cl_qmap_get(&sa->p_subn->prtn_pkey_tbl, 691 required_pkey & 692 cl_ntoh16((uint16_t) ~ 0x8000)); 693 if (!p_prtn) { 694 required_sl = OSM_DEFAULT_SL; 695 /* this may be possible when pkey tables are created somehow in 696 previous runs or things are going wrong here */ 697 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451A: " 698 "No partition found for PKey 0x%04x - using default SL %d\n", 699 cl_ntoh16(required_pkey), required_sl); 700 } else 701 required_sl = p_prtn->sl; 702 703 } else if (sa->p_subn->opt.qos) { 704 if (valid_sl_mask & (1 << OSM_DEFAULT_SL)) 705 required_sl = OSM_DEFAULT_SL; 706 else { 707 for (i = 0; i < IB_MAX_NUM_VLS; i++) 708 if (valid_sl_mask & (1 << i)) 709 break; 710 required_sl = i; 711 } 712 } else 713 required_sl = OSM_DEFAULT_SL; 714 715 if (sa->p_subn->opt.qos && !(valid_sl_mask & (1 << required_sl))) { 716 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451F: " 717 "Selected SL (%u) leads to VL15\n", required_sl); 718 status = IB_NOT_FOUND; 719 goto Exit; 720 } 721 722 /* reset pkey when raw traffic */ 723 if (comp_mask & IB_MPR_COMPMASK_RAWTRAFFIC && 724 cl_ntoh32(p_mpr->hop_flow_raw) & (1 << 31)) 725 required_pkey = 0; 726 727 p_parms->mtu = mtu; 728 p_parms->rate = rate; 729 p_parms->pkey = required_pkey; 730 p_parms->pkt_life = pkt_life; 731 p_parms->sl = required_sl; 732 p_parms->hops = hops; 733 734 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "MultiPath params:" 735 " mtu = %u, rate = %u, packet lifetime = %u," 736 " pkey = 0x%04X, sl = %u, hops = %u\n", mtu, rate, 737 pkt_life, cl_ntoh16(required_pkey), required_sl, hops); 738 739Exit: 740 OSM_LOG_EXIT(sa->p_log); 741 return (status); 742} 743 744/********************************************************************** 745 **********************************************************************/ 746static void 747__osm_mpr_rcv_build_pr(IN osm_sa_t * sa, 748 IN const osm_port_t * const p_src_port, 749 IN const osm_port_t * const p_dest_port, 750 IN const uint16_t src_lid_ho, 751 IN const uint16_t dest_lid_ho, 752 IN const uint8_t preference, 753 IN const osm_path_parms_t * const p_parms, 754 OUT ib_path_rec_t * const p_pr) 755{ 756 const osm_physp_t *p_src_physp; 757 const osm_physp_t *p_dest_physp; 758 759 OSM_LOG_ENTER(sa->p_log); 760 761 p_src_physp = p_src_port->p_physp; 762 p_dest_physp = p_dest_port->p_physp; 763 764 p_pr->dgid.unicast.prefix = osm_physp_get_subnet_prefix(p_dest_physp); 765 p_pr->dgid.unicast.interface_id = osm_physp_get_port_guid(p_dest_physp); 766 767 p_pr->sgid.unicast.prefix = osm_physp_get_subnet_prefix(p_src_physp); 768 p_pr->sgid.unicast.interface_id = osm_physp_get_port_guid(p_src_physp); 769 770 p_pr->dlid = cl_hton16(dest_lid_ho); 771 p_pr->slid = cl_hton16(src_lid_ho); 772 773 p_pr->hop_flow_raw &= cl_hton32(1 << 31); 774 775 p_pr->pkey = p_parms->pkey; 776 ib_path_rec_set_qos_class(p_pr, 0); 777 ib_path_rec_set_sl(p_pr, p_parms->sl); 778 p_pr->mtu = (uint8_t) (p_parms->mtu | 0x80); 779 p_pr->rate = (uint8_t) (p_parms->rate | 0x80); 780 781 /* According to 1.2 spec definition Table 205 PacketLifeTime description, 782 for loopback paths, packetLifeTime shall be zero. */ 783 if (p_src_port == p_dest_port) 784 p_pr->pkt_life = 0x80; /* loopback */ 785 else 786 p_pr->pkt_life = (uint8_t) (p_parms->pkt_life | 0x80); 787 788 p_pr->preference = preference; 789 790 /* always return num_path = 0 so this is only the reversible component */ 791 if (p_parms->reversible) 792 p_pr->num_path = 0x80; 793 794 OSM_LOG_EXIT(sa->p_log); 795} 796 797/********************************************************************** 798 **********************************************************************/ 799static osm_mpr_item_t * 800__osm_mpr_rcv_get_lid_pair_path(IN osm_sa_t * sa, 801 IN const ib_multipath_rec_t * const p_mpr, 802 IN const osm_port_t * const p_src_port, 803 IN const osm_port_t * const p_dest_port, 804 IN const uint16_t src_lid_ho, 805 IN const uint16_t dest_lid_ho, 806 IN const ib_net64_t comp_mask, 807 IN const uint8_t preference) 808{ 809 osm_path_parms_t path_parms; 810 osm_path_parms_t rev_path_parms; 811 osm_mpr_item_t *p_pr_item; 812 ib_api_status_t status, rev_path_status; 813 814 OSM_LOG_ENTER(sa->p_log); 815 816 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Src LID %u, Dest LID %u\n", 817 src_lid_ho, dest_lid_ho); 818 819 p_pr_item = malloc(sizeof(*p_pr_item)); 820 if (p_pr_item == NULL) { 821 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4501: " 822 "Unable to allocate path record\n"); 823 goto Exit; 824 } 825 memset(p_pr_item, 0, sizeof(*p_pr_item)); 826 827 status = __osm_mpr_rcv_get_path_parms(sa, p_mpr, p_src_port, 828 p_dest_port, dest_lid_ho, 829 comp_mask, &path_parms); 830 831 if (status != IB_SUCCESS) { 832 free(p_pr_item); 833 p_pr_item = NULL; 834 goto Exit; 835 } 836 837 /* now try the reversible path */ 838 rev_path_status = 839 __osm_mpr_rcv_get_path_parms(sa, p_mpr, p_dest_port, p_src_port, 840 src_lid_ho, comp_mask, 841 &rev_path_parms); 842 path_parms.reversible = (rev_path_status == IB_SUCCESS); 843 844 /* did we get a Reversible Path compmask ? */ 845 /* 846 NOTE that if the reversible component = 0, it is a don't care 847 rather then requiring non-reversible paths ... 848 see Vol1 Ver1.2 p900 l16 849 */ 850 if (comp_mask & IB_MPR_COMPMASK_REVERSIBLE) { 851 if ((!path_parms.reversible && (p_mpr->num_path & 0x80))) { 852 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, 853 "Requested reversible path but failed to get one\n"); 854 855 free(p_pr_item); 856 p_pr_item = NULL; 857 goto Exit; 858 } 859 } 860 861 p_pr_item->p_src_port = p_src_port; 862 p_pr_item->p_dest_port = p_dest_port; 863 p_pr_item->hops = path_parms.hops; 864 865 __osm_mpr_rcv_build_pr(sa, p_src_port, p_dest_port, src_lid_ho, 866 dest_lid_ho, preference, &path_parms, 867 &p_pr_item->path_rec); 868 869Exit: 870 OSM_LOG_EXIT(sa->p_log); 871 return (p_pr_item); 872} 873 874/********************************************************************** 875 **********************************************************************/ 876static uint32_t 877__osm_mpr_rcv_get_port_pair_paths(IN osm_sa_t * sa, 878 IN const ib_multipath_rec_t * const p_mpr, 879 IN const osm_port_t * const p_req_port, 880 IN const osm_port_t * const p_src_port, 881 IN const osm_port_t * const p_dest_port, 882 IN const uint32_t rem_paths, 883 IN const ib_net64_t comp_mask, 884 IN cl_qlist_t * const p_list) 885{ 886 osm_mpr_item_t *p_pr_item; 887 uint16_t src_lid_min_ho; 888 uint16_t src_lid_max_ho; 889 uint16_t dest_lid_min_ho; 890 uint16_t dest_lid_max_ho; 891 uint16_t src_lid_ho; 892 uint16_t dest_lid_ho; 893 uint32_t path_num = 0; 894 uint8_t preference; 895 uintn_t src_offset; 896 uintn_t dest_offset; 897 898 OSM_LOG_ENTER(sa->p_log); 899 900 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, 901 "Src port 0x%016" PRIx64 ", Dst port 0x%016" PRIx64 "\n", 902 cl_ntoh64(osm_port_get_guid(p_src_port)), 903 cl_ntoh64(osm_port_get_guid(p_dest_port))); 904 905 /* Check that the req_port, src_port and dest_port all share a 906 pkey. The check is done on the default physical port of the ports. */ 907 if (osm_port_share_pkey(sa->p_log, p_req_port, p_src_port) == FALSE 908 || osm_port_share_pkey(sa->p_log, p_req_port, 909 p_dest_port) == FALSE 910 || osm_port_share_pkey(sa->p_log, p_src_port, 911 p_dest_port) == FALSE) 912 /* One of the pairs doesn't share a pkey so the path is disqualified. */ 913 goto Exit; 914 915 /* 916 We shouldn't be here if the paths are disqualified in some way... 917 Thus, we assume every possible connection is valid. 918 919 We desire to return high-quality paths first. 920 In OpenSM, higher quality mean least overlap with other paths. 921 This is acheived in practice by returning paths with 922 different LID value on each end, which means these 923 paths are more redundant that paths with the same LID repeated 924 on one side. For example, in OpenSM the paths between two 925 endpoints with LMC = 1 might be as follows: 926 927 Port A, LID 1 <-> Port B, LID 3 928 Port A, LID 1 <-> Port B, LID 4 929 Port A, LID 2 <-> Port B, LID 3 930 Port A, LID 2 <-> Port B, LID 4 931 932 The OpenSM unicast routing algorithms attempt to disperse each path 933 to as varied a physical path as is reasonable. 1<->3 and 1<->4 have 934 more physical overlap (hence less redundancy) than 1<->3 and 2<->4. 935 936 OpenSM ranks paths in three preference groups: 937 938 Preference Value Description 939 ---------------- ------------------------------------------- 940 0 Redundant in both directions with other 941 pref value = 0 paths 942 943 1 Redundant in one direction with other 944 pref value = 0 and pref value = 1 paths 945 946 2 Not redundant in either direction with 947 other paths 948 949 3-FF Unused 950 951 SA clients don't need to know these details, only that the lower 952 preference paths are preferred, as stated in the spec. The paths 953 may not actually be physically redundant depending on the topology 954 of the subnet, but the point of LMC > 0 is to offer redundancy, 955 so I assume the subnet is physically appropriate for the specified 956 LMC value. A more advanced implementation could inspect for physical 957 redundancy, but I'm not going to bother with that now. 958 */ 959 960 osm_port_get_lid_range_ho(p_src_port, &src_lid_min_ho, &src_lid_max_ho); 961 osm_port_get_lid_range_ho(p_dest_port, &dest_lid_min_ho, 962 &dest_lid_max_ho); 963 964 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Src LID [%u-%u], Dest LID [%u-%u]\n", 965 src_lid_min_ho, src_lid_max_ho, 966 dest_lid_min_ho, dest_lid_max_ho); 967 968 src_lid_ho = src_lid_min_ho; 969 dest_lid_ho = dest_lid_min_ho; 970 971 /* 972 Preferred paths come first in OpenSM 973 */ 974 preference = 0; 975 976 while (path_num < rem_paths) { 977 /* 978 These paths are "fully redundant" 979 */ 980 p_pr_item = __osm_mpr_rcv_get_lid_pair_path(sa, p_mpr, 981 p_src_port, 982 p_dest_port, 983 src_lid_ho, 984 dest_lid_ho, 985 comp_mask, 986 preference); 987 988 if (p_pr_item) { 989 cl_qlist_insert_tail(p_list, &p_pr_item->list_item); 990 ++path_num; 991 } 992 993 if (++src_lid_ho > src_lid_max_ho) 994 break; 995 996 if (++dest_lid_ho > dest_lid_max_ho) 997 break; 998 } 999 1000 /* 1001 Check if we've accumulated all the paths that the user cares to see 1002 */ 1003 if (path_num == rem_paths) 1004 goto Exit; 1005 1006 /* 1007 Don't bother reporting preference 1 paths for now. 1008 It's more trouble than it's worth and can only occur 1009 if ports have different LMC values, which isn't supported 1010 by OpenSM right now anyway. 1011 */ 1012 preference = 2; 1013 src_lid_ho = src_lid_min_ho; 1014 dest_lid_ho = dest_lid_min_ho; 1015 src_offset = 0; 1016 dest_offset = 0; 1017 1018 /* 1019 Iterate over the remaining paths 1020 */ 1021 while (path_num < rem_paths) { 1022 dest_offset++; 1023 dest_lid_ho++; 1024 1025 if (dest_lid_ho > dest_lid_max_ho) { 1026 src_offset++; 1027 src_lid_ho++; 1028 1029 if (src_lid_ho > src_lid_max_ho) 1030 break; /* done */ 1031 1032 dest_offset = 0; 1033 dest_lid_ho = dest_lid_min_ho; 1034 } 1035 1036 /* 1037 These paths are "fully non-redundant" with paths already 1038 identified above and consequently not of much value. 1039 1040 Don't return paths we already identified above, as indicated 1041 by the offset values being equal. 1042 */ 1043 if (src_offset == dest_offset) 1044 continue; /* already reported */ 1045 1046 p_pr_item = __osm_mpr_rcv_get_lid_pair_path(sa, p_mpr, 1047 p_src_port, 1048 p_dest_port, 1049 src_lid_ho, 1050 dest_lid_ho, 1051 comp_mask, 1052 preference); 1053 1054 if (p_pr_item) { 1055 cl_qlist_insert_tail(p_list, &p_pr_item->list_item); 1056 ++path_num; 1057 } 1058 } 1059 1060Exit: 1061 OSM_LOG_EXIT(sa->p_log); 1062 return path_num; 1063} 1064 1065#undef min 1066#define min(x,y) (((x) < (y)) ? (x) : (y)) 1067 1068/********************************************************************** 1069 **********************************************************************/ 1070static osm_mpr_item_t * 1071__osm_mpr_rcv_get_apm_port_pair_paths(IN osm_sa_t * sa, 1072 IN const ib_multipath_rec_t * const p_mpr, 1073 IN const osm_port_t * const p_src_port, 1074 IN const osm_port_t * const p_dest_port, 1075 IN int base_offs, 1076 IN const ib_net64_t comp_mask, 1077 IN cl_qlist_t * const p_list) 1078{ 1079 osm_mpr_item_t *p_pr_item = 0; 1080 uint16_t src_lid_min_ho; 1081 uint16_t src_lid_max_ho; 1082 uint16_t dest_lid_min_ho; 1083 uint16_t dest_lid_max_ho; 1084 uint16_t src_lid_ho; 1085 uint16_t dest_lid_ho; 1086 uintn_t iterations; 1087 int src_lids, dest_lids; 1088 1089 OSM_LOG_ENTER(sa->p_log); 1090 1091 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Src port 0x%016" PRIx64 ", " 1092 "Dst port 0x%016" PRIx64 ", base offs %d\n", 1093 cl_ntoh64(osm_port_get_guid(p_src_port)), 1094 cl_ntoh64(osm_port_get_guid(p_dest_port)), base_offs); 1095 1096 osm_port_get_lid_range_ho(p_src_port, &src_lid_min_ho, &src_lid_max_ho); 1097 osm_port_get_lid_range_ho(p_dest_port, &dest_lid_min_ho, 1098 &dest_lid_max_ho); 1099 1100 src_lid_ho = src_lid_min_ho; 1101 dest_lid_ho = dest_lid_min_ho; 1102 1103 src_lids = src_lid_max_ho - src_lid_min_ho + 1; 1104 dest_lids = dest_lid_max_ho - dest_lid_min_ho + 1; 1105 1106 src_lid_ho += base_offs % src_lids; 1107 dest_lid_ho += base_offs % dest_lids; 1108 1109 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, 1110 "Src LIDs [%u-%u] hashed %u, " 1111 "Dest LIDs [%u-%u] hashed %u\n", 1112 src_lid_min_ho, src_lid_max_ho, src_lid_ho, 1113 dest_lid_min_ho, dest_lid_max_ho, dest_lid_ho); 1114 1115 iterations = min(src_lids, dest_lids); 1116 1117 while (iterations--) { 1118 /* 1119 These paths are "fully redundant" 1120 */ 1121 p_pr_item = __osm_mpr_rcv_get_lid_pair_path(sa, p_mpr, 1122 p_src_port, 1123 p_dest_port, 1124 src_lid_ho, 1125 dest_lid_ho, 1126 comp_mask, 0); 1127 1128 if (p_pr_item) { 1129 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, 1130 "Found matching path from Src LID %u to Dest LID %u with %d hops\n", 1131 src_lid_ho, dest_lid_ho, p_pr_item->hops); 1132 break; 1133 } 1134 1135 if (++src_lid_ho > src_lid_max_ho) 1136 src_lid_ho = src_lid_min_ho; 1137 1138 if (++dest_lid_ho > dest_lid_max_ho) 1139 dest_lid_ho = dest_lid_min_ho; 1140 } 1141 1142 OSM_LOG_EXIT(sa->p_log); 1143 return p_pr_item; 1144} 1145 1146/********************************************************************** 1147 **********************************************************************/ 1148static ib_net16_t 1149__osm_mpr_rcv_get_gids(IN osm_sa_t * sa, 1150 IN const ib_gid_t * gids, 1151 IN int ngids, IN int is_sgid, OUT osm_port_t ** pp_port) 1152{ 1153 osm_port_t *p_port; 1154 ib_net16_t ib_status = IB_SUCCESS; 1155 int i; 1156 1157 OSM_LOG_ENTER(sa->p_log); 1158 1159 for (i = 0; i < ngids; i++, gids++) { 1160 if (!ib_gid_is_link_local(gids)) { 1161 if ((is_sgid && ib_gid_is_multicast(gids)) || 1162 (ib_gid_get_subnet_prefix(gids) != 1163 sa->p_subn->opt.subnet_prefix)) { 1164 /* 1165 This 'error' is the client's fault (bad gid) 1166 so don't enter it as an error in our own log. 1167 Return an error response to the client. 1168 */ 1169 OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, "ERR 451B: " 1170 "%sGID 0x%016" PRIx64 1171 " is multicast or non local subnet prefix\n", 1172 is_sgid ? "S" : "D", 1173 cl_ntoh64(gids->unicast.prefix)); 1174 1175 ib_status = IB_SA_MAD_STATUS_INVALID_GID; 1176 goto Exit; 1177 } 1178 } 1179 1180 p_port = 1181 osm_get_port_by_guid(sa->p_subn, 1182 gids->unicast.interface_id); 1183 if (!p_port) { 1184 /* 1185 This 'error' is the client's fault (bad gid) so 1186 don't enter it as an error in our own log. 1187 Return an error response to the client. 1188 */ 1189 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4506: " 1190 "No port with GUID 0x%016" PRIx64 "\n", 1191 cl_ntoh64(gids->unicast.interface_id)); 1192 1193 ib_status = IB_SA_MAD_STATUS_INVALID_GID; 1194 goto Exit; 1195 } 1196 1197 pp_port[i] = p_port; 1198 } 1199 1200Exit: 1201 OSM_LOG_EXIT(sa->p_log); 1202 1203 return ib_status; 1204} 1205 1206/********************************************************************** 1207 **********************************************************************/ 1208static ib_net16_t 1209__osm_mpr_rcv_get_end_points(IN osm_sa_t * sa, 1210 IN const osm_madw_t * const p_madw, 1211 OUT osm_port_t ** pp_ports, 1212 OUT int *nsrc, OUT int *ndest) 1213{ 1214 const ib_multipath_rec_t *p_mpr; 1215 const ib_sa_mad_t *p_sa_mad; 1216 ib_net64_t comp_mask; 1217 ib_net16_t sa_status = IB_SA_MAD_STATUS_SUCCESS; 1218 ib_gid_t *gids; 1219 1220 OSM_LOG_ENTER(sa->p_log); 1221 1222 /* 1223 Determine what fields are valid and then get a pointer 1224 to the source and destination port objects, if possible. 1225 */ 1226 p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw); 1227 p_mpr = (ib_multipath_rec_t *) ib_sa_mad_get_payload_ptr(p_sa_mad); 1228 gids = (ib_gid_t *) p_mpr->gids; 1229 1230 comp_mask = p_sa_mad->comp_mask; 1231 1232 /* 1233 Check a few easy disqualifying cases up front before getting 1234 into the endpoints. 1235 */ 1236 *nsrc = *ndest = 0; 1237 1238 if (comp_mask & IB_MPR_COMPMASK_SGIDCOUNT) { 1239 *nsrc = p_mpr->sgid_count; 1240 if (*nsrc > IB_MULTIPATH_MAX_GIDS) 1241 *nsrc = IB_MULTIPATH_MAX_GIDS; 1242 sa_status = 1243 __osm_mpr_rcv_get_gids(sa, gids, *nsrc, 1, pp_ports); 1244 if (sa_status != IB_SUCCESS) 1245 goto Exit; 1246 } 1247 1248 if (comp_mask & IB_MPR_COMPMASK_DGIDCOUNT) { 1249 *ndest = p_mpr->dgid_count; 1250 if (*ndest + *nsrc > IB_MULTIPATH_MAX_GIDS) 1251 *ndest = IB_MULTIPATH_MAX_GIDS - *nsrc; 1252 sa_status = 1253 __osm_mpr_rcv_get_gids(sa, gids + *nsrc, *ndest, 0, 1254 pp_ports + *nsrc); 1255 } 1256 1257Exit: 1258 OSM_LOG_EXIT(sa->p_log); 1259 return (sa_status); 1260} 1261 1262#define __hash_lids(a, b, lmc) \ 1263 (((((a) >> (lmc)) << 4) | ((b) >> (lmc))) % 103) 1264 1265/********************************************************************** 1266 **********************************************************************/ 1267static void 1268__osm_mpr_rcv_get_apm_paths(IN osm_sa_t * sa, 1269 IN const ib_multipath_rec_t * const p_mpr, 1270 IN const osm_port_t * const p_req_port, 1271 IN osm_port_t ** _pp_ports, 1272 IN const ib_net64_t comp_mask, 1273 IN cl_qlist_t * const p_list) 1274{ 1275 osm_port_t *pp_ports[4]; 1276 osm_mpr_item_t *matrix[2][2]; 1277 int base_offs, src_lid_ho, dest_lid_ho; 1278 int sumA, sumB, minA, minB; 1279 1280 OSM_LOG_ENTER(sa->p_log); 1281 1282 /* 1283 * We want to: 1284 * 1. use different lid offsets (from base) for the resultant paths 1285 * to increase the probability of redundant paths or in case 1286 * of Clos - to ensure it (different offset => different spine!) 1287 * 2. keep consistent paths no matter of direction and order of ports 1288 * 3. distibute the lid offsets to balance the load 1289 * So, we sort the ports (within the srcs, and within the dests), 1290 * hash the lids of S0, D0 (after the sort), and call __osm_mpr_rcv_get_apm_port_pair_paths 1291 * with base_lid for S0, D0 and base_lid + 1 for S1, D1. This way we will get 1292 * always the same offsets - order indepentent, and make sure different spines are used. 1293 * Note that the diagonals on a Clos have the same number of hops, so it doesn't 1294 * really matter which diagonal we use. 1295 */ 1296 if (_pp_ports[0]->guid < _pp_ports[1]->guid) { 1297 pp_ports[0] = _pp_ports[0]; 1298 pp_ports[1] = _pp_ports[1]; 1299 } else { 1300 pp_ports[0] = _pp_ports[1]; 1301 pp_ports[1] = _pp_ports[0]; 1302 } 1303 if (_pp_ports[2]->guid < _pp_ports[3]->guid) { 1304 pp_ports[2] = _pp_ports[2]; 1305 pp_ports[3] = _pp_ports[3]; 1306 } else { 1307 pp_ports[2] = _pp_ports[3]; 1308 pp_ports[3] = _pp_ports[2]; 1309 } 1310 1311 src_lid_ho = osm_port_get_base_lid(pp_ports[0]); 1312 dest_lid_ho = osm_port_get_base_lid(pp_ports[2]); 1313 1314 base_offs = src_lid_ho < dest_lid_ho ? 1315 __hash_lids(src_lid_ho, dest_lid_ho, sa->p_subn->opt.lmc) : 1316 __hash_lids(dest_lid_ho, src_lid_ho, sa->p_subn->opt.lmc); 1317 1318 matrix[0][0] = 1319 __osm_mpr_rcv_get_apm_port_pair_paths(sa, p_mpr, pp_ports[0], 1320 pp_ports[2], base_offs, 1321 comp_mask, p_list); 1322 matrix[0][1] = 1323 __osm_mpr_rcv_get_apm_port_pair_paths(sa, p_mpr, pp_ports[0], 1324 pp_ports[3], base_offs, 1325 comp_mask, p_list); 1326 matrix[1][0] = 1327 __osm_mpr_rcv_get_apm_port_pair_paths(sa, p_mpr, pp_ports[1], 1328 pp_ports[2], base_offs + 1, 1329 comp_mask, p_list); 1330 matrix[1][1] = 1331 __osm_mpr_rcv_get_apm_port_pair_paths(sa, p_mpr, pp_ports[1], 1332 pp_ports[3], base_offs + 1, 1333 comp_mask, p_list); 1334 1335 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "APM matrix:\n" 1336 "\t{0,0} 0x%X->0x%X (%d)\t| {0,1} 0x%X->0x%X (%d)\n" 1337 "\t{1,0} 0x%X->0x%X (%d)\t| {1,1} 0x%X->0x%X (%d)\n", 1338 matrix[0][0]->path_rec.slid, matrix[0][0]->path_rec.dlid, 1339 matrix[0][0]->hops, matrix[0][1]->path_rec.slid, 1340 matrix[0][1]->path_rec.dlid, matrix[0][1]->hops, 1341 matrix[1][0]->path_rec.slid, matrix[1][0]->path_rec.dlid, 1342 matrix[1][0]->hops, matrix[1][1]->path_rec.slid, 1343 matrix[1][1]->path_rec.dlid, matrix[1][1]->hops); 1344 1345 /* check diagonal A {(0,0), (1,1)} */ 1346 sumA = matrix[0][0]->hops + matrix[1][1]->hops; 1347 minA = min(matrix[0][0]->hops, matrix[1][1]->hops); 1348 1349 /* check diagonal B {(0,1), (1,0)} */ 1350 sumB = matrix[0][1]->hops + matrix[1][0]->hops; 1351 minB = min(matrix[0][1]->hops, matrix[1][0]->hops); 1352 1353 /* and the winner is... */ 1354 if (minA <= minB || (minA == minB && sumA < sumB)) { 1355 /* Diag A */ 1356 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, 1357 "Diag {0,0} & {1,1} is the best:\n" 1358 "\t{0,0} 0x%X->0x%X (%d)\t & {1,1} 0x%X->0x%X (%d)\n", 1359 matrix[0][0]->path_rec.slid, 1360 matrix[0][0]->path_rec.dlid, matrix[0][0]->hops, 1361 matrix[1][1]->path_rec.slid, 1362 matrix[1][1]->path_rec.dlid, matrix[1][1]->hops); 1363 cl_qlist_insert_tail(p_list, &matrix[0][0]->list_item); 1364 cl_qlist_insert_tail(p_list, &matrix[1][1]->list_item); 1365 free(matrix[0][1]); 1366 free(matrix[1][0]); 1367 } else { 1368 /* Diag B */ 1369 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, 1370 "Diag {0,1} & {1,0} is the best:\n" 1371 "\t{0,1} 0x%X->0x%X (%d)\t & {1,0} 0x%X->0x%X (%d)\n", 1372 matrix[0][1]->path_rec.slid, 1373 matrix[0][1]->path_rec.dlid, matrix[0][1]->hops, 1374 matrix[1][0]->path_rec.slid, 1375 matrix[1][0]->path_rec.dlid, matrix[1][0]->hops); 1376 cl_qlist_insert_tail(p_list, &matrix[0][1]->list_item); 1377 cl_qlist_insert_tail(p_list, &matrix[1][0]->list_item); 1378 free(matrix[0][0]); 1379 free(matrix[1][1]); 1380 } 1381 1382 OSM_LOG_EXIT(sa->p_log); 1383} 1384 1385/********************************************************************** 1386 **********************************************************************/ 1387static void 1388__osm_mpr_rcv_process_pairs(IN osm_sa_t * sa, 1389 IN const ib_multipath_rec_t * const p_mpr, 1390 IN osm_port_t * const p_req_port, 1391 IN osm_port_t ** pp_ports, 1392 IN const int nsrc, 1393 IN const int ndest, 1394 IN const ib_net64_t comp_mask, 1395 IN cl_qlist_t * const p_list) 1396{ 1397 osm_port_t **pp_src_port, **pp_es; 1398 osm_port_t **pp_dest_port, **pp_ed; 1399 uint32_t max_paths, num_paths, total_paths = 0; 1400 1401 OSM_LOG_ENTER(sa->p_log); 1402 1403 if (comp_mask & IB_MPR_COMPMASK_NUMBPATH) 1404 max_paths = p_mpr->num_path & 0x7F; 1405 else 1406 max_paths = OSM_SA_MPR_MAX_NUM_PATH; 1407 1408 for (pp_src_port = pp_ports, pp_es = pp_ports + nsrc; 1409 pp_src_port < pp_es; pp_src_port++) { 1410 for (pp_dest_port = pp_es, pp_ed = pp_es + ndest; 1411 pp_dest_port < pp_ed; pp_dest_port++) { 1412 num_paths = 1413 __osm_mpr_rcv_get_port_pair_paths(sa, p_mpr, 1414 p_req_port, 1415 *pp_src_port, 1416 *pp_dest_port, 1417 max_paths - 1418 total_paths, 1419 comp_mask, 1420 p_list); 1421 total_paths += num_paths; 1422 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, 1423 "%d paths %d total paths %d max paths\n", 1424 num_paths, total_paths, max_paths); 1425 /* Just take first NumbPaths found */ 1426 if (total_paths >= max_paths) 1427 goto Exit; 1428 } 1429 } 1430 1431Exit: 1432 OSM_LOG_EXIT(sa->p_log); 1433} 1434 1435/********************************************************************** 1436 **********************************************************************/ 1437void osm_mpr_rcv_process(IN void *context, IN void *data) 1438{ 1439 osm_sa_t *sa = context; 1440 osm_madw_t *p_madw = data; 1441 const ib_multipath_rec_t *p_mpr; 1442 ib_sa_mad_t *p_sa_mad; 1443 osm_port_t *requester_port; 1444 osm_port_t *pp_ports[IB_MULTIPATH_MAX_GIDS]; 1445 cl_qlist_t pr_list; 1446 ib_net16_t sa_status; 1447 int nsrc, ndest; 1448 1449 OSM_LOG_ENTER(sa->p_log); 1450 1451 CL_ASSERT(p_madw); 1452 1453 p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw); 1454 p_mpr = (ib_multipath_rec_t *) ib_sa_mad_get_payload_ptr(p_sa_mad); 1455 1456 CL_ASSERT(p_sa_mad->attr_id == IB_MAD_ATTR_MULTIPATH_RECORD); 1457 1458 if ((p_sa_mad->rmpp_flags & IB_RMPP_FLAG_ACTIVE) != IB_RMPP_FLAG_ACTIVE) { 1459 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4510: " 1460 "Invalid request since RMPP_FLAG_ACTIVE is not set\n"); 1461 osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_REQ_INVALID); 1462 goto Exit; 1463 } 1464 1465 /* we only support SubnAdmGetMulti method */ 1466 if (p_sa_mad->method != IB_MAD_METHOD_GETMULTI) { 1467 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4513: " 1468 "Unsupported Method (%s)\n", 1469 ib_get_sa_method_str(p_sa_mad->method)); 1470 osm_sa_send_error(sa, p_madw, IB_MAD_STATUS_UNSUP_METHOD_ATTR); 1471 goto Exit; 1472 } 1473 1474 /* update the requester physical port. */ 1475 requester_port = osm_get_port_by_mad_addr(sa->p_log, sa->p_subn, 1476 osm_madw_get_mad_addr_ptr 1477 (p_madw)); 1478 if (requester_port == NULL) { 1479 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4517: " 1480 "Cannot find requester physical port\n"); 1481 goto Exit; 1482 } 1483 1484 if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) 1485 osm_dump_multipath_record(sa->p_log, p_mpr, OSM_LOG_DEBUG); 1486 1487 cl_qlist_init(&pr_list); 1488 1489 /* 1490 Most SA functions (including this one) are read-only on the 1491 subnet object, so we grab the lock non-exclusively. 1492 */ 1493 cl_plock_acquire(sa->p_lock); 1494 1495 sa_status = __osm_mpr_rcv_get_end_points(sa, p_madw, pp_ports, 1496 &nsrc, &ndest); 1497 1498 if (sa_status != IB_SA_MAD_STATUS_SUCCESS || !nsrc || !ndest) { 1499 if (sa_status == IB_SA_MAD_STATUS_SUCCESS && (!nsrc || !ndest)) 1500 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4512: " 1501 "__osm_mpr_rcv_get_end_points failed, not enough GIDs " 1502 "(nsrc %d ndest %d)\n", nsrc, ndest); 1503 cl_plock_release(sa->p_lock); 1504 if (sa_status == IB_SA_MAD_STATUS_SUCCESS) 1505 osm_sa_send_error(sa, p_madw, 1506 IB_SA_MAD_STATUS_REQ_INVALID); 1507 else 1508 osm_sa_send_error(sa, p_madw, sa_status); 1509 goto Exit; 1510 } 1511 1512 /* APM request */ 1513 if (nsrc == 2 && ndest == 2 && (p_mpr->num_path & 0x7F) == 2) 1514 __osm_mpr_rcv_get_apm_paths(sa, p_mpr, requester_port, 1515 pp_ports, p_sa_mad->comp_mask, 1516 &pr_list); 1517 else 1518 __osm_mpr_rcv_process_pairs(sa, p_mpr, requester_port, 1519 pp_ports, nsrc, ndest, 1520 p_sa_mad->comp_mask, &pr_list); 1521 1522 cl_plock_release(sa->p_lock); 1523 1524 /* o15-0.2.7: If MultiPath is supported, then SA shall respond to a 1525 SubnAdmGetMulti() containing a valid MultiPathRecord attribute with 1526 a set of zero or more PathRecords satisfying the constraints 1527 indicated in the MultiPathRecord received. The PathRecord Attribute 1528 ID shall be used in the response. 1529 */ 1530 p_sa_mad->attr_id = IB_MAD_ATTR_PATH_RECORD; 1531 osm_sa_respond(sa, p_madw, sizeof(ib_path_rec_t), &pr_list); 1532 1533Exit: 1534 OSM_LOG_EXIT(sa->p_log); 1535} 1536#endif 1537