1/*
2 * Copyright (c) 2006-2008 Voltaire, Inc. All rights reserved.
3 * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved.
4 * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
5 *
6 * This software is available to you under a choice of one of two
7 * licenses.  You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * OpenIB.org BSD license below:
11 *
12 *     Redistribution and use in source and binary forms, with or
13 *     without modification, are permitted provided that the following
14 *     conditions are met:
15 *
16 *      - Redistributions of source code must retain the above
17 *        copyright notice, this list of conditions and the following
18 *        disclaimer.
19 *
20 *      - Redistributions in binary form must reproduce the above
21 *        copyright notice, this list of conditions and the following
22 *        disclaimer in the documentation and/or other materials
23 *        provided with the distribution.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE.
33 *
34 */
35
36/*
37 * Abstract:
38 * 	Implementation of osm_mpr_rcv_t.
39 *	This object represents the MultiPath Record Receiver object.
40 *	This object is part of the opensm family of objects.
41 */
42
43#if HAVE_CONFIG_H
44#  include <config.h>
45#endif				/* HAVE_CONFIG_H */
46
47#if defined (VENDOR_RMPP_SUPPORT) && defined (DUAL_SIDED_RMPP)
48
49#include <string.h>
50#include <iba/ib_types.h>
51#include <complib/cl_qmap.h>
52#include <complib/cl_passivelock.h>
53#include <complib/cl_debug.h>
54#include <complib/cl_qlist.h>
55#include <vendor/osm_vendor_api.h>
56#include <opensm/osm_port.h>
57#include <opensm/osm_node.h>
58#include <opensm/osm_switch.h>
59#include <opensm/osm_partition.h>
60#include <opensm/osm_helper.h>
61#include <opensm/osm_qos_policy.h>
62#include <opensm/osm_sa.h>
63
64#define OSM_SA_MPR_MAX_NUM_PATH        127
65
66typedef struct osm_mpr_item {
67	cl_list_item_t list_item;
68	ib_path_rec_t path_rec;
69	const osm_port_t *p_src_port;
70	const osm_port_t *p_dest_port;
71	int hops;
72} osm_mpr_item_t;
73
74typedef struct osm_path_parms {
75	ib_net16_t pkey;
76	uint8_t mtu;
77	uint8_t rate;
78	uint8_t sl;
79	uint8_t pkt_life;
80	boolean_t reversible;
81	int hops;
82} osm_path_parms_t;
83
84/**********************************************************************
85 **********************************************************************/
86static inline boolean_t
87__osm_sa_multipath_rec_is_tavor_port(IN const osm_port_t * const p_port)
88{
89	osm_node_t const *p_node;
90	ib_net32_t vend_id;
91
92	p_node = p_port->p_node;
93	vend_id = ib_node_info_get_vendor_id(&p_node->node_info);
94
95	return ((p_node->node_info.device_id == CL_HTON16(23108)) &&
96		((vend_id == CL_HTON32(OSM_VENDOR_ID_MELLANOX)) ||
97		 (vend_id == CL_HTON32(OSM_VENDOR_ID_TOPSPIN)) ||
98		 (vend_id == CL_HTON32(OSM_VENDOR_ID_SILVERSTORM)) ||
99		 (vend_id == CL_HTON32(OSM_VENDOR_ID_VOLTAIRE))));
100}
101
102/**********************************************************************
103 **********************************************************************/
104static boolean_t
105__osm_sa_multipath_rec_apply_tavor_mtu_limit(IN const ib_multipath_rec_t *
106					     const p_mpr,
107					     IN const osm_port_t *
108					     const p_src_port,
109					     IN const osm_port_t *
110					     const p_dest_port,
111					     IN const ib_net64_t comp_mask)
112{
113	uint8_t required_mtu;
114
115	/* only if at least one of the ports is a Tavor device */
116	if (!__osm_sa_multipath_rec_is_tavor_port(p_src_port) &&
117	    !__osm_sa_multipath_rec_is_tavor_port(p_dest_port))
118		return (FALSE);
119
120	/*
121	   we can apply the patch if either:
122	   1. No MTU required
123	   2. Required MTU <
124	   3. Required MTU = 1K or 512 or 256
125	   4. Required MTU > 256 or 512
126	 */
127	required_mtu = ib_multipath_rec_mtu(p_mpr);
128	if ((comp_mask & IB_MPR_COMPMASK_MTUSELEC) &&
129	    (comp_mask & IB_MPR_COMPMASK_MTU)) {
130		switch (ib_multipath_rec_mtu_sel(p_mpr)) {
131		case 0:	/* must be greater than */
132		case 2:	/* exact match */
133			if (IB_MTU_LEN_1024 < required_mtu)
134				return (FALSE);
135			break;
136
137		case 1:	/* must be less than */
138			/* can't be disqualified by this one */
139			break;
140
141		case 3:	/* largest available */
142			/* the ULP intentionally requested */
143			/* the largest MTU possible */
144			return (FALSE);
145			break;
146
147		default:
148			/* if we're here, there's a bug in ib_multipath_rec_mtu_sel() */
149			CL_ASSERT(FALSE);
150			break;
151		}
152	}
153
154	return (TRUE);
155}
156
157/**********************************************************************
158 **********************************************************************/
159static ib_api_status_t
160__osm_mpr_rcv_get_path_parms(IN osm_sa_t * sa,
161			     IN const ib_multipath_rec_t * const p_mpr,
162			     IN const osm_port_t * const p_src_port,
163			     IN const osm_port_t * const p_dest_port,
164			     IN const uint16_t dest_lid_ho,
165			     IN const ib_net64_t comp_mask,
166			     OUT osm_path_parms_t * const p_parms)
167{
168	const osm_node_t *p_node;
169	const osm_physp_t *p_physp;
170	const osm_physp_t *p_src_physp;
171	const osm_physp_t *p_dest_physp;
172	const osm_prtn_t *p_prtn = NULL;
173	const ib_port_info_t *p_pi;
174	ib_slvl_table_t *p_slvl_tbl;
175	ib_api_status_t status = IB_SUCCESS;
176	uint8_t mtu;
177	uint8_t rate;
178	uint8_t pkt_life;
179	uint8_t required_mtu;
180	uint8_t required_rate;
181	ib_net16_t required_pkey;
182	uint8_t required_sl;
183	uint8_t required_pkt_life;
184	ib_net16_t dest_lid;
185	int hops = 0;
186	int in_port_num = 0;
187	uint8_t i;
188	osm_qos_level_t *p_qos_level = NULL;
189	uint16_t valid_sl_mask = 0xffff;
190
191	OSM_LOG_ENTER(sa->p_log);
192
193	dest_lid = cl_hton16(dest_lid_ho);
194
195	p_dest_physp = p_dest_port->p_physp;
196	p_physp = p_src_port->p_physp;
197	p_src_physp = p_physp;
198	p_pi = &p_physp->port_info;
199
200	mtu = ib_port_info_get_mtu_cap(p_pi);
201	rate = ib_port_info_compute_rate(p_pi);
202
203	/*
204	   Mellanox Tavor device performance is better using 1K MTU.
205	   If required MTU and MTU selector are such that 1K is OK
206	   and at least one end of the path is Tavor we override the
207	   port MTU with 1K.
208	 */
209	if (sa->p_subn->opt.enable_quirks &&
210	    __osm_sa_multipath_rec_apply_tavor_mtu_limit(p_mpr, p_src_port,
211							 p_dest_port,
212							 comp_mask))
213		if (mtu > IB_MTU_LEN_1024) {
214			mtu = IB_MTU_LEN_1024;
215			OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
216				"Optimized Path MTU to 1K for Mellanox Tavor device\n");
217		}
218
219	/*
220	   Walk the subnet object from source to destination,
221	   tracking the most restrictive rate and mtu values along the way...
222
223	   If source port node is a switch, then p_physp should
224	   point to the port that routes the destination lid
225	 */
226
227	p_node = osm_physp_get_node_ptr(p_physp);
228
229	if (p_node->sw) {
230		/*
231		 * Source node is a switch.
232		 * Make sure that p_physp points to the out port of the
233		 * switch that routes to the destination lid (dest_lid_ho)
234		 */
235		p_physp = osm_switch_get_route_by_lid(p_node->sw, dest_lid);
236		if (p_physp == 0) {
237			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4514: "
238				"Can't find routing to LID %u from switch for GUID 0x%016"
239				PRIx64 "\n", dest_lid_ho,
240				cl_ntoh64(osm_node_get_node_guid(p_node)));
241			status = IB_NOT_FOUND;
242			goto Exit;
243		}
244	}
245
246	if (sa->p_subn->opt.qos) {
247
248		/*
249		 * Whether this node is switch or CA, the IN port for
250		 * the sl2vl table is 0, because this is a source node.
251		 */
252		p_slvl_tbl = osm_physp_get_slvl_tbl(p_physp, 0);
253
254		/* update valid SLs that still exist on this route */
255		for (i = 0; i < IB_MAX_NUM_VLS; i++) {
256			if (valid_sl_mask & (1 << i) &&
257			    ib_slvl_table_get(p_slvl_tbl, i) == IB_DROP_VL)
258				valid_sl_mask &= ~(1 << i);
259		}
260		if (!valid_sl_mask) {
261			OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
262				"All the SLs lead to VL15 on this path\n");
263			status = IB_NOT_FOUND;
264			goto Exit;
265		}
266	}
267
268	/*
269	 * Same as above
270	 */
271	p_node = osm_physp_get_node_ptr(p_dest_physp);
272
273	if (p_node->sw) {
274		/*
275		 * if destination is switch, we want p_dest_physp to point to port 0
276		 */
277		p_dest_physp = osm_switch_get_route_by_lid(p_node->sw, dest_lid);
278
279		if (p_dest_physp == 0) {
280			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4515: "
281				"Can't find routing to LID %u from switch for GUID 0x%016"
282				PRIx64 "\n", dest_lid_ho,
283				cl_ntoh64(osm_node_get_node_guid(p_node)));
284			status = IB_NOT_FOUND;
285			goto Exit;
286		}
287
288	}
289
290	/*
291	 * Now go through the path step by step
292	 */
293
294	while (p_physp != p_dest_physp) {
295
296		p_node = osm_physp_get_node_ptr(p_physp);
297		p_physp = osm_physp_get_remote(p_physp);
298
299		if (p_physp == 0) {
300			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4505: "
301				"Can't find remote phys port when routing to LID %u from node GUID 0x%016"
302				PRIx64 "\n", dest_lid_ho,
303				cl_ntoh64(osm_node_get_node_guid(p_node)));
304			status = IB_ERROR;
305			goto Exit;
306		}
307
308		hops++;
309		in_port_num = osm_physp_get_port_num(p_physp);
310
311		/*
312		   This is point to point case (no switch in between)
313		 */
314		if (p_physp == p_dest_physp)
315			break;
316
317		p_node = osm_physp_get_node_ptr(p_physp);
318
319		if (!p_node->sw) {
320			/*
321			   There is some sort of problem in the subnet object!
322			   If this isn't a switch, we should have reached
323			   the destination by now!
324			 */
325			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4503: "
326				"Internal error, bad path\n");
327			status = IB_ERROR;
328			goto Exit;
329		}
330
331		/*
332		   Check parameters for the ingress port in this switch.
333		 */
334		p_pi = &p_physp->port_info;
335
336		if (mtu > ib_port_info_get_mtu_cap(p_pi))
337			mtu = ib_port_info_get_mtu_cap(p_pi);
338
339		if (rate > ib_port_info_compute_rate(p_pi))
340			rate = ib_port_info_compute_rate(p_pi);
341
342		/*
343		   Continue with the egress port on this switch.
344		 */
345		p_physp = osm_switch_get_route_by_lid(p_node->sw, dest_lid);
346		if (p_physp == 0) {
347			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4516: "
348				"Dead end on path to LID %u from switch for GUID 0x%016"
349				PRIx64 "\n", dest_lid_ho,
350				cl_ntoh64(osm_node_get_node_guid(p_node)));
351			status = IB_ERROR;
352			goto Exit;
353		}
354
355		p_pi = &p_physp->port_info;
356
357		if (mtu > ib_port_info_get_mtu_cap(p_pi))
358			mtu = ib_port_info_get_mtu_cap(p_pi);
359
360		if (rate > ib_port_info_compute_rate(p_pi))
361			rate = ib_port_info_compute_rate(p_pi);
362
363		if (sa->p_subn->opt.qos) {
364			/*
365			 * Check SL2VL table of the switch and update valid SLs
366			 */
367			p_slvl_tbl = osm_physp_get_slvl_tbl(p_physp, in_port_num);
368			for (i = 0; i < IB_MAX_NUM_VLS; i++) {
369				if (valid_sl_mask & (1 << i) &&
370				    ib_slvl_table_get(p_slvl_tbl, i) == IB_DROP_VL)
371					valid_sl_mask &= ~(1 << i);
372			}
373			if (!valid_sl_mask) {
374				OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
375					"All the SLs lead to VL15 "
376					"on this path\n");
377				status = IB_NOT_FOUND;
378				goto Exit;
379			}
380		}
381	}
382
383	/*
384	   p_physp now points to the destination
385	 */
386	p_pi = &p_physp->port_info;
387
388	if (mtu > ib_port_info_get_mtu_cap(p_pi))
389		mtu = ib_port_info_get_mtu_cap(p_pi);
390
391	if (rate > ib_port_info_compute_rate(p_pi))
392		rate = ib_port_info_compute_rate(p_pi);
393
394	OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
395		"Path min MTU = %u, min rate = %u\n", mtu, rate);
396
397	/*
398	 * Get QoS Level object according to the MultiPath request
399	 * and adjust MultiPath parameters according to QoS settings
400	 */
401	if (sa->p_subn->opt.qos &&
402	    sa->p_subn->p_qos_policy &&
403	    (p_qos_level =
404	     osm_qos_policy_get_qos_level_by_mpr(sa->p_subn->p_qos_policy,
405						 p_mpr, p_src_physp,
406						 p_dest_physp, comp_mask))) {
407
408		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
409			"MultiPathRecord request matches QoS Level '%s' (%s)\n",
410			p_qos_level->name,
411			p_qos_level->use ? p_qos_level->use : "no description");
412
413		if (p_qos_level->mtu_limit_set
414		    && (mtu > p_qos_level->mtu_limit))
415			mtu = p_qos_level->mtu_limit;
416
417		if (p_qos_level->rate_limit_set
418		    && (rate > p_qos_level->rate_limit))
419			rate = p_qos_level->rate_limit;
420
421		if (p_qos_level->sl_set) {
422			required_sl = p_qos_level->sl;
423			if (!(valid_sl_mask & (1 << required_sl))) {
424				status = IB_NOT_FOUND;
425				goto Exit;
426			}
427		}
428	}
429
430	/*
431	   Determine if these values meet the user criteria
432	 */
433
434	/* we silently ignore cases where only the MTU selector is defined */
435	if ((comp_mask & IB_MPR_COMPMASK_MTUSELEC) &&
436	    (comp_mask & IB_MPR_COMPMASK_MTU)) {
437		required_mtu = ib_multipath_rec_mtu(p_mpr);
438		switch (ib_multipath_rec_mtu_sel(p_mpr)) {
439		case 0:	/* must be greater than */
440			if (mtu <= required_mtu)
441				status = IB_NOT_FOUND;
442			break;
443
444		case 1:	/* must be less than */
445			if (mtu >= required_mtu) {
446				/* adjust to use the highest mtu
447				   lower then the required one */
448				if (required_mtu > 1)
449					mtu = required_mtu - 1;
450				else
451					status = IB_NOT_FOUND;
452			}
453			break;
454
455		case 2:	/* exact match */
456			if (mtu < required_mtu)
457				status = IB_NOT_FOUND;
458			else
459				mtu = required_mtu;
460			break;
461
462		case 3:	/* largest available */
463			/* can't be disqualified by this one */
464			break;
465
466		default:
467			/* if we're here, there's a bug in ib_multipath_rec_mtu_sel() */
468			CL_ASSERT(FALSE);
469			status = IB_ERROR;
470			break;
471		}
472	}
473	if (status != IB_SUCCESS)
474		goto Exit;
475
476	/* we silently ignore cases where only the Rate selector is defined */
477	if ((comp_mask & IB_MPR_COMPMASK_RATESELEC) &&
478	    (comp_mask & IB_MPR_COMPMASK_RATE)) {
479		required_rate = ib_multipath_rec_rate(p_mpr);
480		switch (ib_multipath_rec_rate_sel(p_mpr)) {
481		case 0:	/* must be greater than */
482			if (rate <= required_rate)
483				status = IB_NOT_FOUND;
484			break;
485
486		case 1:	/* must be less than */
487			if (rate >= required_rate) {
488				/* adjust the rate to use the highest rate
489				   lower then the required one */
490				if (required_rate > 2)
491					rate = required_rate - 1;
492				else
493					status = IB_NOT_FOUND;
494			}
495			break;
496
497		case 2:	/* exact match */
498			if (rate < required_rate)
499				status = IB_NOT_FOUND;
500			else
501				rate = required_rate;
502			break;
503
504		case 3:	/* largest available */
505			/* can't be disqualified by this one */
506			break;
507
508		default:
509			/* if we're here, there's a bug in ib_multipath_rec_mtu_sel() */
510			CL_ASSERT(FALSE);
511			status = IB_ERROR;
512			break;
513		}
514	}
515	if (status != IB_SUCCESS)
516		goto Exit;
517
518	/* Verify the pkt_life_time */
519	/* According to spec definition IBA 1.2 Table 205 PacketLifeTime description,
520	   for loopback paths, packetLifeTime shall be zero. */
521	if (p_src_port == p_dest_port)
522		pkt_life = 0;	/* loopback */
523	else if (p_qos_level && p_qos_level->pkt_life_set)
524		pkt_life = p_qos_level->pkt_life;
525	else
526		pkt_life = sa->p_subn->opt.subnet_timeout;
527
528	/* we silently ignore cases where only the PktLife selector is defined */
529	if ((comp_mask & IB_MPR_COMPMASK_PKTLIFETIMESELEC) &&
530	    (comp_mask & IB_MPR_COMPMASK_PKTLIFETIME)) {
531		required_pkt_life = ib_multipath_rec_pkt_life(p_mpr);
532		switch (ib_multipath_rec_pkt_life_sel(p_mpr)) {
533		case 0:	/* must be greater than */
534			if (pkt_life <= required_pkt_life)
535				status = IB_NOT_FOUND;
536			break;
537
538		case 1:	/* must be less than */
539			if (pkt_life >= required_pkt_life) {
540				/* adjust the lifetime to use the highest possible
541				   lower then the required one */
542				if (required_pkt_life > 1)
543					pkt_life = required_pkt_life - 1;
544				else
545					status = IB_NOT_FOUND;
546			}
547			break;
548
549		case 2:	/* exact match */
550			if (pkt_life < required_pkt_life)
551				status = IB_NOT_FOUND;
552			else
553				pkt_life = required_pkt_life;
554			break;
555
556		case 3:	/* smallest available */
557			/* can't be disqualified by this one */
558			break;
559
560		default:
561			/* if we're here, there's a bug in ib_path_rec_pkt_life_sel() */
562			CL_ASSERT(FALSE);
563			status = IB_ERROR;
564			break;
565		}
566	}
567
568	if (status != IB_SUCCESS)
569		goto Exit;
570
571	/*
572	 * set Pkey for this MultiPath record request
573	 */
574
575	if (comp_mask & IB_MPR_COMPMASK_RAWTRAFFIC &&
576	    cl_ntoh32(p_mpr->hop_flow_raw) & (1 << 31))
577		required_pkey =
578		    osm_physp_find_common_pkey(p_src_physp, p_dest_physp);
579
580	else if (comp_mask & IB_MPR_COMPMASK_PKEY) {
581		/*
582		 * MPR request has a specific pkey:
583		 * Check that source and destination share this pkey.
584		 * If QoS level has pkeys, check that this pkey exists
585		 * in the QoS level pkeys.
586		 * MPR returned pkey is the requested pkey.
587		 */
588		required_pkey = p_mpr->pkey;
589		if (!osm_physp_share_this_pkey
590		    (p_src_physp, p_dest_physp, required_pkey)) {
591			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4518: "
592				"Ports do not share specified PKey 0x%04x\n"
593				"\t\tsrc %" PRIx64 " dst %" PRIx64 "\n",
594				cl_ntoh16(required_pkey),
595				cl_ntoh64(osm_physp_get_port_guid(p_src_physp)),
596				cl_ntoh64(osm_physp_get_port_guid
597					  (p_dest_physp)));
598			status = IB_NOT_FOUND;
599			goto Exit;
600		}
601		if (p_qos_level && p_qos_level->pkey_range_len &&
602		    !osm_qos_level_has_pkey(p_qos_level, required_pkey)) {
603			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451C: "
604				"Ports do not share PKeys defined by QoS level\n");
605			status = IB_NOT_FOUND;
606			goto Exit;
607		}
608
609	} else if (p_qos_level && p_qos_level->pkey_range_len) {
610		/*
611		 * MPR request doesn't have a specific pkey, but QoS level
612		 * has pkeys - get shared pkey from QoS level pkeys
613		 */
614		required_pkey = osm_qos_level_get_shared_pkey(p_qos_level,
615							      p_src_physp,
616							      p_dest_physp);
617		if (!required_pkey) {
618			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451D: "
619				"Ports do not share PKeys defined by QoS level\n");
620			status = IB_NOT_FOUND;
621			goto Exit;
622		}
623
624	} else {
625		/*
626		 * Neither MPR request nor QoS level have pkey.
627		 * Just get any shared pkey.
628		 */
629		required_pkey =
630		    osm_physp_find_common_pkey(p_src_physp, p_dest_physp);
631		if (!required_pkey) {
632			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4519: "
633				"Ports do not have any shared PKeys\n"
634				"\t\tsrc %" PRIx64 " dst %" PRIx64 "\n",
635				cl_ntoh64(osm_physp_get_port_guid(p_physp)),
636				cl_ntoh64(osm_physp_get_port_guid
637					  (p_dest_physp)));
638			status = IB_NOT_FOUND;
639			goto Exit;
640		}
641	}
642
643	if (required_pkey) {
644		p_prtn =
645		    (osm_prtn_t *) cl_qmap_get(&sa->p_subn->prtn_pkey_tbl,
646					       required_pkey &
647					       cl_ntoh16((uint16_t) ~ 0x8000));
648		if (p_prtn ==
649		    (osm_prtn_t *) cl_qmap_end(&sa->p_subn->prtn_pkey_tbl))
650			p_prtn = NULL;
651	}
652
653	/*
654	 * Set MultiPathRecord SL.
655	 */
656
657	if (comp_mask & IB_MPR_COMPMASK_SL) {
658		/*
659		 * Specific SL was requested
660		 */
661		required_sl = ib_multipath_rec_sl(p_mpr);
662
663		if (p_qos_level && p_qos_level->sl_set &&
664		    p_qos_level->sl != required_sl) {
665			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451E: "
666				"QoS constaraints: required MultiPathRecord SL (%u) "
667				"doesn't match QoS policy SL (%u)\n",
668				required_sl, p_qos_level->sl);
669			status = IB_NOT_FOUND;
670			goto Exit;
671		}
672
673	} else if (p_qos_level && p_qos_level->sl_set) {
674		/*
675		 * No specific SL was requested,
676		 * but there is an SL in QoS level.
677		 */
678		required_sl = p_qos_level->sl;
679
680		if (required_pkey && p_prtn && p_prtn->sl != p_qos_level->sl)
681			OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
682				"QoS level SL (%u) overrides partition SL (%u)\n",
683				p_qos_level->sl, p_prtn->sl);
684
685	} else if (required_pkey) {
686		/*
687		 * No specific SL in request or in QoS level - use partition SL
688		 */
689		p_prtn =
690		    (osm_prtn_t *) cl_qmap_get(&sa->p_subn->prtn_pkey_tbl,
691					       required_pkey &
692					       cl_ntoh16((uint16_t) ~ 0x8000));
693		if (!p_prtn) {
694			required_sl = OSM_DEFAULT_SL;
695			/* this may be possible when pkey tables are created somehow in
696			   previous runs or things are going wrong here */
697			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451A: "
698				"No partition found for PKey 0x%04x - using default SL %d\n",
699				cl_ntoh16(required_pkey), required_sl);
700		} else
701			required_sl = p_prtn->sl;
702
703	} else if (sa->p_subn->opt.qos) {
704		if (valid_sl_mask & (1 << OSM_DEFAULT_SL))
705			required_sl = OSM_DEFAULT_SL;
706		else {
707			for (i = 0; i < IB_MAX_NUM_VLS; i++)
708				if (valid_sl_mask & (1 << i))
709					break;
710			required_sl = i;
711		}
712	} else
713		required_sl = OSM_DEFAULT_SL;
714
715	if (sa->p_subn->opt.qos && !(valid_sl_mask & (1 << required_sl))) {
716		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451F: "
717			"Selected SL (%u) leads to VL15\n", required_sl);
718		status = IB_NOT_FOUND;
719		goto Exit;
720	}
721
722	/* reset pkey when raw traffic */
723	if (comp_mask & IB_MPR_COMPMASK_RAWTRAFFIC &&
724	    cl_ntoh32(p_mpr->hop_flow_raw) & (1 << 31))
725		required_pkey = 0;
726
727	p_parms->mtu = mtu;
728	p_parms->rate = rate;
729	p_parms->pkey = required_pkey;
730	p_parms->pkt_life = pkt_life;
731	p_parms->sl = required_sl;
732	p_parms->hops = hops;
733
734	OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "MultiPath params:"
735		" mtu = %u, rate = %u, packet lifetime = %u,"
736		" pkey = 0x%04X, sl = %u, hops = %u\n", mtu, rate,
737		pkt_life, cl_ntoh16(required_pkey), required_sl, hops);
738
739Exit:
740	OSM_LOG_EXIT(sa->p_log);
741	return (status);
742}
743
744/**********************************************************************
745 **********************************************************************/
746static void
747__osm_mpr_rcv_build_pr(IN osm_sa_t * sa,
748		       IN const osm_port_t * const p_src_port,
749		       IN const osm_port_t * const p_dest_port,
750		       IN const uint16_t src_lid_ho,
751		       IN const uint16_t dest_lid_ho,
752		       IN const uint8_t preference,
753		       IN const osm_path_parms_t * const p_parms,
754		       OUT ib_path_rec_t * const p_pr)
755{
756	const osm_physp_t *p_src_physp;
757	const osm_physp_t *p_dest_physp;
758
759	OSM_LOG_ENTER(sa->p_log);
760
761	p_src_physp = p_src_port->p_physp;
762	p_dest_physp = p_dest_port->p_physp;
763
764	p_pr->dgid.unicast.prefix = osm_physp_get_subnet_prefix(p_dest_physp);
765	p_pr->dgid.unicast.interface_id = osm_physp_get_port_guid(p_dest_physp);
766
767	p_pr->sgid.unicast.prefix = osm_physp_get_subnet_prefix(p_src_physp);
768	p_pr->sgid.unicast.interface_id = osm_physp_get_port_guid(p_src_physp);
769
770	p_pr->dlid = cl_hton16(dest_lid_ho);
771	p_pr->slid = cl_hton16(src_lid_ho);
772
773	p_pr->hop_flow_raw &= cl_hton32(1 << 31);
774
775	p_pr->pkey = p_parms->pkey;
776	ib_path_rec_set_qos_class(p_pr, 0);
777	ib_path_rec_set_sl(p_pr, p_parms->sl);
778	p_pr->mtu = (uint8_t) (p_parms->mtu | 0x80);
779	p_pr->rate = (uint8_t) (p_parms->rate | 0x80);
780
781	/* According to 1.2 spec definition Table 205 PacketLifeTime description,
782	   for loopback paths, packetLifeTime shall be zero. */
783	if (p_src_port == p_dest_port)
784		p_pr->pkt_life = 0x80;	/* loopback */
785	else
786		p_pr->pkt_life = (uint8_t) (p_parms->pkt_life | 0x80);
787
788	p_pr->preference = preference;
789
790	/* always return num_path = 0 so this is only the reversible component */
791	if (p_parms->reversible)
792		p_pr->num_path = 0x80;
793
794	OSM_LOG_EXIT(sa->p_log);
795}
796
797/**********************************************************************
798 **********************************************************************/
799static osm_mpr_item_t *
800__osm_mpr_rcv_get_lid_pair_path(IN osm_sa_t * sa,
801				IN const ib_multipath_rec_t * const p_mpr,
802				IN const osm_port_t * const p_src_port,
803				IN const osm_port_t * const p_dest_port,
804				IN const uint16_t src_lid_ho,
805				IN const uint16_t dest_lid_ho,
806				IN const ib_net64_t comp_mask,
807				IN const uint8_t preference)
808{
809	osm_path_parms_t path_parms;
810	osm_path_parms_t rev_path_parms;
811	osm_mpr_item_t *p_pr_item;
812	ib_api_status_t status, rev_path_status;
813
814	OSM_LOG_ENTER(sa->p_log);
815
816	OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Src LID %u, Dest LID %u\n",
817		src_lid_ho, dest_lid_ho);
818
819	p_pr_item = malloc(sizeof(*p_pr_item));
820	if (p_pr_item == NULL) {
821		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4501: "
822			"Unable to allocate path record\n");
823		goto Exit;
824	}
825	memset(p_pr_item, 0, sizeof(*p_pr_item));
826
827	status = __osm_mpr_rcv_get_path_parms(sa, p_mpr, p_src_port,
828					      p_dest_port, dest_lid_ho,
829					      comp_mask, &path_parms);
830
831	if (status != IB_SUCCESS) {
832		free(p_pr_item);
833		p_pr_item = NULL;
834		goto Exit;
835	}
836
837	/* now try the reversible path */
838	rev_path_status =
839	    __osm_mpr_rcv_get_path_parms(sa, p_mpr, p_dest_port, p_src_port,
840					 src_lid_ho, comp_mask,
841					 &rev_path_parms);
842	path_parms.reversible = (rev_path_status == IB_SUCCESS);
843
844	/* did we get a Reversible Path compmask ? */
845	/*
846	   NOTE that if the reversible component = 0, it is a don't care
847	   rather then requiring non-reversible paths ...
848	   see Vol1 Ver1.2 p900 l16
849	 */
850	if (comp_mask & IB_MPR_COMPMASK_REVERSIBLE) {
851		if ((!path_parms.reversible && (p_mpr->num_path & 0x80))) {
852			OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
853				"Requested reversible path but failed to get one\n");
854
855			free(p_pr_item);
856			p_pr_item = NULL;
857			goto Exit;
858		}
859	}
860
861	p_pr_item->p_src_port = p_src_port;
862	p_pr_item->p_dest_port = p_dest_port;
863	p_pr_item->hops = path_parms.hops;
864
865	__osm_mpr_rcv_build_pr(sa, p_src_port, p_dest_port, src_lid_ho,
866			       dest_lid_ho, preference, &path_parms,
867			       &p_pr_item->path_rec);
868
869Exit:
870	OSM_LOG_EXIT(sa->p_log);
871	return (p_pr_item);
872}
873
874/**********************************************************************
875 **********************************************************************/
876static uint32_t
877__osm_mpr_rcv_get_port_pair_paths(IN osm_sa_t * sa,
878				  IN const ib_multipath_rec_t * const p_mpr,
879				  IN const osm_port_t * const p_req_port,
880				  IN const osm_port_t * const p_src_port,
881				  IN const osm_port_t * const p_dest_port,
882				  IN const uint32_t rem_paths,
883				  IN const ib_net64_t comp_mask,
884				  IN cl_qlist_t * const p_list)
885{
886	osm_mpr_item_t *p_pr_item;
887	uint16_t src_lid_min_ho;
888	uint16_t src_lid_max_ho;
889	uint16_t dest_lid_min_ho;
890	uint16_t dest_lid_max_ho;
891	uint16_t src_lid_ho;
892	uint16_t dest_lid_ho;
893	uint32_t path_num = 0;
894	uint8_t preference;
895	uintn_t src_offset;
896	uintn_t dest_offset;
897
898	OSM_LOG_ENTER(sa->p_log);
899
900	OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
901		"Src port 0x%016" PRIx64 ", Dst port 0x%016" PRIx64 "\n",
902		cl_ntoh64(osm_port_get_guid(p_src_port)),
903		cl_ntoh64(osm_port_get_guid(p_dest_port)));
904
905	/* Check that the req_port, src_port and dest_port all share a
906	   pkey. The check is done on the default physical port of the ports. */
907	if (osm_port_share_pkey(sa->p_log, p_req_port, p_src_port) == FALSE
908	    || osm_port_share_pkey(sa->p_log, p_req_port,
909				   p_dest_port) == FALSE
910	    || osm_port_share_pkey(sa->p_log, p_src_port,
911				   p_dest_port) == FALSE)
912		/* One of the pairs doesn't share a pkey so the path is disqualified. */
913		goto Exit;
914
915	/*
916	   We shouldn't be here if the paths are disqualified in some way...
917	   Thus, we assume every possible connection is valid.
918
919	   We desire to return high-quality paths first.
920	   In OpenSM, higher quality mean least overlap with other paths.
921	   This is acheived in practice by returning paths with
922	   different LID value on each end, which means these
923	   paths are more redundant that paths with the same LID repeated
924	   on one side.  For example, in OpenSM the paths between two
925	   endpoints with LMC = 1 might be as follows:
926
927	   Port A, LID 1 <-> Port B, LID 3
928	   Port A, LID 1 <-> Port B, LID 4
929	   Port A, LID 2 <-> Port B, LID 3
930	   Port A, LID 2 <-> Port B, LID 4
931
932	   The OpenSM unicast routing algorithms attempt to disperse each path
933	   to as varied a physical path as is reasonable.  1<->3 and 1<->4 have
934	   more physical overlap (hence less redundancy) than 1<->3 and 2<->4.
935
936	   OpenSM ranks paths in three preference groups:
937
938	   Preference Value           Description
939	   ----------------           -------------------------------------------
940	   0                  Redundant in both directions with other
941	   pref value = 0 paths
942
943	   1                  Redundant in one direction with other
944	   pref value = 0 and pref value = 1 paths
945
946	   2                  Not redundant in either direction with
947	   other paths
948
949	   3-FF                       Unused
950
951	   SA clients don't need to know these details, only that the lower
952	   preference paths are preferred, as stated in the spec.  The paths
953	   may not actually be physically redundant depending on the topology
954	   of the subnet, but the point of LMC > 0 is to offer redundancy,
955	   so I assume the subnet is physically appropriate for the specified
956	   LMC value.  A more advanced implementation could inspect for physical
957	   redundancy, but I'm not going to bother with that now.
958	 */
959
960	osm_port_get_lid_range_ho(p_src_port, &src_lid_min_ho, &src_lid_max_ho);
961	osm_port_get_lid_range_ho(p_dest_port, &dest_lid_min_ho,
962				  &dest_lid_max_ho);
963
964	OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Src LID [%u-%u], Dest LID [%u-%u]\n",
965		src_lid_min_ho, src_lid_max_ho,
966		dest_lid_min_ho, dest_lid_max_ho);
967
968	src_lid_ho = src_lid_min_ho;
969	dest_lid_ho = dest_lid_min_ho;
970
971	/*
972	   Preferred paths come first in OpenSM
973	 */
974	preference = 0;
975
976	while (path_num < rem_paths) {
977		/*
978		   These paths are "fully redundant"
979		 */
980		p_pr_item = __osm_mpr_rcv_get_lid_pair_path(sa, p_mpr,
981							    p_src_port,
982							    p_dest_port,
983							    src_lid_ho,
984							    dest_lid_ho,
985							    comp_mask,
986							    preference);
987
988		if (p_pr_item) {
989			cl_qlist_insert_tail(p_list, &p_pr_item->list_item);
990			++path_num;
991		}
992
993		if (++src_lid_ho > src_lid_max_ho)
994			break;
995
996		if (++dest_lid_ho > dest_lid_max_ho)
997			break;
998	}
999
1000	/*
1001	   Check if we've accumulated all the paths that the user cares to see
1002	 */
1003	if (path_num == rem_paths)
1004		goto Exit;
1005
1006	/*
1007	   Don't bother reporting preference 1 paths for now.
1008	   It's more trouble than it's worth and can only occur
1009	   if ports have different LMC values, which isn't supported
1010	   by OpenSM right now anyway.
1011	 */
1012	preference = 2;
1013	src_lid_ho = src_lid_min_ho;
1014	dest_lid_ho = dest_lid_min_ho;
1015	src_offset = 0;
1016	dest_offset = 0;
1017
1018	/*
1019	   Iterate over the remaining paths
1020	 */
1021	while (path_num < rem_paths) {
1022		dest_offset++;
1023		dest_lid_ho++;
1024
1025		if (dest_lid_ho > dest_lid_max_ho) {
1026			src_offset++;
1027			src_lid_ho++;
1028
1029			if (src_lid_ho > src_lid_max_ho)
1030				break;	/* done */
1031
1032			dest_offset = 0;
1033			dest_lid_ho = dest_lid_min_ho;
1034		}
1035
1036		/*
1037		   These paths are "fully non-redundant" with paths already
1038		   identified above and consequently not of much value.
1039
1040		   Don't return paths we already identified above, as indicated
1041		   by the offset values being equal.
1042		 */
1043		if (src_offset == dest_offset)
1044			continue;	/* already reported */
1045
1046		p_pr_item = __osm_mpr_rcv_get_lid_pair_path(sa, p_mpr,
1047							    p_src_port,
1048							    p_dest_port,
1049							    src_lid_ho,
1050							    dest_lid_ho,
1051							    comp_mask,
1052							    preference);
1053
1054		if (p_pr_item) {
1055			cl_qlist_insert_tail(p_list, &p_pr_item->list_item);
1056			++path_num;
1057		}
1058	}
1059
1060Exit:
1061	OSM_LOG_EXIT(sa->p_log);
1062	return path_num;
1063}
1064
/*
 * File-local two-argument minimum.  Any earlier definition of min is
 * dropped first.  Each argument may be evaluated more than once, so callers
 * must not pass expressions with side effects.
 */
#undef min
#define min(x, y)	((y) > (x) ? (x) : (y))
1067
1068/**********************************************************************
1069 **********************************************************************/
1070static osm_mpr_item_t *
1071__osm_mpr_rcv_get_apm_port_pair_paths(IN osm_sa_t * sa,
1072				      IN const ib_multipath_rec_t * const p_mpr,
1073				      IN const osm_port_t * const p_src_port,
1074				      IN const osm_port_t * const p_dest_port,
1075				      IN int base_offs,
1076				      IN const ib_net64_t comp_mask,
1077				      IN cl_qlist_t * const p_list)
1078{
1079	osm_mpr_item_t *p_pr_item = 0;
1080	uint16_t src_lid_min_ho;
1081	uint16_t src_lid_max_ho;
1082	uint16_t dest_lid_min_ho;
1083	uint16_t dest_lid_max_ho;
1084	uint16_t src_lid_ho;
1085	uint16_t dest_lid_ho;
1086	uintn_t iterations;
1087	int src_lids, dest_lids;
1088
1089	OSM_LOG_ENTER(sa->p_log);
1090
1091	OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Src port 0x%016" PRIx64 ", "
1092		"Dst port 0x%016" PRIx64 ", base offs %d\n",
1093		cl_ntoh64(osm_port_get_guid(p_src_port)),
1094		cl_ntoh64(osm_port_get_guid(p_dest_port)), base_offs);
1095
1096	osm_port_get_lid_range_ho(p_src_port, &src_lid_min_ho, &src_lid_max_ho);
1097	osm_port_get_lid_range_ho(p_dest_port, &dest_lid_min_ho,
1098				  &dest_lid_max_ho);
1099
1100	src_lid_ho = src_lid_min_ho;
1101	dest_lid_ho = dest_lid_min_ho;
1102
1103	src_lids = src_lid_max_ho - src_lid_min_ho + 1;
1104	dest_lids = dest_lid_max_ho - dest_lid_min_ho + 1;
1105
1106	src_lid_ho += base_offs % src_lids;
1107	dest_lid_ho += base_offs % dest_lids;
1108
1109	OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
1110		"Src LIDs [%u-%u] hashed %u, "
1111		"Dest LIDs [%u-%u] hashed %u\n",
1112		src_lid_min_ho, src_lid_max_ho, src_lid_ho,
1113		dest_lid_min_ho, dest_lid_max_ho, dest_lid_ho);
1114
1115	iterations = min(src_lids, dest_lids);
1116
1117	while (iterations--) {
1118		/*
1119		   These paths are "fully redundant"
1120		 */
1121		p_pr_item = __osm_mpr_rcv_get_lid_pair_path(sa, p_mpr,
1122							    p_src_port,
1123							    p_dest_port,
1124							    src_lid_ho,
1125							    dest_lid_ho,
1126							    comp_mask, 0);
1127
1128		if (p_pr_item) {
1129			OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
1130				"Found matching path from Src LID %u to Dest LID %u with %d hops\n",
1131				src_lid_ho, dest_lid_ho, p_pr_item->hops);
1132			break;
1133		}
1134
1135		if (++src_lid_ho > src_lid_max_ho)
1136			src_lid_ho = src_lid_min_ho;
1137
1138		if (++dest_lid_ho > dest_lid_max_ho)
1139			dest_lid_ho = dest_lid_min_ho;
1140	}
1141
1142	OSM_LOG_EXIT(sa->p_log);
1143	return p_pr_item;
1144}
1145
1146/**********************************************************************
1147 **********************************************************************/
1148static ib_net16_t
1149__osm_mpr_rcv_get_gids(IN osm_sa_t * sa,
1150		       IN const ib_gid_t * gids,
1151		       IN int ngids, IN int is_sgid, OUT osm_port_t ** pp_port)
1152{
1153	osm_port_t *p_port;
1154	ib_net16_t ib_status = IB_SUCCESS;
1155	int i;
1156
1157	OSM_LOG_ENTER(sa->p_log);
1158
1159	for (i = 0; i < ngids; i++, gids++) {
1160		if (!ib_gid_is_link_local(gids)) {
1161			if ((is_sgid && ib_gid_is_multicast(gids)) ||
1162			    (ib_gid_get_subnet_prefix(gids) !=
1163			     sa->p_subn->opt.subnet_prefix)) {
1164				/*
1165				   This 'error' is the client's fault (bad gid)
1166				   so don't enter it as an error in our own log.
1167				   Return an error response to the client.
1168				 */
1169				OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, "ERR 451B: "
1170					"%sGID 0x%016" PRIx64
1171					" is multicast or non local subnet prefix\n",
1172					is_sgid ? "S" : "D",
1173					cl_ntoh64(gids->unicast.prefix));
1174
1175				ib_status = IB_SA_MAD_STATUS_INVALID_GID;
1176				goto Exit;
1177			}
1178		}
1179
1180		p_port =
1181		    osm_get_port_by_guid(sa->p_subn,
1182					 gids->unicast.interface_id);
1183		if (!p_port) {
1184			/*
1185			   This 'error' is the client's fault (bad gid) so
1186			   don't enter it as an error in our own log.
1187			   Return an error response to the client.
1188			 */
1189			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4506: "
1190				"No port with GUID 0x%016" PRIx64 "\n",
1191				cl_ntoh64(gids->unicast.interface_id));
1192
1193			ib_status = IB_SA_MAD_STATUS_INVALID_GID;
1194			goto Exit;
1195		}
1196
1197		pp_port[i] = p_port;
1198	}
1199
1200Exit:
1201	OSM_LOG_EXIT(sa->p_log);
1202
1203	return ib_status;
1204}
1205
1206/**********************************************************************
1207 **********************************************************************/
1208static ib_net16_t
1209__osm_mpr_rcv_get_end_points(IN osm_sa_t * sa,
1210			     IN const osm_madw_t * const p_madw,
1211			     OUT osm_port_t ** pp_ports,
1212			     OUT int *nsrc, OUT int *ndest)
1213{
1214	const ib_multipath_rec_t *p_mpr;
1215	const ib_sa_mad_t *p_sa_mad;
1216	ib_net64_t comp_mask;
1217	ib_net16_t sa_status = IB_SA_MAD_STATUS_SUCCESS;
1218	ib_gid_t *gids;
1219
1220	OSM_LOG_ENTER(sa->p_log);
1221
1222	/*
1223	   Determine what fields are valid and then get a pointer
1224	   to the source and destination port objects, if possible.
1225	 */
1226	p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw);
1227	p_mpr = (ib_multipath_rec_t *) ib_sa_mad_get_payload_ptr(p_sa_mad);
1228	gids = (ib_gid_t *) p_mpr->gids;
1229
1230	comp_mask = p_sa_mad->comp_mask;
1231
1232	/*
1233	   Check a few easy disqualifying cases up front before getting
1234	   into the endpoints.
1235	 */
1236	*nsrc = *ndest = 0;
1237
1238	if (comp_mask & IB_MPR_COMPMASK_SGIDCOUNT) {
1239		*nsrc = p_mpr->sgid_count;
1240		if (*nsrc > IB_MULTIPATH_MAX_GIDS)
1241			*nsrc = IB_MULTIPATH_MAX_GIDS;
1242		sa_status =
1243		    __osm_mpr_rcv_get_gids(sa, gids, *nsrc, 1, pp_ports);
1244		if (sa_status != IB_SUCCESS)
1245			goto Exit;
1246	}
1247
1248	if (comp_mask & IB_MPR_COMPMASK_DGIDCOUNT) {
1249		*ndest = p_mpr->dgid_count;
1250		if (*ndest + *nsrc > IB_MULTIPATH_MAX_GIDS)
1251			*ndest = IB_MULTIPATH_MAX_GIDS - *nsrc;
1252		sa_status =
1253		    __osm_mpr_rcv_get_gids(sa, gids + *nsrc, *ndest, 0,
1254					   pp_ports + *nsrc);
1255	}
1256
1257Exit:
1258	OSM_LOG_EXIT(sa->p_log);
1259	return (sa_status);
1260}
1261
/*
 * Combine two LIDs into a small offset in the range [0, 102]: both LIDs are
 * shifted right by lmc, the first is then shifted left by four bits and
 * OR-ed with the second, and the result is reduced modulo 103.
 * lmc is evaluated twice.
 */
#define __hash_lids(a, b, lmc)	\
	((((b) >> (lmc)) | (((a) >> (lmc)) << 4)) % 103)
1264
1265/**********************************************************************
1266 **********************************************************************/
1267static void
1268__osm_mpr_rcv_get_apm_paths(IN osm_sa_t * sa,
1269			    IN const ib_multipath_rec_t * const p_mpr,
1270			    IN const osm_port_t * const p_req_port,
1271			    IN osm_port_t ** _pp_ports,
1272			    IN const ib_net64_t comp_mask,
1273			    IN cl_qlist_t * const p_list)
1274{
1275	osm_port_t *pp_ports[4];
1276	osm_mpr_item_t *matrix[2][2];
1277	int base_offs, src_lid_ho, dest_lid_ho;
1278	int sumA, sumB, minA, minB;
1279
1280	OSM_LOG_ENTER(sa->p_log);
1281
1282	/*
1283	 * We want to:
1284	 *    1. use different lid offsets (from base) for the resultant paths
1285	 *    to increase the probability of redundant paths or in case
1286	 *    of Clos - to ensure it (different offset => different spine!)
1287	 *    2. keep consistent paths no matter of direction and order of ports
1288	 *    3. distibute the lid offsets to balance the load
1289	 * So, we sort the ports (within the srcs, and within the dests),
1290	 * hash the lids of S0, D0 (after the sort), and call __osm_mpr_rcv_get_apm_port_pair_paths
1291	 * with base_lid for S0, D0 and base_lid + 1 for S1, D1. This way we will get
1292	 * always the same offsets - order indepentent, and make sure different spines are used.
1293	 * Note that the diagonals on a Clos have the same number of hops, so it doesn't
1294	 * really matter which diagonal we use.
1295	 */
1296	if (_pp_ports[0]->guid < _pp_ports[1]->guid) {
1297		pp_ports[0] = _pp_ports[0];
1298		pp_ports[1] = _pp_ports[1];
1299	} else {
1300		pp_ports[0] = _pp_ports[1];
1301		pp_ports[1] = _pp_ports[0];
1302	}
1303	if (_pp_ports[2]->guid < _pp_ports[3]->guid) {
1304		pp_ports[2] = _pp_ports[2];
1305		pp_ports[3] = _pp_ports[3];
1306	} else {
1307		pp_ports[2] = _pp_ports[3];
1308		pp_ports[3] = _pp_ports[2];
1309	}
1310
1311	src_lid_ho = osm_port_get_base_lid(pp_ports[0]);
1312	dest_lid_ho = osm_port_get_base_lid(pp_ports[2]);
1313
1314	base_offs = src_lid_ho < dest_lid_ho ?
1315	    __hash_lids(src_lid_ho, dest_lid_ho, sa->p_subn->opt.lmc) :
1316	    __hash_lids(dest_lid_ho, src_lid_ho, sa->p_subn->opt.lmc);
1317
1318	matrix[0][0] =
1319	    __osm_mpr_rcv_get_apm_port_pair_paths(sa, p_mpr, pp_ports[0],
1320						  pp_ports[2], base_offs,
1321						  comp_mask, p_list);
1322	matrix[0][1] =
1323	    __osm_mpr_rcv_get_apm_port_pair_paths(sa, p_mpr, pp_ports[0],
1324						  pp_ports[3], base_offs,
1325						  comp_mask, p_list);
1326	matrix[1][0] =
1327	    __osm_mpr_rcv_get_apm_port_pair_paths(sa, p_mpr, pp_ports[1],
1328						  pp_ports[2], base_offs + 1,
1329						  comp_mask, p_list);
1330	matrix[1][1] =
1331	    __osm_mpr_rcv_get_apm_port_pair_paths(sa, p_mpr, pp_ports[1],
1332						  pp_ports[3], base_offs + 1,
1333						  comp_mask, p_list);
1334
1335	OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "APM matrix:\n"
1336		"\t{0,0} 0x%X->0x%X (%d)\t| {0,1} 0x%X->0x%X (%d)\n"
1337		"\t{1,0} 0x%X->0x%X (%d)\t| {1,1} 0x%X->0x%X (%d)\n",
1338		matrix[0][0]->path_rec.slid, matrix[0][0]->path_rec.dlid,
1339		matrix[0][0]->hops, matrix[0][1]->path_rec.slid,
1340		matrix[0][1]->path_rec.dlid, matrix[0][1]->hops,
1341		matrix[1][0]->path_rec.slid, matrix[1][0]->path_rec.dlid,
1342		matrix[1][0]->hops, matrix[1][1]->path_rec.slid,
1343		matrix[1][1]->path_rec.dlid, matrix[1][1]->hops);
1344
1345	/* check diagonal A {(0,0), (1,1)} */
1346	sumA = matrix[0][0]->hops + matrix[1][1]->hops;
1347	minA = min(matrix[0][0]->hops, matrix[1][1]->hops);
1348
1349	/* check diagonal B {(0,1), (1,0)} */
1350	sumB = matrix[0][1]->hops + matrix[1][0]->hops;
1351	minB = min(matrix[0][1]->hops, matrix[1][0]->hops);
1352
1353	/* and the winner is... */
1354	if (minA <= minB || (minA == minB && sumA < sumB)) {
1355		/* Diag A */
1356		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
1357			"Diag {0,0} & {1,1} is the best:\n"
1358			"\t{0,0} 0x%X->0x%X (%d)\t & {1,1} 0x%X->0x%X (%d)\n",
1359			matrix[0][0]->path_rec.slid,
1360			matrix[0][0]->path_rec.dlid, matrix[0][0]->hops,
1361			matrix[1][1]->path_rec.slid,
1362			matrix[1][1]->path_rec.dlid, matrix[1][1]->hops);
1363		cl_qlist_insert_tail(p_list, &matrix[0][0]->list_item);
1364		cl_qlist_insert_tail(p_list, &matrix[1][1]->list_item);
1365		free(matrix[0][1]);
1366		free(matrix[1][0]);
1367	} else {
1368		/* Diag B */
1369		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
1370			"Diag {0,1} & {1,0} is the best:\n"
1371			"\t{0,1} 0x%X->0x%X (%d)\t & {1,0} 0x%X->0x%X (%d)\n",
1372			matrix[0][1]->path_rec.slid,
1373			matrix[0][1]->path_rec.dlid, matrix[0][1]->hops,
1374			matrix[1][0]->path_rec.slid,
1375			matrix[1][0]->path_rec.dlid, matrix[1][0]->hops);
1376		cl_qlist_insert_tail(p_list, &matrix[0][1]->list_item);
1377		cl_qlist_insert_tail(p_list, &matrix[1][0]->list_item);
1378		free(matrix[0][0]);
1379		free(matrix[1][1]);
1380	}
1381
1382	OSM_LOG_EXIT(sa->p_log);
1383}
1384
1385/**********************************************************************
1386 **********************************************************************/
1387static void
1388__osm_mpr_rcv_process_pairs(IN osm_sa_t * sa,
1389			    IN const ib_multipath_rec_t * const p_mpr,
1390			    IN osm_port_t * const p_req_port,
1391			    IN osm_port_t ** pp_ports,
1392			    IN const int nsrc,
1393			    IN const int ndest,
1394			    IN const ib_net64_t comp_mask,
1395			    IN cl_qlist_t * const p_list)
1396{
1397	osm_port_t **pp_src_port, **pp_es;
1398	osm_port_t **pp_dest_port, **pp_ed;
1399	uint32_t max_paths, num_paths, total_paths = 0;
1400
1401	OSM_LOG_ENTER(sa->p_log);
1402
1403	if (comp_mask & IB_MPR_COMPMASK_NUMBPATH)
1404		max_paths = p_mpr->num_path & 0x7F;
1405	else
1406		max_paths = OSM_SA_MPR_MAX_NUM_PATH;
1407
1408	for (pp_src_port = pp_ports, pp_es = pp_ports + nsrc;
1409	     pp_src_port < pp_es; pp_src_port++) {
1410		for (pp_dest_port = pp_es, pp_ed = pp_es + ndest;
1411		     pp_dest_port < pp_ed; pp_dest_port++) {
1412			num_paths =
1413			    __osm_mpr_rcv_get_port_pair_paths(sa, p_mpr,
1414							      p_req_port,
1415							      *pp_src_port,
1416							      *pp_dest_port,
1417							      max_paths -
1418							      total_paths,
1419							      comp_mask,
1420							      p_list);
1421			total_paths += num_paths;
1422			OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
1423				"%d paths %d total paths %d max paths\n",
1424				num_paths, total_paths, max_paths);
1425			/* Just take first NumbPaths found */
1426			if (total_paths >= max_paths)
1427				goto Exit;
1428		}
1429	}
1430
1431Exit:
1432	OSM_LOG_EXIT(sa->p_log);
1433}
1434
1435/**********************************************************************
1436 **********************************************************************/
1437void osm_mpr_rcv_process(IN void *context, IN void *data)
1438{
1439	osm_sa_t *sa = context;
1440	osm_madw_t *p_madw = data;
1441	const ib_multipath_rec_t *p_mpr;
1442	ib_sa_mad_t *p_sa_mad;
1443	osm_port_t *requester_port;
1444	osm_port_t *pp_ports[IB_MULTIPATH_MAX_GIDS];
1445	cl_qlist_t pr_list;
1446	ib_net16_t sa_status;
1447	int nsrc, ndest;
1448
1449	OSM_LOG_ENTER(sa->p_log);
1450
1451	CL_ASSERT(p_madw);
1452
1453	p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw);
1454	p_mpr = (ib_multipath_rec_t *) ib_sa_mad_get_payload_ptr(p_sa_mad);
1455
1456	CL_ASSERT(p_sa_mad->attr_id == IB_MAD_ATTR_MULTIPATH_RECORD);
1457
1458	if ((p_sa_mad->rmpp_flags & IB_RMPP_FLAG_ACTIVE) != IB_RMPP_FLAG_ACTIVE) {
1459		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4510: "
1460			"Invalid request since RMPP_FLAG_ACTIVE is not set\n");
1461		osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_REQ_INVALID);
1462		goto Exit;
1463	}
1464
1465	/* we only support SubnAdmGetMulti method */
1466	if (p_sa_mad->method != IB_MAD_METHOD_GETMULTI) {
1467		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4513: "
1468			"Unsupported Method (%s)\n",
1469			ib_get_sa_method_str(p_sa_mad->method));
1470		osm_sa_send_error(sa, p_madw, IB_MAD_STATUS_UNSUP_METHOD_ATTR);
1471		goto Exit;
1472	}
1473
1474	/* update the requester physical port. */
1475	requester_port = osm_get_port_by_mad_addr(sa->p_log, sa->p_subn,
1476						  osm_madw_get_mad_addr_ptr
1477						  (p_madw));
1478	if (requester_port == NULL) {
1479		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4517: "
1480			"Cannot find requester physical port\n");
1481		goto Exit;
1482	}
1483
1484	if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG))
1485		osm_dump_multipath_record(sa->p_log, p_mpr, OSM_LOG_DEBUG);
1486
1487	cl_qlist_init(&pr_list);
1488
1489	/*
1490	   Most SA functions (including this one) are read-only on the
1491	   subnet object, so we grab the lock non-exclusively.
1492	 */
1493	cl_plock_acquire(sa->p_lock);
1494
1495	sa_status = __osm_mpr_rcv_get_end_points(sa, p_madw, pp_ports,
1496						 &nsrc, &ndest);
1497
1498	if (sa_status != IB_SA_MAD_STATUS_SUCCESS || !nsrc || !ndest) {
1499		if (sa_status == IB_SA_MAD_STATUS_SUCCESS && (!nsrc || !ndest))
1500			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4512: "
1501				"__osm_mpr_rcv_get_end_points failed, not enough GIDs "
1502				"(nsrc %d ndest %d)\n", nsrc, ndest);
1503		cl_plock_release(sa->p_lock);
1504		if (sa_status == IB_SA_MAD_STATUS_SUCCESS)
1505			osm_sa_send_error(sa, p_madw,
1506					  IB_SA_MAD_STATUS_REQ_INVALID);
1507		else
1508			osm_sa_send_error(sa, p_madw, sa_status);
1509		goto Exit;
1510	}
1511
1512	/* APM request */
1513	if (nsrc == 2 && ndest == 2 && (p_mpr->num_path & 0x7F) == 2)
1514		__osm_mpr_rcv_get_apm_paths(sa, p_mpr, requester_port,
1515					    pp_ports, p_sa_mad->comp_mask,
1516					    &pr_list);
1517	else
1518		__osm_mpr_rcv_process_pairs(sa, p_mpr, requester_port,
1519					    pp_ports, nsrc, ndest,
1520					    p_sa_mad->comp_mask, &pr_list);
1521
1522	cl_plock_release(sa->p_lock);
1523
1524	/* o15-0.2.7: If MultiPath is supported, then SA shall respond to a
1525	   SubnAdmGetMulti() containing a valid MultiPathRecord attribute with
1526	   a set of zero or more PathRecords satisfying the constraints
1527	   indicated in the MultiPathRecord received. The PathRecord Attribute
1528	   ID shall be used in the response.
1529	 */
1530	p_sa_mad->attr_id = IB_MAD_ATTR_PATH_RECORD;
1531	osm_sa_respond(sa, p_madw, sizeof(ib_path_rec_t), &pr_list);
1532
1533Exit:
1534	OSM_LOG_EXIT(sa->p_log);
1535}
1536#endif
1537