1/*
2 * Copyright (c) 2006-2009 Voltaire, Inc. All rights reserved.
3 * Copyright (c) 2002-2011 Mellanox Technologies LTD. All rights reserved.
4 * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
5 * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved.
6 *
7 * This software is available to you under a choice of one of two
8 * licenses.  You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
12 *
13 *     Redistribution and use in source and binary forms, with or
14 *     without modification, are permitted provided that the following
15 *     conditions are met:
16 *
17 *      - Redistributions of source code must retain the above
18 *        copyright notice, this list of conditions and the following
19 *        disclaimer.
20 *
21 *      - Redistributions in binary form must reproduce the above
22 *        copyright notice, this list of conditions and the following
23 *        disclaimer in the documentation and/or other materials
24 *        provided with the distribution.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
34 *
35 */
36
37/*
38 * Abstract:
39 * 	Implementation of osm_mpr_rcv_t.
40 *	This object represents the MultiPath Record Receiver object.
41 *	This object is part of the opensm family of objects.
42 */
43
44#if HAVE_CONFIG_H
45#  include <config.h>
46#endif				/* HAVE_CONFIG_H */
47
48#if defined (VENDOR_RMPP_SUPPORT) && defined (DUAL_SIDED_RMPP)
49
50#include <string.h>
51#include <iba/ib_types.h>
52#include <complib/cl_qmap.h>
53#include <complib/cl_passivelock.h>
54#include <complib/cl_debug.h>
55#include <complib/cl_qlist.h>
56#include <opensm/osm_file_ids.h>
57#define FILE_ID OSM_FILE_SA_MULTIPATH_RECORD_C
58#include <vendor/osm_vendor_api.h>
59#include <opensm/osm_port.h>
60#include <opensm/osm_node.h>
61#include <opensm/osm_switch.h>
62#include <opensm/osm_partition.h>
63#include <opensm/osm_helper.h>
64#include <opensm/osm_qos_policy.h>
65#include <opensm/osm_sa.h>
66
67#define OSM_SA_MPR_MAX_NUM_PATH        127
68#define MAX_HOPS 64
69
70#define SA_MPR_RESP_SIZE SA_ITEM_RESP_SIZE(mpr_rec)
71
72static boolean_t sa_multipath_rec_is_tavor_port(IN const osm_port_t * p_port)
73{
74	osm_node_t const *p_node;
75	ib_net32_t vend_id;
76
77	p_node = p_port->p_node;
78	vend_id = ib_node_info_get_vendor_id(&p_node->node_info);
79
80	return ((p_node->node_info.device_id == CL_HTON16(23108)) &&
81		((vend_id == CL_HTON32(OSM_VENDOR_ID_MELLANOX)) ||
82		 (vend_id == CL_HTON32(OSM_VENDOR_ID_TOPSPIN)) ||
83		 (vend_id == CL_HTON32(OSM_VENDOR_ID_SILVERSTORM)) ||
84		 (vend_id == CL_HTON32(OSM_VENDOR_ID_VOLTAIRE))));
85}
86
87static boolean_t
88sa_multipath_rec_apply_tavor_mtu_limit(IN const ib_multipath_rec_t * p_mpr,
89				       IN const osm_port_t * p_src_port,
90				       IN const osm_port_t * p_dest_port,
91				       IN const ib_net64_t comp_mask)
92{
93	uint8_t required_mtu;
94
95	/* only if at least one of the ports is a Tavor device */
96	if (!sa_multipath_rec_is_tavor_port(p_src_port) &&
97	    !sa_multipath_rec_is_tavor_port(p_dest_port))
98		return FALSE;
99
100	/*
101	   we can apply the patch if either:
102	   1. No MTU required
103	   2. Required MTU <
104	   3. Required MTU = 1K or 512 or 256
105	   4. Required MTU > 256 or 512
106	 */
107	required_mtu = ib_multipath_rec_mtu(p_mpr);
108	if ((comp_mask & IB_MPR_COMPMASK_MTUSELEC) &&
109	    (comp_mask & IB_MPR_COMPMASK_MTU)) {
110		switch (ib_multipath_rec_mtu_sel(p_mpr)) {
111		case 0:	/* must be greater than */
112		case 2:	/* exact match */
113			if (IB_MTU_LEN_1024 < required_mtu)
114				return FALSE;
115			break;
116
117		case 1:	/* must be less than */
118			/* can't be disqualified by this one */
119			break;
120
121		case 3:	/* largest available */
122			/* the ULP intentionally requested */
123			/* the largest MTU possible */
124			return FALSE;
125			break;
126
127		default:
128			/* if we're here, there's a bug in ib_multipath_rec_mtu_sel() */
129			CL_ASSERT(FALSE);
130			break;
131		}
132	}
133
134	return TRUE;
135}
136
137static ib_api_status_t mpr_rcv_get_path_parms(IN osm_sa_t * sa,
138					      IN const ib_multipath_rec_t *
139					      p_mpr,
140					      IN const osm_alias_guid_t * p_src_alias_guid,
141					      IN const osm_alias_guid_t * p_dest_alias_guid,
142					      IN const uint16_t src_lid_ho,
143					      IN const uint16_t dest_lid_ho,
144					      IN const ib_net64_t comp_mask,
145					      OUT osm_path_parms_t * p_parms)
146{
147	const osm_node_t *p_node;
148	const osm_physp_t *p_physp, *p_physp0;
149	const osm_physp_t *p_src_physp;
150	const osm_physp_t *p_dest_physp;
151	const osm_prtn_t *p_prtn = NULL;
152	const ib_port_info_t *p_pi, *p_pi0;
153	ib_slvl_table_t *p_slvl_tbl;
154	ib_api_status_t status = IB_SUCCESS;
155	uint8_t mtu;
156	uint8_t rate, p0_extended_rate, dest_rate;
157	uint8_t pkt_life;
158	uint8_t required_mtu;
159	uint8_t required_rate;
160	ib_net16_t required_pkey;
161	uint8_t required_sl;
162	uint8_t required_pkt_life;
163	ib_net16_t dest_lid;
164	int hops = 0;
165	int in_port_num = 0;
166	uint8_t i;
167	osm_qos_level_t *p_qos_level = NULL;
168	uint16_t valid_sl_mask = 0xffff;
169	int extended, p0_extended;
170
171	OSM_LOG_ENTER(sa->p_log);
172
173	dest_lid = cl_hton16(dest_lid_ho);
174
175	p_dest_physp = p_dest_alias_guid->p_base_port->p_physp;
176	p_physp = p_src_alias_guid->p_base_port->p_physp;
177	p_src_physp = p_physp;
178	p_pi = &p_physp->port_info;
179
180	mtu = ib_port_info_get_mtu_cap(p_pi);
181	extended = p_pi->capability_mask & IB_PORT_CAP_HAS_EXT_SPEEDS;
182	rate = ib_port_info_compute_rate(p_pi, extended);
183
184	/*
185	   Mellanox Tavor device performance is better using 1K MTU.
186	   If required MTU and MTU selector are such that 1K is OK
187	   and at least one end of the path is Tavor we override the
188	   port MTU with 1K.
189	 */
190	if (sa->p_subn->opt.enable_quirks &&
191	    sa_multipath_rec_apply_tavor_mtu_limit(p_mpr,
192						   p_src_alias_guid->p_base_port,
193						   p_dest_alias_guid->p_base_port,
194						   comp_mask))
195		if (mtu > IB_MTU_LEN_1024) {
196			mtu = IB_MTU_LEN_1024;
197			OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
198				"Optimized Path MTU to 1K for Mellanox Tavor device\n");
199		}
200
201	/*
202	   Walk the subnet object from source to destination,
203	   tracking the most restrictive rate and mtu values along the way...
204
205	   If source port node is a switch, then p_physp should
206	   point to the port that routes the destination lid
207	 */
208
209	p_node = osm_physp_get_node_ptr(p_physp);
210
211	if (p_node->sw) {
212		/*
213		 * Source node is a switch.
214		 * Make sure that p_physp points to the out port of the
215		 * switch that routes to the destination lid (dest_lid_ho)
216		 */
217		p_physp = osm_switch_get_route_by_lid(p_node->sw, dest_lid);
218		if (p_physp == 0) {
219			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4514: "
220				"Can't find routing from LID %u to LID %u on "
221				"switch %s (GUID 0x%016" PRIx64 ")\n",
222				src_lid_ho, dest_lid_ho, p_node->print_desc,
223				cl_ntoh64(osm_node_get_node_guid(p_node)));
224			status = IB_NOT_FOUND;
225			goto Exit;
226		}
227	}
228
229	if (sa->p_subn->opt.qos) {
230
231		/*
232		 * Whether this node is switch or CA, the IN port for
233		 * the sl2vl table is 0, because this is a source node.
234		 */
235		p_slvl_tbl = osm_physp_get_slvl_tbl(p_physp, 0);
236
237		/* update valid SLs that still exist on this route */
238		for (i = 0; i < IB_MAX_NUM_VLS; i++) {
239			if (valid_sl_mask & (1 << i) &&
240			    ib_slvl_table_get(p_slvl_tbl, i) == IB_DROP_VL)
241				valid_sl_mask &= ~(1 << i);
242		}
243		if (!valid_sl_mask) {
244			OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
245				"All the SLs lead to VL15 on this path\n");
246			status = IB_NOT_FOUND;
247			goto Exit;
248		}
249	}
250
251	/*
252	 * Same as above
253	 */
254	p_node = osm_physp_get_node_ptr(p_dest_physp);
255
256	if (p_node->sw) {
257		/*
258		 * if destination is switch, we want p_dest_physp to point to port 0
259		 */
260		p_dest_physp =
261		    osm_switch_get_route_by_lid(p_node->sw, dest_lid);
262
263		if (p_dest_physp == 0) {
264			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4515: "
265				"Can't find routing from LID %u to LID %u on "
266				"switch %s (GUID 0x%016" PRIx64 ")\n",
267				src_lid_ho, dest_lid_ho, p_node->print_desc,
268				cl_ntoh64(osm_node_get_node_guid(p_node)));
269			status = IB_NOT_FOUND;
270			goto Exit;
271		}
272
273	}
274
275	/*
276	 * Now go through the path step by step
277	 */
278
279	while (p_physp != p_dest_physp) {
280
281		int tmp_pnum = p_physp->port_num;
282		p_node = osm_physp_get_node_ptr(p_physp);
283		p_physp = osm_physp_get_remote(p_physp);
284
285		if (p_physp == 0) {
286			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4505: "
287				"Can't find remote phys port of %s (GUID "
288				"0x%016" PRIx64 ") port %d "
289				"while routing from LID %u to LID %u",
290				p_node->print_desc,
291				cl_ntoh64(osm_node_get_node_guid(p_node)),
292				tmp_pnum, src_lid_ho, dest_lid_ho);
293			status = IB_ERROR;
294			goto Exit;
295		}
296
297		/* update number of hops traversed */
298		hops++;
299		if (hops > MAX_HOPS) {
300			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4520: "
301				"Path from GUID 0x%016" PRIx64 " (%s) to"
302				" lid %u GUID 0x%016" PRIx64 " (%s) needs"
303				" more than %d hops, max %d hops allowed\n",
304				cl_ntoh64(osm_physp_get_port_guid(p_src_physp)),
305				p_src_physp->p_node->print_desc, dest_lid_ho,
306				cl_ntoh64(osm_physp_get_port_guid
307					  (p_dest_physp)),
308				p_dest_physp->p_node->print_desc, hops,
309				MAX_HOPS);
310			status = IB_NOT_FOUND;
311			goto Exit;
312		}
313
314		in_port_num = osm_physp_get_port_num(p_physp);
315
316		/*
317		   This is point to point case (no switch in between)
318		 */
319		if (p_physp == p_dest_physp)
320			break;
321
322		p_node = osm_physp_get_node_ptr(p_physp);
323
324		if (!p_node->sw) {
325			/*
326			   There is some sort of problem in the subnet object!
327			   If this isn't a switch, we should have reached
328			   the destination by now!
329			 */
330			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4503: "
331				"Internal error, bad path while routing "
332				"from %s (GUID: 0x%016"PRIx64") port %d "
333				"to %s (GUID: 0x%016"PRIx64") port %d; "
334				"ended at %s port %d\n",
335				p_src_alias_guid->p_base_port->p_node->print_desc,
336				cl_ntoh64(p_src_alias_guid->p_base_port->p_node->node_info.node_guid),
337				p_src_alias_guid->p_base_port->p_physp->port_num,
338				p_dest_alias_guid->p_base_port->p_node->print_desc,
339				cl_ntoh64(p_dest_alias_guid->p_base_port->p_node->node_info.node_guid),
340				p_dest_alias_guid->p_base_port->p_physp->port_num,
341				p_node->print_desc,
342				p_physp->port_num);
343			status = IB_ERROR;
344			goto Exit;
345		}
346
347		/*
348		   Check parameters for the ingress port in this switch.
349		 */
350		p_pi = &p_physp->port_info;
351
352		if (mtu > ib_port_info_get_mtu_cap(p_pi))
353			mtu = ib_port_info_get_mtu_cap(p_pi);
354
355		p_physp0 = osm_node_get_physp_ptr((osm_node_t *)p_node, 0);
356		p_pi0 = &p_physp0->port_info;
357		p0_extended = p_pi0->capability_mask & IB_PORT_CAP_HAS_EXT_SPEEDS;
358		p0_extended_rate = ib_port_info_compute_rate(p_pi, p0_extended);
359		if (ib_path_compare_rates(rate, p0_extended_rate) > 0)
360			rate = p0_extended_rate;
361
362		/*
363		   Continue with the egress port on this switch.
364		 */
365		p_physp = osm_switch_get_route_by_lid(p_node->sw, dest_lid);
366		if (p_physp == 0) {
367			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4516: "
368				"Dead end path on switch "
369				"%s (GUID: 0x%016"PRIx64") to LID %u\n",
370				p_node->print_desc,
371				cl_ntoh64(osm_node_get_node_guid(p_node)),
372				dest_lid_ho);
373			status = IB_ERROR;
374			goto Exit;
375		}
376
377		p_pi = &p_physp->port_info;
378
379		if (mtu > ib_port_info_get_mtu_cap(p_pi))
380			mtu = ib_port_info_get_mtu_cap(p_pi);
381
382		p_physp0 = osm_node_get_physp_ptr((osm_node_t *)p_node, 0);
383		p_pi0 = &p_physp0->port_info;
384		p0_extended = p_pi0->capability_mask & IB_PORT_CAP_HAS_EXT_SPEEDS;
385		p0_extended_rate = ib_port_info_compute_rate(p_pi, p0_extended);
386		if (ib_path_compare_rates(rate, p0_extended_rate) > 0)
387			rate = p0_extended_rate;
388
389		if (sa->p_subn->opt.qos) {
390			/*
391			 * Check SL2VL table of the switch and update valid SLs
392			 */
393			p_slvl_tbl =
394			    osm_physp_get_slvl_tbl(p_physp, in_port_num);
395			for (i = 0; i < IB_MAX_NUM_VLS; i++) {
396				if (valid_sl_mask & (1 << i) &&
397				    ib_slvl_table_get(p_slvl_tbl,
398						      i) == IB_DROP_VL)
399					valid_sl_mask &= ~(1 << i);
400			}
401			if (!valid_sl_mask) {
402				OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
403					"All the SLs lead to VL15 "
404					"on this path\n");
405				status = IB_NOT_FOUND;
406				goto Exit;
407			}
408		}
409	}
410
411	/*
412	   p_physp now points to the destination
413	 */
414	p_pi = &p_physp->port_info;
415
416	if (mtu > ib_port_info_get_mtu_cap(p_pi))
417		mtu = ib_port_info_get_mtu_cap(p_pi);
418
419	extended = p_pi->capability_mask & IB_PORT_CAP_HAS_EXT_SPEEDS;
420	dest_rate = ib_port_info_compute_rate(p_pi, extended);
421	if (ib_path_compare_rates(rate, dest_rate) > 0)
422		rate = dest_rate;
423
424	OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
425		"Path min MTU = %u, min rate = %u\n", mtu, rate);
426
427	/*
428	 * Get QoS Level object according to the MultiPath request
429	 * and adjust MultiPath parameters according to QoS settings
430	 */
431	if (sa->p_subn->opt.qos && sa->p_subn->p_qos_policy &&
432	    (p_qos_level =
433	     osm_qos_policy_get_qos_level_by_mpr(sa->p_subn->p_qos_policy,
434						 p_mpr, p_src_physp,
435						 p_dest_physp, comp_mask))) {
436
437		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
438			"MultiPathRecord request matches QoS Level '%s' (%s)\n",
439			p_qos_level->name,
440			p_qos_level->use ? p_qos_level->use : "no description");
441
442		if (p_qos_level->mtu_limit_set
443		    && (mtu > p_qos_level->mtu_limit))
444			mtu = p_qos_level->mtu_limit;
445
446		if (p_qos_level->rate_limit_set
447		    && (ib_path_compare_rates(rate, p_qos_level->rate_limit) > 0))
448			rate = p_qos_level->rate_limit;
449
450		if (p_qos_level->sl_set) {
451			required_sl = p_qos_level->sl;
452			if (!(valid_sl_mask & (1 << required_sl))) {
453				status = IB_NOT_FOUND;
454				goto Exit;
455			}
456		}
457	}
458
459	/*
460	   Determine if these values meet the user criteria
461	 */
462
463	/* we silently ignore cases where only the MTU selector is defined */
464	if ((comp_mask & IB_MPR_COMPMASK_MTUSELEC) &&
465	    (comp_mask & IB_MPR_COMPMASK_MTU)) {
466		required_mtu = ib_multipath_rec_mtu(p_mpr);
467		switch (ib_multipath_rec_mtu_sel(p_mpr)) {
468		case 0:	/* must be greater than */
469			if (mtu <= required_mtu)
470				status = IB_NOT_FOUND;
471			break;
472
473		case 1:	/* must be less than */
474			if (mtu >= required_mtu) {
475				/* adjust to use the highest mtu
476				   lower then the required one */
477				if (required_mtu > 1)
478					mtu = required_mtu - 1;
479				else
480					status = IB_NOT_FOUND;
481			}
482			break;
483
484		case 2:	/* exact match */
485			if (mtu < required_mtu)
486				status = IB_NOT_FOUND;
487			else
488				mtu = required_mtu;
489			break;
490
491		case 3:	/* largest available */
492			/* can't be disqualified by this one */
493			break;
494
495		default:
496			/* if we're here, there's a bug in ib_multipath_rec_mtu_sel() */
497			CL_ASSERT(FALSE);
498			status = IB_ERROR;
499			break;
500		}
501	}
502	if (status != IB_SUCCESS)
503		goto Exit;
504
505	/* we silently ignore cases where only the Rate selector is defined */
506	if ((comp_mask & IB_MPR_COMPMASK_RATESELEC) &&
507	    (comp_mask & IB_MPR_COMPMASK_RATE)) {
508		required_rate = ib_multipath_rec_rate(p_mpr);
509		switch (ib_multipath_rec_rate_sel(p_mpr)) {
510		case 0:	/* must be greater than */
511			if (ib_path_compare_rates(rate, required_rate) <= 0)
512				status = IB_NOT_FOUND;
513			break;
514
515		case 1:	/* must be less than */
516			if (ib_path_compare_rates(rate, required_rate) >= 0) {
517				/* adjust the rate to use the highest rate
518				   lower then the required one */
519				rate = ib_path_rate_get_prev(required_rate);
520				if (!rate)
521					status = IB_NOT_FOUND;
522			}
523			break;
524
525		case 2:	/* exact match */
526			if (ib_path_compare_rates(rate, required_rate))
527				status = IB_NOT_FOUND;
528			else
529				rate = required_rate;
530			break;
531
532		case 3:	/* largest available */
533			/* can't be disqualified by this one */
534			break;
535
536		default:
537			/* if we're here, there's a bug in ib_multipath_rec_mtu_sel() */
538			CL_ASSERT(FALSE);
539			status = IB_ERROR;
540			break;
541		}
542	}
543	if (status != IB_SUCCESS)
544		goto Exit;
545
546	/* Verify the pkt_life_time */
547	/* According to spec definition IBA 1.2 Table 205 PacketLifeTime description,
548	   for loopback paths, packetLifeTime shall be zero. */
549	if (p_src_alias_guid->p_base_port == p_dest_alias_guid->p_base_port)
550		pkt_life = 0;	/* loopback */
551	else if (p_qos_level && p_qos_level->pkt_life_set)
552		pkt_life = p_qos_level->pkt_life;
553	else
554		pkt_life = sa->p_subn->opt.subnet_timeout;
555
556	/* we silently ignore cases where only the PktLife selector is defined */
557	if ((comp_mask & IB_MPR_COMPMASK_PKTLIFETIMESELEC) &&
558	    (comp_mask & IB_MPR_COMPMASK_PKTLIFETIME)) {
559		required_pkt_life = ib_multipath_rec_pkt_life(p_mpr);
560		switch (ib_multipath_rec_pkt_life_sel(p_mpr)) {
561		case 0:	/* must be greater than */
562			if (pkt_life <= required_pkt_life)
563				status = IB_NOT_FOUND;
564			break;
565
566		case 1:	/* must be less than */
567			if (pkt_life >= required_pkt_life) {
568				/* adjust the lifetime to use the highest possible
569				   lower then the required one */
570				if (required_pkt_life > 1)
571					pkt_life = required_pkt_life - 1;
572				else
573					status = IB_NOT_FOUND;
574			}
575			break;
576
577		case 2:	/* exact match */
578			if (pkt_life < required_pkt_life)
579				status = IB_NOT_FOUND;
580			else
581				pkt_life = required_pkt_life;
582			break;
583
584		case 3:	/* smallest available */
585			/* can't be disqualified by this one */
586			break;
587
588		default:
589			/* if we're here, there's a bug in ib_path_rec_pkt_life_sel() */
590			CL_ASSERT(FALSE);
591			status = IB_ERROR;
592			break;
593		}
594	}
595
596	if (status != IB_SUCCESS)
597		goto Exit;
598
599	/*
600	 * set Pkey for this MultiPath record request
601	 */
602
603	if (comp_mask & IB_MPR_COMPMASK_RAWTRAFFIC &&
604	    cl_ntoh32(p_mpr->hop_flow_raw) & (1 << 31))
605		required_pkey =
606		    osm_physp_find_common_pkey(p_src_physp, p_dest_physp,
607					       sa->p_subn->opt.allow_both_pkeys);
608
609	else if (comp_mask & IB_MPR_COMPMASK_PKEY) {
610		/*
611		 * MPR request has a specific pkey:
612		 * Check that source and destination share this pkey.
613		 * If QoS level has pkeys, check that this pkey exists
614		 * in the QoS level pkeys.
615		 * MPR returned pkey is the requested pkey.
616		 */
617		required_pkey = p_mpr->pkey;
618		if (!osm_physp_share_this_pkey
619		    (p_src_physp, p_dest_physp, required_pkey,
620		     sa->p_subn->opt.allow_both_pkeys)) {
621			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4518: "
622				"Ports src 0x%016"PRIx64" (%s port %d) "
623				"and dst 0x%016"PRIx64" (%s port %d) "
624				"do not share the specified PKey 0x%04x\n",
625				cl_ntoh64(osm_physp_get_port_guid(p_src_physp)),
626				p_src_physp->p_node->print_desc,
627				p_src_physp->port_num,
628				cl_ntoh64(osm_physp_get_port_guid
629					  (p_dest_physp)),
630				p_dest_physp->p_node->print_desc,
631				p_dest_physp->port_num,
632				cl_ntoh16(required_pkey));
633			status = IB_NOT_FOUND;
634			goto Exit;
635		}
636		if (p_qos_level && p_qos_level->pkey_range_len &&
637		    !osm_qos_level_has_pkey(p_qos_level, required_pkey)) {
638			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451C: "
639				"Ports src 0x%016"PRIx64" (%s port %d) "
640				"and dst 0x%016"PRIx64" (%s port %d) "
641				"do not share specified PKey (0x%04x) as "
642				"defined by QoS level \"%s\"\n",
643				cl_ntoh64(osm_physp_get_port_guid(p_src_physp)),
644				p_src_physp->p_node->print_desc,
645				p_src_physp->port_num,
646				cl_ntoh64(osm_physp_get_port_guid
647					  (p_dest_physp)),
648				p_dest_physp->p_node->print_desc,
649				p_dest_physp->port_num,
650				cl_ntoh16(required_pkey),
651				p_qos_level->name);
652			status = IB_NOT_FOUND;
653			goto Exit;
654		}
655
656	} else if (p_qos_level && p_qos_level->pkey_range_len) {
657		/*
658		 * MPR request doesn't have a specific pkey, but QoS level
659		 * has pkeys - get shared pkey from QoS level pkeys
660		 */
661		required_pkey = osm_qos_level_get_shared_pkey(p_qos_level,
662							      p_src_physp,
663							      p_dest_physp,
664							      sa->p_subn->opt.allow_both_pkeys);
665		if (!required_pkey) {
666			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451D: "
667				"Ports src 0x%016"PRIx64" (%s port %d) "
668				"and dst 0x%016"PRIx64" (%s port %d) "
669				"do not share a PKey as defined by QoS "
670				"level \"%s\"\n",
671				cl_ntoh64(osm_physp_get_port_guid(p_src_physp)),
672				p_src_physp->p_node->print_desc,
673				p_src_physp->port_num,
674				cl_ntoh64(osm_physp_get_port_guid
675					  (p_dest_physp)),
676				p_dest_physp->p_node->print_desc,
677				p_dest_physp->port_num,
678				p_qos_level->name);
679			status = IB_NOT_FOUND;
680			goto Exit;
681		}
682
683	} else {
684		/*
685		 * Neither MPR request nor QoS level have pkey.
686		 * Just get any shared pkey.
687		 */
688		required_pkey =
689		    osm_physp_find_common_pkey(p_src_physp, p_dest_physp,
690					       sa->p_subn->opt.allow_both_pkeys);
691		if (!required_pkey) {
692			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4519: "
693				"Ports src 0x%016"PRIx64" (%s port %d) "
694				"and dst 0x%016"PRIx64" (%s port %d) "
695				"do not have any shared PKeys\n",
696				cl_ntoh64(osm_physp_get_port_guid(p_src_physp)),
697				p_src_physp->p_node->print_desc,
698				p_src_physp->port_num,
699				cl_ntoh64(osm_physp_get_port_guid
700					  (p_dest_physp)),
701				p_dest_physp->p_node->print_desc,
702				p_dest_physp->port_num);
703			status = IB_NOT_FOUND;
704			goto Exit;
705		}
706	}
707
708	if (required_pkey) {
709		p_prtn =
710		    (osm_prtn_t *) cl_qmap_get(&sa->p_subn->prtn_pkey_tbl,
711					       required_pkey &
712					       cl_ntoh16((uint16_t) ~ 0x8000));
713		if (p_prtn ==
714		    (osm_prtn_t *) cl_qmap_end(&sa->p_subn->prtn_pkey_tbl))
715			p_prtn = NULL;
716	}
717
718	/*
719	 * Set MultiPathRecord SL.
720	 */
721
722	if (comp_mask & IB_MPR_COMPMASK_SL) {
723		/*
724		 * Specific SL was requested
725		 */
726		required_sl = ib_multipath_rec_sl(p_mpr);
727
728		if (p_qos_level && p_qos_level->sl_set &&
729		    p_qos_level->sl != required_sl) {
730			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451E: "
731				"QoS constraints: required MultiPathRecord SL "
732				"(%u) doesn't match QoS policy \"%s\" SL (%u) "
733				"[%s port %d <-> %s port %d]\n", required_sl,
734				p_qos_level->name,
735				p_qos_level->sl,
736				p_src_alias_guid->p_base_port->p_node->print_desc,
737				p_src_alias_guid->p_base_port->p_physp->port_num,
738				p_dest_alias_guid->p_base_port->p_node->print_desc,
739				p_dest_alias_guid->p_base_port->p_physp->port_num);
740			status = IB_NOT_FOUND;
741			goto Exit;
742		}
743
744	} else if (p_qos_level && p_qos_level->sl_set) {
745		/*
746		 * No specific SL was requested,
747		 * but there is an SL in QoS level.
748		 */
749		required_sl = p_qos_level->sl;
750
751		if (required_pkey && p_prtn && p_prtn->sl != p_qos_level->sl)
752			OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
753				"QoS level SL (%u) overrides partition SL (%u)\n",
754				p_qos_level->sl, p_prtn->sl);
755
756	} else if (required_pkey) {
757		/*
758		 * No specific SL in request or in QoS level - use partition SL
759		 */
760		p_prtn =
761		    (osm_prtn_t *) cl_qmap_get(&sa->p_subn->prtn_pkey_tbl,
762					       required_pkey &
763					       cl_ntoh16((uint16_t) ~ 0x8000));
764		if (!p_prtn) {
765			required_sl = OSM_DEFAULT_SL;
766			/* this may be possible when pkey tables are created somehow in
767			   previous runs or things are going wrong here */
768			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451A: "
769				"No partition found for PKey 0x%04x - "
770				"using default SL %d "
771				"[%s port %d <-> %s port %d]\n",
772				cl_ntoh16(required_pkey), required_sl,
773				p_src_alias_guid->p_base_port->p_node->print_desc,
774				p_src_alias_guid->p_base_port->p_physp->port_num,
775				p_dest_alias_guid->p_base_port->p_node->print_desc,
776				p_dest_alias_guid->p_base_port->p_physp->port_num);
777		} else
778			required_sl = p_prtn->sl;
779
780	} else if (sa->p_subn->opt.qos) {
781		if (valid_sl_mask & (1 << OSM_DEFAULT_SL))
782			required_sl = OSM_DEFAULT_SL;
783		else {
784			for (i = 0; i < IB_MAX_NUM_VLS; i++)
785				if (valid_sl_mask & (1 << i))
786					break;
787			required_sl = i;
788		}
789	} else
790		required_sl = OSM_DEFAULT_SL;
791
792	if (sa->p_subn->opt.qos && !(valid_sl_mask & (1 << required_sl))) {
793		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451F: "
794			"Selected SL (%u) leads to VL15 "
795			"[%s port %d <-> %s port %d]\n",
796			required_sl,
797			p_src_alias_guid->p_base_port->p_node->print_desc,
798			p_src_alias_guid->p_base_port->p_physp->port_num,
799			p_dest_alias_guid->p_base_port->p_node->print_desc,
800			p_dest_alias_guid->p_base_port->p_physp->port_num);
801		status = IB_NOT_FOUND;
802		goto Exit;
803	}
804
805	/* reset pkey when raw traffic */
806	if (comp_mask & IB_MPR_COMPMASK_RAWTRAFFIC &&
807	    cl_ntoh32(p_mpr->hop_flow_raw) & (1 << 31))
808		required_pkey = 0;
809
810	p_parms->mtu = mtu;
811	p_parms->rate = rate;
812	p_parms->pkey = required_pkey;
813	p_parms->pkt_life = pkt_life;
814	p_parms->sl = required_sl;
815	p_parms->hops = hops;
816
817	OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "MultiPath params:"
818		" mtu = %u, rate = %u, packet lifetime = %u,"
819		" pkey = 0x%04X, sl = %u, hops = %u\n", mtu, rate,
820		pkt_life, cl_ntoh16(required_pkey), required_sl, hops);
821
822Exit:
823	OSM_LOG_EXIT(sa->p_log);
824	return status;
825}
826
827static void mpr_rcv_build_pr(IN osm_sa_t * sa,
828			     IN const osm_alias_guid_t * p_src_alias_guid,
829			     IN const osm_alias_guid_t * p_dest_alias_guid,
830			     IN uint16_t src_lid_ho, IN uint16_t dest_lid_ho,
831			     IN uint8_t preference,
832			     IN const osm_path_parms_t * p_parms,
833			     OUT ib_path_rec_t * p_pr)
834{
835	const osm_physp_t *p_src_physp, *p_dest_physp;
836
837	OSM_LOG_ENTER(sa->p_log);
838
839	p_src_physp = p_src_alias_guid->p_base_port->p_physp;
840	p_dest_physp = p_dest_alias_guid->p_base_port->p_physp;
841
842	p_pr->dgid.unicast.prefix = osm_physp_get_subnet_prefix(p_dest_physp);
843	p_pr->dgid.unicast.interface_id = p_dest_alias_guid->alias_guid;
844
845	p_pr->sgid.unicast.prefix = osm_physp_get_subnet_prefix(p_src_physp);
846	p_pr->sgid.unicast.interface_id = p_src_alias_guid->alias_guid;
847
848	p_pr->dlid = cl_hton16(dest_lid_ho);
849	p_pr->slid = cl_hton16(src_lid_ho);
850
851	p_pr->hop_flow_raw &= cl_hton32(1 << 31);
852
853	p_pr->pkey = p_parms->pkey;
854	ib_path_rec_set_qos_class(p_pr, 0);
855	ib_path_rec_set_sl(p_pr, p_parms->sl);
856	p_pr->mtu = (uint8_t) (p_parms->mtu | 0x80);
857	p_pr->rate = (uint8_t) (p_parms->rate | 0x80);
858
859	/* According to 1.2 spec definition Table 205 PacketLifeTime description,
860	   for loopback paths, packetLifeTime shall be zero. */
861	if (p_src_alias_guid->p_base_port == p_dest_alias_guid->p_base_port)
862		p_pr->pkt_life = 0x80;	/* loopback */
863	else
864		p_pr->pkt_life = (uint8_t) (p_parms->pkt_life | 0x80);
865
866	p_pr->preference = preference;
867
868	/* always return num_path = 0 so this is only the reversible component */
869	if (p_parms->reversible)
870		p_pr->num_path = 0x80;
871
872	OSM_LOG_EXIT(sa->p_log);
873}
874
875static osm_sa_item_t *mpr_rcv_get_lid_pair_path(IN osm_sa_t * sa,
876						IN const ib_multipath_rec_t *
877						p_mpr,
878						IN const osm_alias_guid_t *
879						p_src_alias_guid,
880						IN const osm_alias_guid_t *
881						p_dest_alias_guid,
882						IN const uint16_t src_lid_ho,
883						IN const uint16_t dest_lid_ho,
884						IN const ib_net64_t comp_mask,
885						IN const uint8_t preference)
886{
887	osm_path_parms_t path_parms;
888	osm_path_parms_t rev_path_parms;
889	osm_sa_item_t *p_pr_item;
890	ib_api_status_t status, rev_path_status;
891
892	OSM_LOG_ENTER(sa->p_log);
893
894	OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Src LID %u, Dest LID %u\n",
895		src_lid_ho, dest_lid_ho);
896
897	p_pr_item = malloc(SA_MPR_RESP_SIZE);
898	if (p_pr_item == NULL) {
899		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4501: "
900			"Unable to allocate path record\n");
901		goto Exit;
902	}
903	memset(p_pr_item, 0, SA_MPR_RESP_SIZE);
904
905	status = mpr_rcv_get_path_parms(sa, p_mpr, p_src_alias_guid,
906					p_dest_alias_guid,
907					src_lid_ho, dest_lid_ho,
908					comp_mask, &path_parms);
909
910	if (status != IB_SUCCESS) {
911		free(p_pr_item);
912		p_pr_item = NULL;
913		goto Exit;
914	}
915
916	/* now try the reversible path */
917	rev_path_status = mpr_rcv_get_path_parms(sa, p_mpr, p_dest_alias_guid,
918						 p_src_alias_guid,
919						 dest_lid_ho, src_lid_ho,
920						 comp_mask, &rev_path_parms);
921	path_parms.reversible = (rev_path_status == IB_SUCCESS);
922
923	/* did we get a Reversible Path compmask ? */
924	/*
925	   NOTE that if the reversible component = 0, it is a don't care
926	   rather then requiring non-reversible paths ...
927	   see Vol1 Ver1.2 p900 l16
928	 */
929	if (comp_mask & IB_MPR_COMPMASK_REVERSIBLE) {
930		if ((!path_parms.reversible && (p_mpr->num_path & 0x80))) {
931			OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
932				"Requested reversible path but failed to get one\n");
933
934			free(p_pr_item);
935			p_pr_item = NULL;
936			goto Exit;
937		}
938	}
939
940	p_pr_item->resp.mpr_rec.p_src_port = p_src_alias_guid->p_base_port;
941	p_pr_item->resp.mpr_rec.p_dest_port = p_dest_alias_guid->p_base_port;
942	p_pr_item->resp.mpr_rec.hops = path_parms.hops;
943
944	mpr_rcv_build_pr(sa, p_src_alias_guid, p_dest_alias_guid, src_lid_ho,
945			 dest_lid_ho, preference, &path_parms,
946			 &p_pr_item->resp.mpr_rec.path_rec);
947
948Exit:
949	OSM_LOG_EXIT(sa->p_log);
950	return p_pr_item;
951}
952
953static uint32_t mpr_rcv_get_port_pair_paths(IN osm_sa_t * sa,
954					    IN const ib_multipath_rec_t * p_mpr,
955					    IN const osm_port_t * p_req_port,
956					    IN const osm_alias_guid_t * p_src_alias_guid,
957					    IN const osm_alias_guid_t * p_dest_alias_guid,
958					    IN const uint32_t rem_paths,
959					    IN const ib_net64_t comp_mask,
960					    IN cl_qlist_t * p_list)
961{
962	osm_sa_item_t *p_pr_item;
963	uint16_t src_lid_min_ho;
964	uint16_t src_lid_max_ho;
965	uint16_t dest_lid_min_ho;
966	uint16_t dest_lid_max_ho;
967	uint16_t src_lid_ho;
968	uint16_t dest_lid_ho;
969	uint32_t path_num = 0;
970	uint8_t preference;
971	unsigned src_offset, dest_offset;
972
973	OSM_LOG_ENTER(sa->p_log);
974
975	OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
976		"Src port 0x%016" PRIx64 ", Dst port 0x%016" PRIx64 "\n",
977		cl_ntoh64(p_src_alias_guid->alias_guid),
978		cl_ntoh64(p_dest_alias_guid->alias_guid));
979
980	/* Check that the req_port, src_port and dest_port all share a
981	   pkey. The check is done on the default physical port of the ports. */
982	if (osm_port_share_pkey(sa->p_log, p_req_port,
983				p_src_alias_guid->p_base_port,
984				sa->p_subn->opt.allow_both_pkeys) == FALSE
985	    || osm_port_share_pkey(sa->p_log, p_req_port,
986				   p_dest_alias_guid->p_base_port,
987				   sa->p_subn->opt.allow_both_pkeys) == FALSE
988	    || osm_port_share_pkey(sa->p_log, p_src_alias_guid->p_base_port,
989				   p_dest_alias_guid->p_base_port,
990				   sa->p_subn->opt.allow_both_pkeys) == FALSE)
991		/* One of the pairs doesn't share a pkey so the path is disqualified. */
992		goto Exit;
993
994	/*
995	   We shouldn't be here if the paths are disqualified in some way...
996	   Thus, we assume every possible connection is valid.
997
998	   We desire to return high-quality paths first.
999	   In OpenSM, higher quality mean least overlap with other paths.
1000	   This is acheived in practice by returning paths with
1001	   different LID value on each end, which means these
1002	   paths are more redundant that paths with the same LID repeated
1003	   on one side.  For example, in OpenSM the paths between two
1004	   endpoints with LMC = 1 might be as follows:
1005
1006	   Port A, LID 1 <-> Port B, LID 3
1007	   Port A, LID 1 <-> Port B, LID 4
1008	   Port A, LID 2 <-> Port B, LID 3
1009	   Port A, LID 2 <-> Port B, LID 4
1010
1011	   The OpenSM unicast routing algorithms attempt to disperse each path
1012	   to as varied a physical path as is reasonable.  1<->3 and 1<->4 have
1013	   more physical overlap (hence less redundancy) than 1<->3 and 2<->4.
1014
1015	   OpenSM ranks paths in three preference groups:
1016
1017	   Preference Value           Description
1018	   ----------------           -------------------------------------------
1019	   0                  Redundant in both directions with other
1020	   pref value = 0 paths
1021
1022	   1                  Redundant in one direction with other
1023	   pref value = 0 and pref value = 1 paths
1024
1025	   2                  Not redundant in either direction with
1026	   other paths
1027
1028	   3-FF                       Unused
1029
1030	   SA clients don't need to know these details, only that the lower
1031	   preference paths are preferred, as stated in the spec.  The paths
1032	   may not actually be physically redundant depending on the topology
1033	   of the subnet, but the point of LMC > 0 is to offer redundancy,
1034	   so I assume the subnet is physically appropriate for the specified
1035	   LMC value.  A more advanced implementation could inspect for physical
1036	   redundancy, but I'm not going to bother with that now.
1037	 */
1038
1039	osm_port_get_lid_range_ho(p_src_alias_guid->p_base_port,
1040				  &src_lid_min_ho, &src_lid_max_ho);
1041	osm_port_get_lid_range_ho(p_dest_alias_guid->p_base_port,
1042				  &dest_lid_min_ho, &dest_lid_max_ho);
1043
1044	OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Src LID [%u-%u], Dest LID [%u-%u]\n",
1045		src_lid_min_ho, src_lid_max_ho,
1046		dest_lid_min_ho, dest_lid_max_ho);
1047
1048	src_lid_ho = src_lid_min_ho;
1049	dest_lid_ho = dest_lid_min_ho;
1050
1051	/*
1052	   Preferred paths come first in OpenSM
1053	 */
1054	preference = 0;
1055
1056	while (path_num < rem_paths) {
1057		/*
1058		   These paths are "fully redundant"
1059		 */
1060		p_pr_item = mpr_rcv_get_lid_pair_path(sa, p_mpr,
1061						      p_src_alias_guid,
1062						      p_dest_alias_guid,
1063						      src_lid_ho, dest_lid_ho,
1064						      comp_mask, preference);
1065
1066		if (p_pr_item) {
1067			cl_qlist_insert_tail(p_list, &p_pr_item->list_item);
1068			++path_num;
1069		}
1070
1071		if (++src_lid_ho > src_lid_max_ho)
1072			break;
1073
1074		if (++dest_lid_ho > dest_lid_max_ho)
1075			break;
1076	}
1077
1078	/*
1079	   Check if we've accumulated all the paths that the user cares to see
1080	 */
1081	if (path_num == rem_paths)
1082		goto Exit;
1083
1084	/*
1085	   Don't bother reporting preference 1 paths for now.
1086	   It's more trouble than it's worth and can only occur
1087	   if ports have different LMC values, which isn't supported
1088	   by OpenSM right now anyway.
1089	 */
1090	preference = 2;
1091	src_lid_ho = src_lid_min_ho;
1092	dest_lid_ho = dest_lid_min_ho;
1093	src_offset = 0;
1094	dest_offset = 0;
1095
1096	/*
1097	   Iterate over the remaining paths
1098	 */
1099	while (path_num < rem_paths) {
1100		dest_offset++;
1101		dest_lid_ho++;
1102
1103		if (dest_lid_ho > dest_lid_max_ho) {
1104			src_offset++;
1105			src_lid_ho++;
1106
1107			if (src_lid_ho > src_lid_max_ho)
1108				break;	/* done */
1109
1110			dest_offset = 0;
1111			dest_lid_ho = dest_lid_min_ho;
1112		}
1113
1114		/*
1115		   These paths are "fully non-redundant" with paths already
1116		   identified above and consequently not of much value.
1117
1118		   Don't return paths we already identified above, as indicated
1119		   by the offset values being equal.
1120		 */
1121		if (src_offset == dest_offset)
1122			continue;	/* already reported */
1123
1124		p_pr_item = mpr_rcv_get_lid_pair_path(sa, p_mpr,
1125						      p_src_alias_guid,
1126						      p_dest_alias_guid,
1127						      src_lid_ho, dest_lid_ho,
1128						      comp_mask, preference);
1129
1130		if (p_pr_item) {
1131			cl_qlist_insert_tail(p_list, &p_pr_item->list_item);
1132			++path_num;
1133		}
1134	}
1135
1136Exit:
1137	OSM_LOG_EXIT(sa->p_log);
1138	return path_num;
1139}
1140
/*
 * File-local two-argument minimum.  Any earlier definition of min is
 * dropped first so that this one is the one in effect below.
 *
 * The selected argument is expanded twice, so only pass expressions that
 * have no side effects.
 */
#undef min
#define min(x,y)	(((x) < (y)) ? (x) : (y))
1143
1144static osm_sa_item_t *mpr_rcv_get_apm_port_pair_paths(IN osm_sa_t * sa,
1145						      IN const
1146						      ib_multipath_rec_t *
1147						      p_mpr,
1148						      IN const osm_alias_guid_t *
1149						      p_src_alias_guid,
1150						      IN const osm_alias_guid_t *
1151						      p_dest_alias_guid,
1152						      IN int base_offs,
1153						      IN const ib_net64_t
1154						      comp_mask,
1155						      IN cl_qlist_t * p_list)
1156{
1157	osm_sa_item_t *p_pr_item = 0;
1158	uint16_t src_lid_min_ho;
1159	uint16_t src_lid_max_ho;
1160	uint16_t dest_lid_min_ho;
1161	uint16_t dest_lid_max_ho;
1162	uint16_t src_lid_ho;
1163	uint16_t dest_lid_ho;
1164	unsigned iterations;
1165	int src_lids, dest_lids;
1166
1167	OSM_LOG_ENTER(sa->p_log);
1168
1169	OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Src port 0x%016" PRIx64 ", "
1170		"Dst port 0x%016" PRIx64 ", base offs %d\n",
1171		cl_ntoh64(p_src_alias_guid->alias_guid),
1172		cl_ntoh64(p_dest_alias_guid->alias_guid),
1173		base_offs);
1174
1175	osm_port_get_lid_range_ho(p_src_alias_guid->p_base_port,
1176				  &src_lid_min_ho, &src_lid_max_ho);
1177	osm_port_get_lid_range_ho(p_dest_alias_guid->p_base_port,
1178				  &dest_lid_min_ho, &dest_lid_max_ho);
1179
1180	src_lid_ho = src_lid_min_ho;
1181	dest_lid_ho = dest_lid_min_ho;
1182
1183	src_lids = src_lid_max_ho - src_lid_min_ho + 1;
1184	dest_lids = dest_lid_max_ho - dest_lid_min_ho + 1;
1185
1186	src_lid_ho += base_offs % src_lids;
1187	dest_lid_ho += base_offs % dest_lids;
1188
1189	OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
1190		"Src LIDs [%u-%u] hashed %u, "
1191		"Dest LIDs [%u-%u] hashed %u\n",
1192		src_lid_min_ho, src_lid_max_ho, src_lid_ho,
1193		dest_lid_min_ho, dest_lid_max_ho, dest_lid_ho);
1194
1195	iterations = min(src_lids, dest_lids);
1196
1197	while (iterations--) {
1198		/*
1199		   These paths are "fully redundant"
1200		 */
1201		p_pr_item = mpr_rcv_get_lid_pair_path(sa, p_mpr,
1202						      p_src_alias_guid,
1203						      p_dest_alias_guid,
1204						      src_lid_ho, dest_lid_ho,
1205						      comp_mask, 0);
1206
1207		if (p_pr_item) {
1208			OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
1209				"Found matching path from Src LID %u to Dest LID %u with %d hops\n",
1210				src_lid_ho, dest_lid_ho, p_pr_item->resp.mpr_rec.hops);
1211			break;
1212		}
1213
1214		if (++src_lid_ho > src_lid_max_ho)
1215			src_lid_ho = src_lid_min_ho;
1216
1217		if (++dest_lid_ho > dest_lid_max_ho)
1218			dest_lid_ho = dest_lid_min_ho;
1219	}
1220
1221	OSM_LOG_EXIT(sa->p_log);
1222	return p_pr_item;
1223}
1224
1225static ib_net16_t mpr_rcv_get_gids(IN osm_sa_t * sa, IN const ib_gid_t * gids,
1226				   IN int ngids, IN int is_sgid,
1227				   OUT osm_alias_guid_t ** pp_alias_guid)
1228{
1229	osm_alias_guid_t *p_alias_guid;
1230	ib_net16_t ib_status = IB_SUCCESS;
1231	int i;
1232
1233	OSM_LOG_ENTER(sa->p_log);
1234
1235	for (i = 0; i < ngids; i++, gids++) {
1236		if (!ib_gid_is_link_local(gids)) {
1237			if ((is_sgid && ib_gid_is_multicast(gids)) ||
1238			    (ib_gid_get_subnet_prefix(gids) !=
1239			     sa->p_subn->opt.subnet_prefix)) {
1240				/*
1241				   This 'error' is the client's fault (bad gid)
1242				   so don't enter it as an error in our own log.
1243				   Return an error response to the client.
1244				 */
1245				OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, "ERR 451B: "
1246					"%sGID 0x%016" PRIx64
1247					" is multicast or non local subnet prefix\n",
1248					is_sgid ? "S" : "D",
1249					cl_ntoh64(gids->unicast.prefix));
1250
1251				ib_status = IB_SA_MAD_STATUS_INVALID_GID;
1252				goto Exit;
1253			}
1254		}
1255
1256		p_alias_guid =
1257		    osm_get_alias_guid_by_guid(sa->p_subn,
1258					       gids->unicast.interface_id);
1259		if (!p_alias_guid) {
1260			/*
1261			   This 'error' is the client's fault (bad gid) so
1262			   don't enter it as an error in our own log.
1263			   Return an error response to the client.
1264			 */
1265			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4506: "
1266				"No port with GUID 0x%016" PRIx64 "\n",
1267				cl_ntoh64(gids->unicast.interface_id));
1268
1269			ib_status = IB_SA_MAD_STATUS_INVALID_GID;
1270			goto Exit;
1271		}
1272
1273		pp_alias_guid[i] = p_alias_guid;
1274	}
1275
1276Exit:
1277	OSM_LOG_EXIT(sa->p_log);
1278
1279	return ib_status;
1280}
1281
1282static ib_net16_t mpr_rcv_get_end_points(IN osm_sa_t * sa,
1283					 IN const osm_madw_t * p_madw,
1284					 OUT osm_alias_guid_t ** pp_alias_guids,
1285					 OUT int *nsrc, OUT int *ndest)
1286{
1287	const ib_multipath_rec_t *p_mpr;
1288	const ib_sa_mad_t *p_sa_mad;
1289	ib_net64_t comp_mask;
1290	ib_net16_t sa_status = IB_SA_MAD_STATUS_SUCCESS;
1291	ib_gid_t *gids;
1292
1293	OSM_LOG_ENTER(sa->p_log);
1294
1295	/*
1296	   Determine what fields are valid and then get a pointer
1297	   to the source and destination port objects, if possible.
1298	 */
1299	p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw);
1300	p_mpr = (ib_multipath_rec_t *) ib_sa_mad_get_payload_ptr(p_sa_mad);
1301	gids = (ib_gid_t *) p_mpr->gids;
1302
1303	comp_mask = p_sa_mad->comp_mask;
1304
1305	/*
1306	   Check a few easy disqualifying cases up front before getting
1307	   into the endpoints.
1308	 */
1309	*nsrc = *ndest = 0;
1310
1311	if (comp_mask & IB_MPR_COMPMASK_SGIDCOUNT) {
1312		*nsrc = p_mpr->sgid_count;
1313		if (*nsrc > IB_MULTIPATH_MAX_GIDS)
1314			*nsrc = IB_MULTIPATH_MAX_GIDS;
1315		sa_status = mpr_rcv_get_gids(sa, gids, *nsrc, 1, pp_alias_guids);
1316		if (sa_status != IB_SUCCESS)
1317			goto Exit;
1318	}
1319
1320	if (comp_mask & IB_MPR_COMPMASK_DGIDCOUNT) {
1321		*ndest = p_mpr->dgid_count;
1322		if (*ndest + *nsrc > IB_MULTIPATH_MAX_GIDS)
1323			*ndest = IB_MULTIPATH_MAX_GIDS - *nsrc;
1324		sa_status =
1325		    mpr_rcv_get_gids(sa, gids + *nsrc, *ndest, 0,
1326				     pp_alias_guids + *nsrc);
1327	}
1328
1329Exit:
1330	OSM_LOG_EXIT(sa->p_log);
1331	return sa_status;
1332}
1333
/*
 * Fold two LIDs into a small non-negative number (0..102 for non-negative
 * inputs): each LID is shifted right by lmc, the first result is shifted
 * left by 4 bits and OR-ed with the second, and the combination is reduced
 * modulo 103.
 *
 * The result depends on the argument order, and lmc is expanded twice, so
 * pass expressions without side effects.
 */
#define hash_lids(a, b, lmc)	\
	(((((a) >> (lmc)) << 4) | ((b) >> (lmc))) % 103)
1336
1337static void mpr_rcv_get_apm_paths(IN osm_sa_t * sa,
1338				  IN const ib_multipath_rec_t * p_mpr,
1339				  IN const osm_port_t * p_req_port,
1340				  IN osm_alias_guid_t ** _pp_alias_guids,
1341				  IN const ib_net64_t comp_mask,
1342				  IN cl_qlist_t * p_list)
1343{
1344	osm_alias_guid_t *pp_alias_guids[4];
1345	osm_sa_item_t *matrix[2][2];
1346	int base_offs, src_lid_ho, dest_lid_ho;
1347	int sumA, sumB, minA, minB;
1348
1349	OSM_LOG_ENTER(sa->p_log);
1350
1351	/*
1352	 * We want to:
1353	 *    1. use different lid offsets (from base) for the resultant paths
1354	 *    to increase the probability of redundant paths or in case
1355	 *    of Clos - to ensure it (different offset => different spine!)
1356	 *    2. keep consistent paths no matter of direction and order of ports
1357	 *    3. distibute the lid offsets to balance the load
1358	 * So, we sort the ports (within the srcs, and within the dests),
1359	 * hash the lids of S0, D0 (after the sort), and call mpr_rcv_get_apm_port_pair_paths
1360	 * with base_lid for S0, D0 and base_lid + 1 for S1, D1. This way we will get
1361	 * always the same offsets - order independent, and make sure different spines are used.
1362	 * Note that the diagonals on a Clos have the same number of hops, so it doesn't
1363	 * really matter which diagonal we use.
1364	 */
1365	if (_pp_alias_guids[0]->p_base_port->guid <
1366	    _pp_alias_guids[1]->p_base_port->guid) {
1367		pp_alias_guids[0] = _pp_alias_guids[0];
1368		pp_alias_guids[1] = _pp_alias_guids[1];
1369	} else {
1370		pp_alias_guids[0] = _pp_alias_guids[1];
1371		pp_alias_guids[1] = _pp_alias_guids[0];
1372	}
1373	if (_pp_alias_guids[2]->p_base_port->guid <
1374	    _pp_alias_guids[3]->p_base_port->guid) {
1375		pp_alias_guids[2] = _pp_alias_guids[2];
1376		pp_alias_guids[3] = _pp_alias_guids[3];
1377	} else {
1378		pp_alias_guids[2] = _pp_alias_guids[3];
1379		pp_alias_guids[3] = _pp_alias_guids[2];
1380	}
1381
1382	src_lid_ho = osm_port_get_base_lid(pp_alias_guids[0]->p_base_port);
1383	dest_lid_ho = osm_port_get_base_lid(pp_alias_guids[2]->p_base_port);
1384
1385	base_offs = src_lid_ho < dest_lid_ho ?
1386	    hash_lids(src_lid_ho, dest_lid_ho, sa->p_subn->opt.lmc) :
1387	    hash_lids(dest_lid_ho, src_lid_ho, sa->p_subn->opt.lmc);
1388
1389	matrix[0][0] =
1390	    mpr_rcv_get_apm_port_pair_paths(sa, p_mpr, pp_alias_guids[0],
1391					    pp_alias_guids[2], base_offs,
1392					    comp_mask, p_list);
1393	matrix[0][1] =
1394	    mpr_rcv_get_apm_port_pair_paths(sa, p_mpr, pp_alias_guids[0],
1395					    pp_alias_guids[3], base_offs,
1396					    comp_mask, p_list);
1397	matrix[1][0] =
1398	    mpr_rcv_get_apm_port_pair_paths(sa, p_mpr, pp_alias_guids[1],
1399					    pp_alias_guids[2], base_offs + 1,
1400					    comp_mask, p_list);
1401	matrix[1][1] =
1402	    mpr_rcv_get_apm_port_pair_paths(sa, p_mpr, pp_alias_guids[1],
1403					    pp_alias_guids[3], base_offs + 1,
1404					    comp_mask, p_list);
1405
1406	OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "APM matrix:\n"
1407		"\t{0,0} 0x%X->0x%X (%d)\t| {0,1} 0x%X->0x%X (%d)\n"
1408		"\t{1,0} 0x%X->0x%X (%d)\t| {1,1} 0x%X->0x%X (%d)\n",
1409		matrix[0][0] ? matrix[0][0]->resp.mpr_rec.path_rec.slid : 0,
1410		matrix[0][0] ? matrix[0][0]->resp.mpr_rec.path_rec.dlid : 0,
1411		matrix[0][0] ? matrix[0][0]->resp.mpr_rec.hops : 0,
1412		matrix[0][1] ? matrix[0][1]->resp.mpr_rec.path_rec.slid : 0,
1413		matrix[0][1] ? matrix[0][1]->resp.mpr_rec.path_rec.dlid : 0,
1414		matrix[0][1] ? matrix[0][1]->resp.mpr_rec.hops : 0,
1415		matrix[1][0] ? matrix[1][0]->resp.mpr_rec.path_rec.slid : 0,
1416		matrix[1][0] ? matrix[1][0]->resp.mpr_rec.path_rec.dlid : 0,
1417		matrix[1][0] ? matrix[1][0]->resp.mpr_rec.hops : 0,
1418		matrix[1][1] ? matrix[1][1]->resp.mpr_rec.path_rec.slid : 0,
1419		matrix[1][1] ? matrix[1][1]->resp.mpr_rec.path_rec.dlid : 0,
1420		matrix[1][1] ? matrix[1][1]->resp.mpr_rec.hops : 0);
1421
1422	sumA = minA = sumB = minB = 0;
1423
1424	/* check diagonal A {(0,0), (1,1)} */
1425	if (matrix[0][0]) {
1426		sumA += matrix[0][0]->resp.mpr_rec.hops;
1427		minA = matrix[0][0]->resp.mpr_rec.hops;
1428	}
1429	if (matrix[1][1]) {
1430		sumA += matrix[1][1]->resp.mpr_rec.hops;
1431		if (minA)
1432			minA = min(minA, matrix[1][1]->resp.mpr_rec.hops);
1433		else
1434			minA = matrix[1][1]->resp.mpr_rec.hops;
1435	}
1436
1437	/* check diagonal B {(0,1), (1,0)} */
1438	if (matrix[0][1]) {
1439		sumB += matrix[0][1]->resp.mpr_rec.hops;
1440		minB = matrix[0][1]->resp.mpr_rec.hops;
1441	}
1442	if (matrix[1][0]) {
1443		sumB += matrix[1][0]->resp.mpr_rec.hops;
1444		if (minB)
1445			minB = min(minB, matrix[1][0]->resp.mpr_rec.hops);
1446		else
1447			minB = matrix[1][0]->resp.mpr_rec.hops;
1448	}
1449
1450	/* and the winner is... */
1451	if (minA <= minB || (minA == minB && sumA < sumB)) {
1452		/* Diag A */
1453		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
1454			"Diag {0,0} & {1,1} is the best:\n"
1455			"\t{0,0} 0x%X->0x%X (%d)\t & {1,1} 0x%X->0x%X (%d)\n",
1456			matrix[0][0] ? matrix[0][0]->resp.mpr_rec.path_rec.slid : 0,
1457			matrix[0][0] ? matrix[0][0]->resp.mpr_rec.path_rec.dlid : 0,
1458			matrix[0][0] ? matrix[0][0]->resp.mpr_rec.hops : 0,
1459			matrix[1][1] ? matrix[1][1]->resp.mpr_rec.path_rec.slid : 0,
1460			matrix[1][1] ? matrix[1][1]->resp.mpr_rec.path_rec.dlid : 0,
1461			matrix[1][1] ? matrix[1][1]->resp.mpr_rec.hops : 0);
1462		if (matrix[0][0])
1463			cl_qlist_insert_tail(p_list, &matrix[0][0]->list_item);
1464		if (matrix[1][1])
1465			cl_qlist_insert_tail(p_list, &matrix[1][1]->list_item);
1466		free(matrix[0][1]);
1467		free(matrix[1][0]);
1468	} else {
1469		/* Diag B */
1470		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
1471			"Diag {0,1} & {1,0} is the best:\n"
1472			"\t{0,1} 0x%X->0x%X (%d)\t & {1,0} 0x%X->0x%X (%d)\n",
1473			matrix[0][1] ? matrix[0][1]->resp.mpr_rec.path_rec.slid : 0,
1474			matrix[0][1] ? matrix[0][1]->resp.mpr_rec.path_rec.dlid : 0,
1475			matrix[0][1] ? matrix[0][1]->resp.mpr_rec.hops : 0,
1476			matrix[1][0] ? matrix[1][0]->resp.mpr_rec.path_rec.slid : 0,
1477			matrix[1][0] ? matrix[1][0]->resp.mpr_rec.path_rec.dlid: 0,
1478			matrix[1][0] ? matrix[1][0]->resp.mpr_rec.hops : 0);
1479		if (matrix[0][1])
1480			cl_qlist_insert_tail(p_list, &matrix[0][1]->list_item);
1481		if (matrix[1][0])
1482			cl_qlist_insert_tail(p_list, &matrix[1][0]->list_item);
1483		free(matrix[0][0]);
1484		free(matrix[1][1]);
1485	}
1486
1487	OSM_LOG_EXIT(sa->p_log);
1488}
1489
1490static void mpr_rcv_process_pairs(IN osm_sa_t * sa,
1491				  IN const ib_multipath_rec_t * p_mpr,
1492				  IN osm_port_t * p_req_port,
1493				  IN osm_alias_guid_t ** pp_alias_guids,
1494				  IN const int nsrc, IN int ndest,
1495				  IN ib_net64_t comp_mask,
1496				  IN cl_qlist_t * p_list)
1497{
1498	osm_alias_guid_t **pp_src_alias_guid, **pp_es;
1499	osm_alias_guid_t **pp_dest_alias_guid, **pp_ed;
1500	uint32_t max_paths, num_paths, total_paths = 0;
1501
1502	OSM_LOG_ENTER(sa->p_log);
1503
1504	if (comp_mask & IB_MPR_COMPMASK_NUMBPATH)
1505		max_paths = p_mpr->num_path & 0x7F;
1506	else
1507		max_paths = OSM_SA_MPR_MAX_NUM_PATH;
1508
1509	for (pp_src_alias_guid = pp_alias_guids, pp_es = pp_alias_guids + nsrc;
1510	     pp_src_alias_guid < pp_es; pp_src_alias_guid++) {
1511		for (pp_dest_alias_guid = pp_es, pp_ed = pp_es + ndest;
1512		     pp_dest_alias_guid < pp_ed; pp_dest_alias_guid++) {
1513			num_paths =
1514			    mpr_rcv_get_port_pair_paths(sa, p_mpr, p_req_port,
1515							*pp_src_alias_guid,
1516							*pp_dest_alias_guid,
1517							max_paths - total_paths,
1518							comp_mask, p_list);
1519			total_paths += num_paths;
1520			OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
1521				"%d paths %d total paths %d max paths\n",
1522				num_paths, total_paths, max_paths);
1523			/* Just take first NumbPaths found */
1524			if (total_paths >= max_paths)
1525				goto Exit;
1526		}
1527	}
1528
1529Exit:
1530	OSM_LOG_EXIT(sa->p_log);
1531}
1532
1533void osm_mpr_rcv_process(IN void *context, IN void *data)
1534{
1535	osm_sa_t *sa = context;
1536	osm_madw_t *p_madw = data;
1537	const ib_multipath_rec_t *p_mpr;
1538	ib_sa_mad_t *p_sa_mad;
1539	osm_port_t *requester_port;
1540	osm_alias_guid_t *pp_alias_guids[IB_MULTIPATH_MAX_GIDS];
1541	cl_qlist_t pr_list;
1542	ib_net16_t sa_status;
1543	int nsrc, ndest;
1544	uint8_t rate, mtu;
1545
1546	OSM_LOG_ENTER(sa->p_log);
1547
1548	CL_ASSERT(p_madw);
1549
1550	p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw);
1551	p_mpr = (ib_multipath_rec_t *) ib_sa_mad_get_payload_ptr(p_sa_mad);
1552
1553	CL_ASSERT(p_sa_mad->attr_id == IB_MAD_ATTR_MULTIPATH_RECORD);
1554
1555	if ((p_sa_mad->rmpp_flags & IB_RMPP_FLAG_ACTIVE) != IB_RMPP_FLAG_ACTIVE) {
1556		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4510: "
1557			"Invalid request since RMPP_FLAG_ACTIVE is not set\n");
1558		osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_REQ_INVALID);
1559		goto Exit;
1560	}
1561
1562	/* we only support SubnAdmGetMulti method */
1563	if (p_sa_mad->method != IB_MAD_METHOD_GETMULTI) {
1564		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4513: "
1565			"Unsupported Method (%s) for MultiPathRecord request\n",
1566			ib_get_sa_method_str(p_sa_mad->method));
1567		osm_sa_send_error(sa, p_madw, IB_MAD_STATUS_UNSUP_METHOD_ATTR);
1568		goto Exit;
1569	}
1570
1571	if (OSM_LOG_IS_ACTIVE_V2(sa->p_log, OSM_LOG_DEBUG))
1572		osm_dump_multipath_record_v2(sa->p_log, p_mpr, FILE_ID, OSM_LOG_DEBUG);
1573
1574	/* Make sure required components (S/DGIDCount) are supplied */
1575	if (!(p_sa_mad->comp_mask & IB_MPR_COMPMASK_SGIDCOUNT) ||
1576	    !(p_sa_mad->comp_mask & IB_MPR_COMPMASK_DGIDCOUNT)) {
1577		osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_INSUF_COMPS);
1578		goto Exit;
1579	}
1580
1581	/* Validate rate if supplied */
1582	if ((p_sa_mad->comp_mask & IB_MPR_COMPMASK_RATESELEC) &&
1583	    (p_sa_mad->comp_mask & IB_MPR_COMPMASK_RATE)) {
1584		rate = ib_multipath_rec_rate(p_mpr);
1585		if (!ib_rate_is_valid(rate)) {
1586			osm_sa_send_error(sa, p_madw,
1587					  IB_SA_MAD_STATUS_REQ_INVALID);
1588			goto Exit;
1589		}
1590	}
1591	/* Validate MTU if supplied */
1592	if ((p_sa_mad->comp_mask & IB_MPR_COMPMASK_MTUSELEC) &&
1593	    (p_sa_mad->comp_mask & IB_MPR_COMPMASK_MTU)) {
1594		mtu = ib_multipath_rec_mtu(p_mpr);
1595		if (!ib_mtu_is_valid(mtu)) {
1596			osm_sa_send_error(sa, p_madw,
1597					  IB_SA_MAD_STATUS_REQ_INVALID);
1598			goto Exit;
1599		}
1600	}
1601
1602	/* Make sure either none or both ServiceID parameters are supplied */
1603	if ((p_sa_mad->comp_mask & IB_MPR_COMPMASK_SERVICEID) != 0 &&
1604	    (p_sa_mad->comp_mask & IB_MPR_COMPMASK_SERVICEID) !=
1605	     IB_MPR_COMPMASK_SERVICEID) {
1606		osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_INSUF_COMPS);
1607		goto Exit;
1608	}
1609
1610	cl_qlist_init(&pr_list);
1611
1612	/*
1613	   Most SA functions (including this one) are read-only on the
1614	   subnet object, so we grab the lock non-exclusively.
1615	 */
1616	cl_plock_acquire(sa->p_lock);
1617
1618	/* update the requester physical port */
1619	requester_port = osm_get_port_by_mad_addr(sa->p_log, sa->p_subn,
1620						  osm_madw_get_mad_addr_ptr
1621						  (p_madw));
1622	if (requester_port == NULL) {
1623		cl_plock_release(sa->p_lock);
1624		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4517: "
1625			"Cannot find requester physical port\n");
1626		goto Exit;
1627	}
1628
1629	OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
1630		"Requester port GUID 0x%" PRIx64 "\n",
1631		cl_ntoh64(osm_port_get_guid(requester_port)));
1632
1633	sa_status = mpr_rcv_get_end_points(sa, p_madw, pp_alias_guids,
1634					   &nsrc, &ndest);
1635
1636	if (sa_status != IB_SA_MAD_STATUS_SUCCESS || !nsrc || !ndest) {
1637		cl_plock_release(sa->p_lock);
1638		if (sa_status == IB_SA_MAD_STATUS_SUCCESS && (!nsrc || !ndest))
1639			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4512: "
1640				"mpr_rcv_get_end_points failed, # GIDs found; "
1641				"src %d; dest %d)\n", nsrc, ndest);
1642		if (sa_status == IB_SA_MAD_STATUS_SUCCESS)
1643			osm_sa_send_error(sa, p_madw,
1644					  IB_SA_MAD_STATUS_REQ_INVALID);
1645		else
1646			osm_sa_send_error(sa, p_madw, sa_status);
1647		goto Exit;
1648	}
1649
1650	/* APM request */
1651	if (nsrc == 2 && ndest == 2 && (p_mpr->num_path & 0x7F) == 2)
1652		mpr_rcv_get_apm_paths(sa, p_mpr, requester_port, pp_alias_guids,
1653				      p_sa_mad->comp_mask, &pr_list);
1654	else
1655		mpr_rcv_process_pairs(sa, p_mpr, requester_port, pp_alias_guids,
1656				      nsrc, ndest, p_sa_mad->comp_mask,
1657				      &pr_list);
1658
1659	cl_plock_release(sa->p_lock);
1660
1661	/* o15-0.2.7: If MultiPath is supported, then SA shall respond to a
1662	   SubnAdmGetMulti() containing a valid MultiPathRecord attribute with
1663	   a set of zero or more PathRecords satisfying the constraints
1664	   indicated in the MultiPathRecord received. The PathRecord Attribute
1665	   ID shall be used in the response.
1666	 */
1667	p_sa_mad->attr_id = IB_MAD_ATTR_PATH_RECORD;
1668	osm_sa_respond(sa, p_madw, sizeof(ib_path_rec_t), &pr_list);
1669
1670Exit:
1671	OSM_LOG_EXIT(sa->p_log);
1672}
1673#endif
1674