1/*
2 * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
3 * Copyright (c) 2002-2006 Mellanox Technologies LTD. All rights reserved.
4 * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
5 * Copyright (c) 2008 Xsigo Systems Inc.  All rights reserved.
6 *
7 * This software is available to you under a choice of one of two
8 * licenses.  You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
12 *
13 *     Redistribution and use in source and binary forms, with or
14 *     without modification, are permitted provided that the following
15 *     conditions are met:
16 *
17 *      - Redistributions of source code must retain the above
18 *        copyright notice, this list of conditions and the following
19 *        disclaimer.
20 *
21 *      - Redistributions in binary form must reproduce the above
22 *        copyright notice, this list of conditions and the following
23 *        disclaimer in the documentation and/or other materials
24 *        provided with the distribution.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
34 *
35 */
36
37/*
38 * Abstract:
39 *    Implementation of osm_pr_rcv_t.
40 * This object represents the PathRecord Receiver object.
41 * This object is part of the opensm family of objects.
42 */
43
44#if HAVE_CONFIG_H
45#  include <config.h>
46#endif				/* HAVE_CONFIG_H */
47
48#include <string.h>
49#include <arpa/inet.h>
50#include <iba/ib_types.h>
51#include <complib/cl_qmap.h>
52#include <complib/cl_passivelock.h>
53#include <complib/cl_debug.h>
54#include <complib/cl_qlist.h>
55#include <vendor/osm_vendor_api.h>
56#include <opensm/osm_base.h>
57#include <opensm/osm_port.h>
58#include <opensm/osm_node.h>
59#include <opensm/osm_switch.h>
60#include <opensm/osm_helper.h>
61#include <opensm/osm_pkey.h>
62#include <opensm/osm_multicast.h>
63#include <opensm/osm_partition.h>
64#include <opensm/osm_opensm.h>
65#include <opensm/osm_qos_policy.h>
66#include <opensm/osm_sa.h>
67#include <opensm/osm_router.h>
68#include <opensm/osm_prefix_route.h>
69
70#include <sys/socket.h>
71
/*
 * Defined outside this file.  Used below, when the active routing engine
 * is LASH, to obtain the SL for a (source port, destination port) pair.
 * NOTE(review): exact semantics live in the LASH routing code - confirm there.
 */
extern uint8_t osm_get_lash_sl(osm_opensm_t * p_osm,
			       const osm_port_t * p_src_port,
			       const osm_port_t * p_dst_port);
75
/*
 * One PathRecord result.  Items are heap allocated by
 * __osm_pr_rcv_get_lid_pair_path() and appended to a cl_qlist_t of results
 * through list_item.
 */
typedef struct osm_pr_item {
	cl_list_item_t list_item;	/* linkage used for the result list */
	ib_path_rec_t path_rec;		/* the PathRecord attribute returned */
} osm_pr_item_t;
80
/*
 * Path attributes computed for one direction of a LID pair.
 * __osm_pr_rcv_get_path_parms() fills in everything except 'reversible',
 * which the caller sets after probing the reverse direction.
 */
typedef struct osm_path_parms {
	ib_net16_t pkey;	/* selected PKey, network byte order (0 for raw traffic) */
	uint8_t mtu;		/* path MTU, as an IB MTU code */
	uint8_t rate;		/* path rate, as an IB rate code */
	uint8_t sl;		/* service level selected for the path */
	uint8_t pkt_life;	/* packet lifetime value placed in the record */
	boolean_t reversible;	/* TRUE when the reverse direction also resolved */
} osm_path_parms_t;
89
90static const ib_gid_t zero_gid = { {0x00, 0x00, 0x00, 0x00,
91				    0x00, 0x00, 0x00, 0x00,
92				    0x00, 0x00, 0x00, 0x00,
93				    0x00, 0x00, 0x00, 0x00},
94};
95
96/**********************************************************************
97 **********************************************************************/
98static inline boolean_t
99__osm_sa_path_rec_is_tavor_port(IN const osm_port_t * const p_port)
100{
101	osm_node_t const *p_node;
102	ib_net32_t vend_id;
103
104	p_node = p_port->p_node;
105	vend_id = ib_node_info_get_vendor_id(&p_node->node_info);
106
107	return ((p_node->node_info.device_id == CL_HTON16(23108)) &&
108		((vend_id == CL_HTON32(OSM_VENDOR_ID_MELLANOX)) ||
109		 (vend_id == CL_HTON32(OSM_VENDOR_ID_TOPSPIN)) ||
110		 (vend_id == CL_HTON32(OSM_VENDOR_ID_SILVERSTORM)) ||
111		 (vend_id == CL_HTON32(OSM_VENDOR_ID_VOLTAIRE))));
112}
113
114/**********************************************************************
115 **********************************************************************/
116static boolean_t
117__osm_sa_path_rec_apply_tavor_mtu_limit(IN const ib_path_rec_t * const p_pr,
118					IN const osm_port_t * const p_src_port,
119					IN const osm_port_t * const p_dest_port,
120					IN const ib_net64_t comp_mask)
121{
122	uint8_t required_mtu;
123
124	/* only if at least one of the ports is a Tavor device */
125	if (!__osm_sa_path_rec_is_tavor_port(p_src_port) &&
126	    !__osm_sa_path_rec_is_tavor_port(p_dest_port))
127		return (FALSE);
128
129	/*
130	   we can apply the patch if either:
131	   1. No MTU required
132	   2. Required MTU <
133	   3. Required MTU = 1K or 512 or 256
134	   4. Required MTU > 256 or 512
135	 */
136	required_mtu = ib_path_rec_mtu(p_pr);
137	if ((comp_mask & IB_PR_COMPMASK_MTUSELEC) &&
138	    (comp_mask & IB_PR_COMPMASK_MTU)) {
139		switch (ib_path_rec_mtu_sel(p_pr)) {
140		case 0:	/* must be greater than */
141		case 2:	/* exact match */
142			if (IB_MTU_LEN_1024 < required_mtu)
143				return (FALSE);
144			break;
145
146		case 1:	/* must be less than */
147			/* can't be disqualified by this one */
148			break;
149
150		case 3:	/* largest available */
151			/* the ULP intentionally requested */
152			/* the largest MTU possible */
153			return (FALSE);
154			break;
155
156		default:
157			/* if we're here, there's a bug in ib_path_rec_mtu_sel() */
158			CL_ASSERT(FALSE);
159			break;
160		}
161	}
162
163	return (TRUE);
164}
165
166/**********************************************************************
167 **********************************************************************/
168static ib_api_status_t
169__osm_pr_rcv_get_path_parms(IN osm_sa_t * sa,
170			    IN const ib_path_rec_t * const p_pr,
171			    IN const osm_port_t * const p_src_port,
172			    IN const osm_port_t * const p_dest_port,
173			    IN const uint16_t dest_lid_ho,
174			    IN const ib_net64_t comp_mask,
175			    OUT osm_path_parms_t * const p_parms)
176{
177	const osm_node_t *p_node;
178	const osm_physp_t *p_physp;
179	const osm_physp_t *p_src_physp;
180	const osm_physp_t *p_dest_physp;
181	const osm_prtn_t *p_prtn = NULL;
182	osm_opensm_t *p_osm;
183	const ib_port_info_t *p_pi;
184	ib_api_status_t status = IB_SUCCESS;
185	ib_net16_t pkey;
186	uint8_t mtu;
187	uint8_t rate;
188	uint8_t pkt_life;
189	uint8_t required_mtu;
190	uint8_t required_rate;
191	uint8_t required_pkt_life;
192	uint8_t sl;
193	uint8_t in_port_num;
194	ib_net16_t dest_lid;
195	uint8_t i;
196	ib_slvl_table_t *p_slvl_tbl = NULL;
197	osm_qos_level_t *p_qos_level = NULL;
198	uint16_t valid_sl_mask = 0xffff;
199	int is_lash;
200
201	OSM_LOG_ENTER(sa->p_log);
202
203	dest_lid = cl_hton16(dest_lid_ho);
204
205	p_dest_physp = p_dest_port->p_physp;
206	p_physp = p_src_port->p_physp;
207	p_src_physp = p_physp;
208	p_pi = &p_physp->port_info;
209	p_osm = sa->p_subn->p_osm;
210
211	mtu = ib_port_info_get_mtu_cap(p_pi);
212	rate = ib_port_info_compute_rate(p_pi);
213
214	/*
215	   Mellanox Tavor device performance is better using 1K MTU.
216	   If required MTU and MTU selector are such that 1K is OK
217	   and at least one end of the path is Tavor we override the
218	   port MTU with 1K.
219	 */
220	if (sa->p_subn->opt.enable_quirks &&
221	    __osm_sa_path_rec_apply_tavor_mtu_limit(p_pr, p_src_port,
222						    p_dest_port, comp_mask))
223		if (mtu > IB_MTU_LEN_1024) {
224			mtu = IB_MTU_LEN_1024;
225			OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
226				"Optimized Path MTU to 1K for Mellanox Tavor device\n");
227		}
228
229	/*
230	   Walk the subnet object from source to destination,
231	   tracking the most restrictive rate and mtu values along the way...
232
233	   If source port node is a switch, then p_physp should
234	   point to the port that routes the destination lid
235	 */
236
237	p_node = osm_physp_get_node_ptr(p_physp);
238
239	if (p_node->sw) {
240		/*
241		 * Source node is a switch.
242		 * Make sure that p_physp points to the out port of the
243		 * switch that routes to the destination lid (dest_lid_ho)
244		 */
245		p_physp = osm_switch_get_route_by_lid(p_node->sw, dest_lid);
246		if (p_physp == 0) {
247			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F02: "
248				"Cannot find routing to LID %u from switch for GUID 0x%016"
249				PRIx64 "\n", dest_lid_ho,
250				cl_ntoh64(osm_node_get_node_guid(p_node)));
251			status = IB_NOT_FOUND;
252			goto Exit;
253		}
254	}
255
256	if (sa->p_subn->opt.qos) {
257
258		/*
259		 * Whether this node is switch or CA, the IN port for
260		 * the sl2vl table is 0, because this is a source node.
261		 */
262		p_slvl_tbl = osm_physp_get_slvl_tbl(p_physp, 0);
263
264		/* update valid SLs that still exist on this route */
265		for (i = 0; i < IB_MAX_NUM_VLS; i++) {
266			if (valid_sl_mask & (1 << i) &&
267			    ib_slvl_table_get(p_slvl_tbl, i) == IB_DROP_VL)
268				valid_sl_mask &= ~(1 << i);
269		}
270		if (!valid_sl_mask) {
271			OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
272				"All the SLs lead to VL15 on this path\n");
273			status = IB_NOT_FOUND;
274			goto Exit;
275		}
276	}
277
278	/*
279	 * Same as above
280	 */
281	p_node = osm_physp_get_node_ptr(p_dest_physp);
282
283	if (p_node->sw) {
284		/*
285		 * if destination is switch, we want p_dest_physp to point to port 0
286		 */
287		p_dest_physp = osm_switch_get_route_by_lid(p_node->sw, dest_lid);
288
289		if (p_dest_physp == 0) {
290			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F03: "
291				"Cannot find routing to LID %u from switch for GUID 0x%016"
292				PRIx64 "\n", dest_lid_ho,
293				cl_ntoh64(osm_node_get_node_guid(p_node)));
294			status = IB_NOT_FOUND;
295			goto Exit;
296		}
297
298	}
299
300	/*
301	 * Now go through the path step by step
302	 */
303
304	while (p_physp != p_dest_physp) {
305
306		p_node = osm_physp_get_node_ptr(p_physp);
307		p_physp = osm_physp_get_remote(p_physp);
308
309		if (p_physp == 0) {
310			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F05: "
311				"Cannot find remote phys port when routing to LID %u from node GUID 0x%016"
312				PRIx64 "\n", dest_lid_ho,
313				cl_ntoh64(osm_node_get_node_guid(p_node)));
314			status = IB_ERROR;
315			goto Exit;
316		}
317
318		in_port_num = osm_physp_get_port_num(p_physp);
319
320		/*
321		   This is point to point case (no switch in between)
322		 */
323		if (p_physp == p_dest_physp)
324			break;
325
326		p_node = osm_physp_get_node_ptr(p_physp);
327
328		if (!p_node->sw) {
329			/*
330			   There is some sort of problem in the subnet object!
331			   If this isn't a switch, we should have reached
332			   the destination by now!
333			 */
334			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F06: "
335				"Internal error, bad path\n");
336			status = IB_ERROR;
337			goto Exit;
338		}
339
340		/*
341		   Check parameters for the ingress port in this switch.
342		 */
343		p_pi = &p_physp->port_info;
344
345		if (mtu > ib_port_info_get_mtu_cap(p_pi))
346			mtu = ib_port_info_get_mtu_cap(p_pi);
347
348		if (rate > ib_port_info_compute_rate(p_pi))
349			rate = ib_port_info_compute_rate(p_pi);
350
351		/*
352		   Continue with the egress port on this switch.
353		 */
354		p_physp = osm_switch_get_route_by_lid(p_node->sw, dest_lid);
355		if (p_physp == 0) {
356			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F07: "
357				"Dead end on path to LID %u from switch for GUID 0x%016"
358				PRIx64 "\n", dest_lid_ho,
359				cl_ntoh64(osm_node_get_node_guid(p_node)));
360			status = IB_ERROR;
361			goto Exit;
362		}
363
364		p_pi = &p_physp->port_info;
365
366		if (mtu > ib_port_info_get_mtu_cap(p_pi))
367			mtu = ib_port_info_get_mtu_cap(p_pi);
368
369		if (rate > ib_port_info_compute_rate(p_pi))
370			rate = ib_port_info_compute_rate(p_pi);
371
372		if (sa->p_subn->opt.qos) {
373			/*
374			 * Check SL2VL table of the switch and update valid SLs
375			 */
376			p_slvl_tbl = osm_physp_get_slvl_tbl(p_physp, in_port_num);
377			for (i = 0; i < IB_MAX_NUM_VLS; i++) {
378				if (valid_sl_mask & (1 << i) &&
379				    ib_slvl_table_get(p_slvl_tbl, i) == IB_DROP_VL)
380					valid_sl_mask &= ~(1 << i);
381			}
382			if (!valid_sl_mask) {
383				OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "All the SLs "
384					"lead to VL15 on this path\n");
385				status = IB_NOT_FOUND;
386				goto Exit;
387			}
388		}
389	}
390
391	/*
392	   p_physp now points to the destination
393	 */
394	p_pi = &p_physp->port_info;
395
396	if (mtu > ib_port_info_get_mtu_cap(p_pi))
397		mtu = ib_port_info_get_mtu_cap(p_pi);
398
399	if (rate > ib_port_info_compute_rate(p_pi))
400		rate = ib_port_info_compute_rate(p_pi);
401
402	OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
403		"Path min MTU = %u, min rate = %u\n", mtu, rate);
404
405	/*
406	 * Get QoS Level object according to the path request
407	 * and adjust path parameters according to QoS settings
408	 */
409	if (sa->p_subn->opt.qos &&
410	    sa->p_subn->p_qos_policy &&
411	    (p_qos_level =
412	     osm_qos_policy_get_qos_level_by_pr(sa->p_subn->p_qos_policy,
413						p_pr, p_src_physp, p_dest_physp,
414						comp_mask))) {
415		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
416			"PathRecord request matches QoS Level '%s' (%s)\n",
417			p_qos_level->name, p_qos_level->use ?
418			p_qos_level->use : "no description");
419
420		if (p_qos_level->mtu_limit_set
421		    && (mtu > p_qos_level->mtu_limit))
422			mtu = p_qos_level->mtu_limit;
423
424		if (p_qos_level->rate_limit_set
425		    && (rate > p_qos_level->rate_limit))
426			rate = p_qos_level->rate_limit;
427
428		if (p_qos_level->sl_set) {
429			sl = p_qos_level->sl;
430			if (!(valid_sl_mask & (1 << sl))) {
431				status = IB_NOT_FOUND;
432				goto Exit;
433			}
434		}
435	}
436
437	/*
438	 * Set packet lifetime.
439	 * According to spec definition IBA 1.2 Table 205
440	 * PacketLifeTime description, for loopback paths,
441	 * packetLifeTime shall be zero.
442	 */
443	if (p_src_port == p_dest_port)
444		pkt_life = 0;
445	else if (p_qos_level && p_qos_level->pkt_life_set)
446		pkt_life = p_qos_level->pkt_life;
447	else
448		pkt_life = sa->p_subn->opt.subnet_timeout;
449
450	/*
451	   Determine if these values meet the user criteria
452	   and adjust appropriately
453	 */
454
455	/* we silently ignore cases where only the MTU selector is defined */
456	if ((comp_mask & IB_PR_COMPMASK_MTUSELEC) &&
457	    (comp_mask & IB_PR_COMPMASK_MTU)) {
458		required_mtu = ib_path_rec_mtu(p_pr);
459		switch (ib_path_rec_mtu_sel(p_pr)) {
460		case 0:	/* must be greater than */
461			if (mtu <= required_mtu)
462				status = IB_NOT_FOUND;
463			break;
464
465		case 1:	/* must be less than */
466			if (mtu >= required_mtu) {
467				/* adjust to use the highest mtu
468				   lower then the required one */
469				if (required_mtu > 1)
470					mtu = required_mtu - 1;
471				else
472					status = IB_NOT_FOUND;
473			}
474			break;
475
476		case 2:	/* exact match */
477			if (mtu < required_mtu)
478				status = IB_NOT_FOUND;
479			else
480				mtu = required_mtu;
481			break;
482
483		case 3:	/* largest available */
484			/* can't be disqualified by this one */
485			break;
486
487		default:
488			/* if we're here, there's a bug in ib_path_rec_mtu_sel() */
489			CL_ASSERT(FALSE);
490			status = IB_ERROR;
491			break;
492		}
493	}
494	if (status != IB_SUCCESS)
495		goto Exit;
496
497	/* we silently ignore cases where only the Rate selector is defined */
498	if ((comp_mask & IB_PR_COMPMASK_RATESELEC) &&
499	    (comp_mask & IB_PR_COMPMASK_RATE)) {
500		required_rate = ib_path_rec_rate(p_pr);
501		switch (ib_path_rec_rate_sel(p_pr)) {
502		case 0:	/* must be greater than */
503			if (rate <= required_rate)
504				status = IB_NOT_FOUND;
505			break;
506
507		case 1:	/* must be less than */
508			if (rate >= required_rate) {
509				/* adjust the rate to use the highest rate
510				   lower then the required one */
511				if (required_rate > 2)
512					rate = required_rate - 1;
513				else
514					status = IB_NOT_FOUND;
515			}
516			break;
517
518		case 2:	/* exact match */
519			if (rate < required_rate)
520				status = IB_NOT_FOUND;
521			else
522				rate = required_rate;
523			break;
524
525		case 3:	/* largest available */
526			/* can't be disqualified by this one */
527			break;
528
529		default:
530			/* if we're here, there's a bug in ib_path_rec_mtu_sel() */
531			CL_ASSERT(FALSE);
532			status = IB_ERROR;
533			break;
534		}
535	}
536	if (status != IB_SUCCESS)
537		goto Exit;
538
539	/* we silently ignore cases where only the PktLife selector is defined */
540	if ((comp_mask & IB_PR_COMPMASK_PKTLIFETIMESELEC) &&
541	    (comp_mask & IB_PR_COMPMASK_PKTLIFETIME)) {
542		required_pkt_life = ib_path_rec_pkt_life(p_pr);
543		switch (ib_path_rec_pkt_life_sel(p_pr)) {
544		case 0:	/* must be greater than */
545			if (pkt_life <= required_pkt_life)
546				status = IB_NOT_FOUND;
547			break;
548
549		case 1:	/* must be less than */
550			if (pkt_life >= required_pkt_life) {
551				/* adjust the lifetime to use the highest possible
552				   lower then the required one */
553				if (required_pkt_life > 1)
554					pkt_life = required_pkt_life - 1;
555				else
556					status = IB_NOT_FOUND;
557			}
558			break;
559
560		case 2:	/* exact match */
561			if (pkt_life < required_pkt_life)
562				status = IB_NOT_FOUND;
563			else
564				pkt_life = required_pkt_life;
565			break;
566
567		case 3:	/* smallest available */
568			/* can't be disqualified by this one */
569			break;
570
571		default:
572			/* if we're here, there's a bug in ib_path_rec_pkt_life_sel() */
573			CL_ASSERT(FALSE);
574			status = IB_ERROR;
575			break;
576		}
577	}
578
579	if (status != IB_SUCCESS)
580		goto Exit;
581
582	/*
583	 * set Pkey for this path record request
584	 */
585
586	if ((comp_mask & IB_PR_COMPMASK_RAWTRAFFIC) &&
587	    (cl_ntoh32(p_pr->hop_flow_raw) & (1 << 31)))
588		pkey = osm_physp_find_common_pkey(p_src_physp, p_dest_physp);
589
590	else if (comp_mask & IB_PR_COMPMASK_PKEY) {
591		/*
592		 * PR request has a specific pkey:
593		 * Check that source and destination share this pkey.
594		 * If QoS level has pkeys, check that this pkey exists
595		 * in the QoS level pkeys.
596		 * PR returned pkey is the requested pkey.
597		 */
598		pkey = p_pr->pkey;
599		if (!osm_physp_share_this_pkey(p_src_physp, p_dest_physp, pkey)) {
600			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F1A: "
601				"Ports 0x%016" PRIx64 " 0x%016" PRIx64
602				" do not share specified PKey 0x%04x\n",
603				cl_ntoh64(osm_physp_get_port_guid(p_src_physp)),
604				cl_ntoh64(osm_physp_get_port_guid(p_dest_physp)),
605				cl_ntoh16(pkey));
606			status = IB_NOT_FOUND;
607			goto Exit;
608		}
609		if (p_qos_level && p_qos_level->pkey_range_len &&
610		    !osm_qos_level_has_pkey(p_qos_level, pkey)) {
611			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F1D: "
612				"Ports do not share PKeys defined by QoS level\n");
613			status = IB_NOT_FOUND;
614			goto Exit;
615		}
616
617	} else if (p_qos_level && p_qos_level->pkey_range_len) {
618		/*
619		 * PR request doesn't have a specific pkey, but QoS level
620		 * has pkeys - get shared pkey from QoS level pkeys
621		 */
622		pkey = osm_qos_level_get_shared_pkey(p_qos_level,
623						     p_src_physp, p_dest_physp);
624		if (!pkey) {
625			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F1E: "
626				"Ports 0x%016" PRIx64 " 0x%016" PRIx64
627				" do not share PKeys defined by QoS level\n",
628				cl_ntoh64(osm_physp_get_port_guid(p_src_physp)),
629				cl_ntoh64(osm_physp_get_port_guid(p_dest_physp)));
630			status = IB_NOT_FOUND;
631			goto Exit;
632		}
633	} else {
634		/*
635		 * Neither PR request nor QoS level have pkey.
636		 * Just get any shared pkey.
637		 */
638		pkey = osm_physp_find_common_pkey(p_src_physp, p_dest_physp);
639		if (!pkey) {
640			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F1B: "
641				"Ports 0x%016" PRIx64 " 0x%016" PRIx64
642				" do not have any shared PKeys\n",
643				cl_ntoh64(osm_physp_get_port_guid(p_src_physp)),
644				cl_ntoh64(osm_physp_get_port_guid(p_dest_physp)));
645			status = IB_NOT_FOUND;
646			goto Exit;
647		}
648	}
649
650	if (pkey) {
651		p_prtn =
652		    (osm_prtn_t *) cl_qmap_get(&sa->p_subn->prtn_pkey_tbl,
653					       pkey & cl_hton16((uint16_t) ~
654								0x8000));
655		if (p_prtn ==
656		    (osm_prtn_t *) cl_qmap_end(&sa->p_subn->prtn_pkey_tbl))
657			p_prtn = NULL;
658	}
659
660	/*
661	 * Set PathRecord SL.
662	 */
663
664	is_lash = (p_osm->routing_engine_used == OSM_ROUTING_ENGINE_TYPE_LASH);
665
666	if (comp_mask & IB_PR_COMPMASK_SL) {
667		/*
668		 * Specific SL was requested
669		 */
670		sl = ib_path_rec_sl(p_pr);
671
672		if (p_qos_level && p_qos_level->sl_set
673		    && (p_qos_level->sl != sl)) {
674			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F1F: "
675				"QoS constaraints: required PathRecord SL (%u) "
676				"doesn't match QoS policy SL (%u)\n", sl,
677				p_qos_level->sl);
678			status = IB_NOT_FOUND;
679			goto Exit;
680		}
681
682		if (is_lash
683		    && osm_get_lash_sl(p_osm, p_src_port, p_dest_port) != sl) {
684			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F23: "
685				"Required PathRecord SL (%u) doesn't "
686				"match LASH SL\n", sl);
687			status = IB_NOT_FOUND;
688			goto Exit;
689		}
690
691	} else if (is_lash) {
692		/*
693		 * No specific SL in PathRecord request.
694		 * If it's LASH routing - use its SL.
695		 * slid and dest_lid are stored in network in lash.
696		 */
697		sl = osm_get_lash_sl(p_osm, p_src_port, p_dest_port);
698	} else if (p_qos_level && p_qos_level->sl_set) {
699		/*
700		 * No specific SL was requested, and we're not in
701		 * LASH routing, but there is an SL in QoS level.
702		 */
703		sl = p_qos_level->sl;
704
705		if (pkey && p_prtn && p_prtn->sl != p_qos_level->sl)
706			OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
707				"QoS level SL (%u) overrides partition SL (%u)\n",
708				p_qos_level->sl, p_prtn->sl);
709
710	} else if (pkey) {
711		/*
712		 * No specific SL in request or in QoS level - use partition SL
713		 */
714		if (!p_prtn) {
715			sl = OSM_DEFAULT_SL;
716			/* this may be possible when pkey tables are created somehow in
717			   previous runs or things are going wrong here */
718			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F1C: "
719				"No partition found for PKey 0x%04x - using default SL %d\n",
720				cl_ntoh16(pkey), sl);
721		} else
722			sl = p_prtn->sl;
723	} else if (sa->p_subn->opt.qos) {
724		if (valid_sl_mask & (1 << OSM_DEFAULT_SL))
725			sl = OSM_DEFAULT_SL;
726		else {
727			for (i = 0; i < IB_MAX_NUM_VLS; i++)
728				if (valid_sl_mask & (1 << i))
729					break;
730			sl = i;
731		}
732	} else
733		sl = OSM_DEFAULT_SL;
734
735	if (sa->p_subn->opt.qos && !(valid_sl_mask & (1 << sl))) {
736		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F24: "
737			"Selected SL (%u) leads to VL15\n", sl);
738		status = IB_NOT_FOUND;
739		goto Exit;
740	}
741
742	/* reset pkey when raw traffic */
743	if (comp_mask & IB_PR_COMPMASK_RAWTRAFFIC &&
744	    cl_ntoh32(p_pr->hop_flow_raw) & (1 << 31))
745		pkey = 0;
746
747	p_parms->mtu = mtu;
748	p_parms->rate = rate;
749	p_parms->pkt_life = pkt_life;
750	p_parms->pkey = pkey;
751	p_parms->sl = sl;
752
753	OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Path params: mtu = %u, rate = %u,"
754		" packet lifetime = %u, pkey = 0x%04X, sl = %u\n",
755		mtu, rate, pkt_life, cl_ntoh16(pkey), sl);
756Exit:
757	OSM_LOG_EXIT(sa->p_log);
758	return (status);
759}
760
761/**********************************************************************
762 **********************************************************************/
763static void
764__osm_pr_rcv_build_pr(IN osm_sa_t * sa,
765		      IN const osm_port_t * const p_src_port,
766		      IN const osm_port_t * const p_dest_port,
767		      IN const ib_gid_t * const p_dgid,
768		      IN const uint16_t src_lid_ho,
769		      IN const uint16_t dest_lid_ho,
770		      IN const uint8_t preference,
771		      IN const osm_path_parms_t * const p_parms,
772		      OUT ib_path_rec_t * const p_pr)
773{
774	const osm_physp_t *p_src_physp;
775	const osm_physp_t *p_dest_physp;
776	boolean_t is_nonzero_gid = 0;
777
778	OSM_LOG_ENTER(sa->p_log);
779
780	p_src_physp = p_src_port->p_physp;
781
782	if (p_dgid) {
783		if (memcmp(p_dgid, &zero_gid, sizeof(*p_dgid)))
784			is_nonzero_gid = 1;
785	}
786
787	if (is_nonzero_gid)
788		p_pr->dgid = *p_dgid;
789	else {
790		p_dest_physp = p_dest_port->p_physp;
791
792		p_pr->dgid.unicast.prefix =
793		    osm_physp_get_subnet_prefix(p_dest_physp);
794		p_pr->dgid.unicast.interface_id =
795		    osm_physp_get_port_guid(p_dest_physp);
796	}
797
798	p_pr->sgid.unicast.prefix = osm_physp_get_subnet_prefix(p_src_physp);
799	p_pr->sgid.unicast.interface_id = osm_physp_get_port_guid(p_src_physp);
800
801	p_pr->dlid = cl_hton16(dest_lid_ho);
802	p_pr->slid = cl_hton16(src_lid_ho);
803
804	p_pr->hop_flow_raw &= cl_hton32(1 << 31);
805
806	/* Only set HopLimit if going through a router */
807	if (is_nonzero_gid)
808		p_pr->hop_flow_raw |= cl_hton32(IB_HOPLIMIT_MAX);
809
810	p_pr->pkey = p_parms->pkey;
811	ib_path_rec_set_sl(p_pr, p_parms->sl);
812	ib_path_rec_set_qos_class(p_pr, 0);
813	p_pr->mtu = (uint8_t) (p_parms->mtu | 0x80);
814	p_pr->rate = (uint8_t) (p_parms->rate | 0x80);
815
816	/* According to 1.2 spec definition Table 205 PacketLifeTime description,
817	   for loopback paths, packetLifeTime shall be zero. */
818	if (p_src_port == p_dest_port)
819		p_pr->pkt_life = 0x80;	/* loopback */
820	else
821		p_pr->pkt_life = (uint8_t) (p_parms->pkt_life | 0x80);
822
823	p_pr->preference = preference;
824
825	/* always return num_path = 0 so this is only the reversible component */
826	if (p_parms->reversible)
827		p_pr->num_path = 0x80;
828
829	OSM_LOG_EXIT(sa->p_log);
830}
831
832/**********************************************************************
833 **********************************************************************/
834static osm_pr_item_t *
835__osm_pr_rcv_get_lid_pair_path(IN osm_sa_t * sa,
836			       IN const ib_path_rec_t * const p_pr,
837			       IN const osm_port_t * const p_src_port,
838			       IN const osm_port_t * const p_dest_port,
839			       IN const ib_gid_t * const p_dgid,
840			       IN const uint16_t src_lid_ho,
841			       IN const uint16_t dest_lid_ho,
842			       IN const ib_net64_t comp_mask,
843			       IN const uint8_t preference)
844{
845	osm_path_parms_t path_parms;
846	osm_path_parms_t rev_path_parms;
847	osm_pr_item_t *p_pr_item;
848	ib_api_status_t status, rev_path_status;
849
850	OSM_LOG_ENTER(sa->p_log);
851
852	OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Src LID %u, Dest LID %u\n",
853		src_lid_ho, dest_lid_ho);
854
855	p_pr_item = malloc(sizeof(*p_pr_item));
856	if (p_pr_item == NULL) {
857		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F01: "
858			"Unable to allocate path record\n");
859		goto Exit;
860	}
861	memset(p_pr_item, 0, sizeof(*p_pr_item));
862
863	status = __osm_pr_rcv_get_path_parms(sa, p_pr, p_src_port,
864					     p_dest_port, dest_lid_ho,
865					     comp_mask, &path_parms);
866
867	if (status != IB_SUCCESS) {
868		free(p_pr_item);
869		p_pr_item = NULL;
870		goto Exit;
871	}
872
873	/* now try the reversible path */
874	rev_path_status = __osm_pr_rcv_get_path_parms(sa, p_pr, p_dest_port,
875						      p_src_port, src_lid_ho,
876						      comp_mask,
877						      &rev_path_parms);
878	path_parms.reversible = (rev_path_status == IB_SUCCESS);
879
880	/* did we get a Reversible Path compmask ? */
881	/*
882	   NOTE that if the reversible component = 0, it is a don't care
883	   rather then requiring non-reversible paths ...
884	   see Vol1 Ver1.2 p900 l16
885	 */
886	if (comp_mask & IB_PR_COMPMASK_REVERSIBLE) {
887		if ((!path_parms.reversible && (p_pr->num_path & 0x80))) {
888			OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
889				"Requested reversible path but failed to get one\n");
890
891			free(p_pr_item);
892			p_pr_item = NULL;
893			goto Exit;
894		}
895	}
896
897	__osm_pr_rcv_build_pr(sa, p_src_port, p_dest_port, p_dgid,
898			      src_lid_ho, dest_lid_ho, preference, &path_parms,
899			      &p_pr_item->path_rec);
900
901Exit:
902	OSM_LOG_EXIT(sa->p_log);
903	return (p_pr_item);
904}
905
906/**********************************************************************
907 **********************************************************************/
908static void
909__osm_pr_rcv_get_port_pair_paths(IN osm_sa_t * sa,
910				 IN const osm_madw_t * const p_madw,
911				 IN const osm_port_t * const p_req_port,
912				 IN const osm_port_t * const p_src_port,
913				 IN const osm_port_t * const p_dest_port,
914				 IN const ib_gid_t * const p_dgid,
915				 IN const ib_net64_t comp_mask,
916				 IN cl_qlist_t * const p_list)
917{
918	const ib_path_rec_t *p_pr;
919	const ib_sa_mad_t *p_sa_mad;
920	osm_pr_item_t *p_pr_item;
921	uint16_t src_lid_min_ho;
922	uint16_t src_lid_max_ho;
923	uint16_t dest_lid_min_ho;
924	uint16_t dest_lid_max_ho;
925	uint16_t src_lid_ho;
926	uint16_t dest_lid_ho;
927	uint32_t path_num;
928	uint8_t preference;
929	uintn_t iterations;
930	uintn_t src_offset;
931	uintn_t dest_offset;
932
933	OSM_LOG_ENTER(sa->p_log);
934
935	OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
936		"Src port 0x%016" PRIx64 ", Dst port 0x%016" PRIx64 "\n",
937		cl_ntoh64(osm_port_get_guid(p_src_port)),
938		cl_ntoh64(osm_port_get_guid(p_dest_port)));
939
940	/* Check that the req_port, src_port and dest_port all share a
941	   pkey. The check is done on the default physical port of the ports. */
942	if (osm_port_share_pkey(sa->p_log, p_req_port, p_src_port) == FALSE
943	    || osm_port_share_pkey(sa->p_log, p_req_port,
944				   p_dest_port) == FALSE
945	    || osm_port_share_pkey(sa->p_log, p_src_port,
946				   p_dest_port) == FALSE)
947		/* One of the pairs doesn't share a pkey so the path is disqualified. */
948		goto Exit;
949
950	p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw);
951	p_pr = (ib_path_rec_t *) ib_sa_mad_get_payload_ptr(p_sa_mad);
952
953	/*
954	   We shouldn't be here if the paths are disqualified in some way...
955	   Thus, we assume every possible connection is valid.
956
957	   We desire to return high-quality paths first.
958	   In OpenSM, higher quality means least overlap with other paths.
959	   This is acheived in practice by returning paths with
960	   different LID value on each end, which means these
961	   paths are more redundant that paths with the same LID repeated
962	   on one side.  For example, in OpenSM the paths between two
963	   endpoints with LMC = 1 might be as follows:
964
965	   Port A, LID 1 <-> Port B, LID 3
966	   Port A, LID 1 <-> Port B, LID 4
967	   Port A, LID 2 <-> Port B, LID 3
968	   Port A, LID 2 <-> Port B, LID 4
969
970	   The OpenSM unicast routing algorithms attempt to disperse each path
971	   to as varied a physical path as is reasonable.  1<->3 and 1<->4 have
972	   more physical overlap (hence less redundancy) than 1<->3 and 2<->4.
973
974	   OpenSM ranks paths in three preference groups:
975
976	   Preference Value    Description
977	   ----------------    -------------------------------------------
978	   0             Redundant in both directions with other
979	   pref value = 0 paths
980
981	   1             Redundant in one direction with other
982	   pref value = 0 and pref value = 1 paths
983
984	   2             Not redundant in either direction with
985	   other paths
986
987	   3-FF          Unused
988
989	   SA clients don't need to know these details, only that the lower
990	   preference paths are preferred, as stated in the spec.  The paths
991	   may not actually be physically redundant depending on the topology
992	   of the subnet, but the point of LMC > 0 is to offer redundancy,
993	   so it is assumed that the subnet is physically appropriate for the
994	   specified LMC value.  A more advanced implementation would inspect for
995	   physical redundancy, but I'm not going to bother with that now.
996	 */
997
998	/*
999	   Refine our search if the client specified end-point LIDs
1000	 */
1001	if (comp_mask & IB_PR_COMPMASK_DLID) {
1002		dest_lid_min_ho = cl_ntoh16(p_pr->dlid);
1003		dest_lid_max_ho = cl_ntoh16(p_pr->dlid);
1004	} else
1005		osm_port_get_lid_range_ho(p_dest_port, &dest_lid_min_ho,
1006					  &dest_lid_max_ho);
1007
1008	if (comp_mask & IB_PR_COMPMASK_SLID) {
1009		src_lid_min_ho = cl_ntoh16(p_pr->slid);
1010		src_lid_max_ho = cl_ntoh16(p_pr->slid);
1011	} else
1012		osm_port_get_lid_range_ho(p_src_port, &src_lid_min_ho,
1013					  &src_lid_max_ho);
1014
1015	if (src_lid_min_ho == 0) {
1016		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F20:"
1017			"Obtained source LID of 0. No such LID possible\n");
1018		goto Exit;
1019	}
1020
1021	if (dest_lid_min_ho == 0) {
1022		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F21:"
1023			"Obtained destination LID of 0. No such LID possible\n");
1024		goto Exit;
1025	}
1026
1027	OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
1028		"Src LIDs [%u-%u], Dest LIDs [%u-%u]\n",
1029		src_lid_min_ho, src_lid_max_ho,
1030		dest_lid_min_ho, dest_lid_max_ho);
1031
1032	src_lid_ho = src_lid_min_ho;
1033	dest_lid_ho = dest_lid_min_ho;
1034
1035	/*
1036	   Preferred paths come first in OpenSM
1037	 */
1038	preference = 0;
1039	path_num = 0;
1040
1041	/* If SubnAdmGet, assume NumbPaths 1 (1.2 erratum) */
1042	if (p_sa_mad->method != IB_MAD_METHOD_GET)
1043		if (comp_mask & IB_PR_COMPMASK_NUMBPATH)
1044			iterations = ib_path_rec_num_path(p_pr);
1045		else
1046			iterations = (uintn_t) (-1);
1047	else
1048		iterations = 1;
1049
1050	while (path_num < iterations) {
1051		/*
1052		   These paths are "fully redundant"
1053		 */
1054
1055		p_pr_item = __osm_pr_rcv_get_lid_pair_path(sa, p_pr,
1056							   p_src_port,
1057							   p_dest_port, p_dgid,
1058							   src_lid_ho,
1059							   dest_lid_ho,
1060							   comp_mask,
1061							   preference);
1062
1063		if (p_pr_item) {
1064			cl_qlist_insert_tail(p_list, &p_pr_item->list_item);
1065			++path_num;
1066		}
1067
1068		if (++src_lid_ho > src_lid_max_ho)
1069			break;
1070
1071		if (++dest_lid_ho > dest_lid_max_ho)
1072			break;
1073	}
1074
1075	/*
1076	   Check if we've accumulated all the paths that the user cares to see
1077	 */
1078	if (path_num == iterations)
1079		goto Exit;
1080
1081	/*
1082	   Don't bother reporting preference 1 paths for now.
1083	   It's more trouble than it's worth and can only occur
1084	   if ports have different LMC values, which isn't supported
1085	   by OpenSM right now anyway.
1086	 */
1087	preference = 2;
1088	src_lid_ho = src_lid_min_ho;
1089	dest_lid_ho = dest_lid_min_ho;
1090	src_offset = 0;
1091	dest_offset = 0;
1092
1093	/*
1094	   Iterate over the remaining paths
1095	 */
1096	while (path_num < iterations) {
1097		dest_offset++;
1098		dest_lid_ho++;
1099
1100		if (dest_lid_ho > dest_lid_max_ho) {
1101			src_offset++;
1102			src_lid_ho++;
1103
1104			if (src_lid_ho > src_lid_max_ho)
1105				break;	/* done */
1106
1107			dest_offset = 0;
1108			dest_lid_ho = dest_lid_min_ho;
1109		}
1110
1111		/*
1112		   These paths are "fully non-redundant" with paths already
1113		   identified above and consequently not of much value.
1114
1115		   Don't return paths we already identified above, as indicated
1116		   by the offset values being equal.
1117		 */
1118		if (src_offset == dest_offset)
1119			continue;	/* already reported */
1120
1121		p_pr_item = __osm_pr_rcv_get_lid_pair_path(sa, p_pr,
1122							   p_src_port,
1123							   p_dest_port, p_dgid,
1124							   src_lid_ho,
1125							   dest_lid_ho,
1126							   comp_mask,
1127							   preference);
1128
1129		if (p_pr_item) {
1130			cl_qlist_insert_tail(p_list, &p_pr_item->list_item);
1131			++path_num;
1132		}
1133	}
1134
1135Exit:
1136	OSM_LOG_EXIT(sa->p_log);
1137}
1138
1139/**********************************************************************
1140 **********************************************************************/
1141static ib_net16_t
1142__osm_pr_rcv_get_end_points(IN osm_sa_t * sa,
1143			    IN const osm_madw_t * const p_madw,
1144			    OUT const osm_port_t ** const pp_src_port,
1145			    OUT const osm_port_t ** const pp_dest_port,
1146			    OUT ib_gid_t * const p_dgid)
1147{
1148	const ib_path_rec_t *p_pr;
1149	const ib_sa_mad_t *p_sa_mad;
1150	ib_net64_t comp_mask;
1151	ib_net64_t dest_guid;
1152	ib_api_status_t status;
1153	ib_net16_t sa_status = IB_SA_MAD_STATUS_SUCCESS;
1154	osm_router_t *p_rtr;
1155	osm_port_t *p_rtr_port;
1156
1157	OSM_LOG_ENTER(sa->p_log);
1158
1159	/*
1160	   Determine what fields are valid and then get a pointer
1161	   to the source and destination port objects, if possible.
1162	 */
1163
1164	p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw);
1165	p_pr = (ib_path_rec_t *) ib_sa_mad_get_payload_ptr(p_sa_mad);
1166
1167	comp_mask = p_sa_mad->comp_mask;
1168
1169	/*
1170	   Check a few easy disqualifying cases up front before getting
1171	   into the endpoints.
1172	 */
1173
1174	if (comp_mask & IB_PR_COMPMASK_SGID) {
1175		if (!ib_gid_is_link_local(&p_pr->sgid)) {
1176			if (ib_gid_get_subnet_prefix(&p_pr->sgid) !=
1177			    sa->p_subn->opt.subnet_prefix) {
1178				/*
1179				   This 'error' is the client's fault (bad gid)
1180				   so don't enter it as an error in our own log.
1181				   Return an error response to the client.
1182				 */
1183				OSM_LOG(sa->p_log, OSM_LOG_VERBOSE,
1184					"Non local SGID subnet prefix 0x%016"
1185					PRIx64 "\n",
1186					cl_ntoh64(p_pr->sgid.unicast.prefix));
1187
1188				sa_status = IB_SA_MAD_STATUS_INVALID_GID;
1189				goto Exit;
1190			}
1191		}
1192
1193		*pp_src_port = osm_get_port_by_guid(sa->p_subn,
1194						    p_pr->sgid.unicast.
1195						    interface_id);
1196		if (!*pp_src_port) {
1197			/*
1198			   This 'error' is the client's fault (bad gid) so
1199			   don't enter it as an error in our own log.
1200			   Return an error response to the client.
1201			 */
1202			OSM_LOG(sa->p_log, OSM_LOG_VERBOSE,
1203				"No source port with GUID 0x%016" PRIx64 "\n",
1204				cl_ntoh64(p_pr->sgid.unicast.interface_id));
1205
1206			sa_status = IB_SA_MAD_STATUS_INVALID_GID;
1207			goto Exit;
1208		}
1209	} else {
1210		*pp_src_port = 0;
1211		if (comp_mask & IB_PR_COMPMASK_SLID) {
1212			status = cl_ptr_vector_at(&sa->p_subn->port_lid_tbl,
1213						  cl_ntoh16(p_pr->slid),
1214						  (void **)pp_src_port);
1215
1216			if ((status != CL_SUCCESS) || (*pp_src_port == NULL)) {
1217				/*
1218				   This 'error' is the client's fault (bad lid) so
1219				   don't enter it as an error in our own log.
1220				   Return an error response to the client.
1221				 */
1222				OSM_LOG(sa->p_log, OSM_LOG_VERBOSE,
1223					"No source port with LID %u\n",
1224					cl_ntoh16(p_pr->slid));
1225
1226				sa_status = IB_SA_MAD_STATUS_NO_RECORDS;
1227				goto Exit;
1228			}
1229		}
1230	}
1231
1232	if (p_dgid)
1233		memset(p_dgid, 0, sizeof(*p_dgid));
1234
1235	if (comp_mask & IB_PR_COMPMASK_DGID) {
1236		dest_guid = p_pr->dgid.unicast.interface_id;
1237		if (!ib_gid_is_link_local(&p_pr->dgid)) {
1238			if (!ib_gid_is_multicast(&p_pr->dgid) &&
1239			    ib_gid_get_subnet_prefix(&p_pr->dgid) !=
1240			    sa->p_subn->opt.subnet_prefix) {
1241				OSM_LOG(sa->p_log, OSM_LOG_VERBOSE,
1242					"Non local DGID subnet prefix 0x%016"
1243					PRIx64 "\n",
1244					cl_ntoh64(p_pr->dgid.unicast.prefix));
1245
1246				/* Find the router port that is configured to
1247				   handle this prefix, if any */
1248				osm_prefix_route_t *route = NULL;
1249				osm_prefix_route_t *r = (osm_prefix_route_t *)
1250					cl_qlist_head(&sa->p_subn->prefix_routes_list);
1251
1252				while (r != (osm_prefix_route_t *)
1253				       cl_qlist_end(&sa->p_subn->prefix_routes_list))
1254				{
1255					if (r->prefix == p_pr->dgid.unicast.prefix ||
1256					    r->prefix == 0)
1257					{
1258						route = r;
1259						break;
1260					}
1261					r = (osm_prefix_route_t *) cl_qlist_next(&r->list_item);
1262				}
1263
1264				if (!route) {
1265					/*
1266					  This 'error' is the client's fault (bad gid) so
1267					  don't enter it as an error in our own log.
1268					  Return an error response to the client.
1269					*/
1270					sa_status = IB_SA_MAD_STATUS_INVALID_GID;
1271					goto Exit;
1272				} else if (route->guid == 0) {
1273					/* first router */
1274					p_rtr = (osm_router_t *)
1275						cl_qmap_head(&sa->
1276							     p_subn->
1277							     rtr_guid_tbl);
1278				} else {
1279					p_rtr = (osm_router_t *)
1280						cl_qmap_get(&sa->
1281							    p_subn->
1282							    rtr_guid_tbl,
1283							    route->guid);
1284				}
1285
1286				if (p_rtr ==
1287				    (osm_router_t *) cl_qmap_end(&sa->
1288								 p_subn->
1289								 rtr_guid_tbl))
1290				{
1291					OSM_LOG(sa->p_log, OSM_LOG_ERROR,
1292						"ERR 1F22: "
1293						"Off subnet DGID but router not found\n");
1294					sa_status =
1295					    IB_SA_MAD_STATUS_INVALID_GID;
1296					goto Exit;
1297				}
1298
1299				p_rtr_port = osm_router_get_port_ptr(p_rtr);
1300				dest_guid = osm_port_get_guid(p_rtr_port);
1301				if (p_dgid)
1302					*p_dgid = p_pr->dgid;
1303			}
1304		}
1305
1306		*pp_dest_port = osm_get_port_by_guid(sa->p_subn, dest_guid);
1307		if (!*pp_dest_port) {
1308			/*
1309			   This 'error' is the client's fault (bad gid) so
1310			   don't enter it as an error in our own log.
1311			   Return an error response to the client.
1312			 */
1313			OSM_LOG(sa->p_log, OSM_LOG_VERBOSE,
1314				"No dest port with GUID 0x%016" PRIx64 "\n",
1315				cl_ntoh64(dest_guid));
1316
1317			sa_status = IB_SA_MAD_STATUS_INVALID_GID;
1318			goto Exit;
1319		}
1320	} else {
1321		*pp_dest_port = 0;
1322		if (comp_mask & IB_PR_COMPMASK_DLID) {
1323			status = cl_ptr_vector_at(&sa->p_subn->port_lid_tbl,
1324						  cl_ntoh16(p_pr->dlid),
1325						  (void **)pp_dest_port);
1326
1327			if ((status != CL_SUCCESS) || (*pp_dest_port == NULL)) {
1328				/*
1329				   This 'error' is the client's fault (bad lid)
1330				   so don't enter it as an error in our own log.
1331				   Return an error response to the client.
1332				 */
1333				OSM_LOG(sa->p_log, OSM_LOG_VERBOSE,
1334					"No dest port with LID %u\n",
1335					cl_ntoh16(p_pr->dlid));
1336
1337				sa_status = IB_SA_MAD_STATUS_NO_RECORDS;
1338				goto Exit;
1339			}
1340		}
1341	}
1342
1343Exit:
1344	OSM_LOG_EXIT(sa->p_log);
1345	return (sa_status);
1346}
1347
1348/**********************************************************************
1349 **********************************************************************/
1350static void
1351__osm_pr_rcv_process_world(IN osm_sa_t * sa,
1352			   IN const osm_madw_t * const p_madw,
1353			   IN const osm_port_t * const requester_port,
1354			   IN const ib_gid_t * const p_dgid,
1355			   IN const ib_net64_t comp_mask,
1356			   IN cl_qlist_t * const p_list)
1357{
1358	const cl_qmap_t *p_tbl;
1359	const osm_port_t *p_dest_port;
1360	const osm_port_t *p_src_port;
1361
1362	OSM_LOG_ENTER(sa->p_log);
1363
1364	/*
1365	   Iterate the entire port space over itself.
1366	   A path record from a port to itself is legit, so no
1367	   need for a special case there.
1368
1369	   We compute both A -> B and B -> A, since we don't have
1370	   any check to determine the reversability of the paths.
1371	 */
1372	p_tbl = &sa->p_subn->port_guid_tbl;
1373
1374	p_dest_port = (osm_port_t *) cl_qmap_head(p_tbl);
1375	while (p_dest_port != (osm_port_t *) cl_qmap_end(p_tbl)) {
1376		p_src_port = (osm_port_t *) cl_qmap_head(p_tbl);
1377		while (p_src_port != (osm_port_t *) cl_qmap_end(p_tbl)) {
1378			__osm_pr_rcv_get_port_pair_paths(sa, p_madw,
1379							 requester_port,
1380							 p_src_port,
1381							 p_dest_port, p_dgid,
1382							 comp_mask, p_list);
1383
1384			p_src_port =
1385			    (osm_port_t *) cl_qmap_next(&p_src_port->map_item);
1386		}
1387
1388		p_dest_port =
1389		    (osm_port_t *) cl_qmap_next(&p_dest_port->map_item);
1390	}
1391
1392	OSM_LOG_EXIT(sa->p_log);
1393}
1394
1395/**********************************************************************
1396 **********************************************************************/
1397static void
1398__osm_pr_rcv_process_half(IN osm_sa_t * sa,
1399			  IN const osm_madw_t * const p_madw,
1400			  IN const osm_port_t * const requester_port,
1401			  IN const osm_port_t * const p_src_port,
1402			  IN const osm_port_t * const p_dest_port,
1403			  IN const ib_gid_t * const p_dgid,
1404			  IN const ib_net64_t comp_mask,
1405			  IN cl_qlist_t * const p_list)
1406{
1407	const cl_qmap_t *p_tbl;
1408	const osm_port_t *p_port;
1409
1410	OSM_LOG_ENTER(sa->p_log);
1411
1412	/*
1413	   Iterate over every port, looking for matches...
1414	   A path record from a port to itself is legit, so no
1415	   need to special case that one.
1416	 */
1417	p_tbl = &sa->p_subn->port_guid_tbl;
1418
1419	if (p_src_port) {
1420		/*
1421		   The src port if fixed, so iterate over destination ports.
1422		 */
1423		p_port = (osm_port_t *) cl_qmap_head(p_tbl);
1424		while (p_port != (osm_port_t *) cl_qmap_end(p_tbl)) {
1425			__osm_pr_rcv_get_port_pair_paths(sa, p_madw,
1426							 requester_port,
1427							 p_src_port, p_port,
1428							 p_dgid, comp_mask,
1429							 p_list);
1430			p_port = (osm_port_t *) cl_qmap_next(&p_port->map_item);
1431		}
1432	} else {
1433		/*
1434		   The dest port if fixed, so iterate over source ports.
1435		 */
1436		p_port = (osm_port_t *) cl_qmap_head(p_tbl);
1437		while (p_port != (osm_port_t *) cl_qmap_end(p_tbl)) {
1438			__osm_pr_rcv_get_port_pair_paths(sa, p_madw,
1439							 requester_port, p_port,
1440							 p_dest_port, p_dgid,
1441							 comp_mask, p_list);
1442			p_port = (osm_port_t *) cl_qmap_next(&p_port->map_item);
1443		}
1444	}
1445
1446	OSM_LOG_EXIT(sa->p_log);
1447}
1448
1449/**********************************************************************
1450 **********************************************************************/
1451static void
1452__osm_pr_rcv_process_pair(IN osm_sa_t * sa,
1453			  IN const osm_madw_t * const p_madw,
1454			  IN const osm_port_t * const requester_port,
1455			  IN const osm_port_t * const p_src_port,
1456			  IN const osm_port_t * const p_dest_port,
1457			  IN const ib_gid_t * const p_dgid,
1458			  IN const ib_net64_t comp_mask,
1459			  IN cl_qlist_t * const p_list)
1460{
1461	OSM_LOG_ENTER(sa->p_log);
1462
1463	__osm_pr_rcv_get_port_pair_paths(sa, p_madw, requester_port,
1464					 p_src_port, p_dest_port, p_dgid,
1465					 comp_mask, p_list);
1466
1467	OSM_LOG_EXIT(sa->p_log);
1468}
1469
1470/**********************************************************************
1471 **********************************************************************/
1472static osm_mgrp_t *pr_get_mgrp(IN osm_sa_t * sa,
1473			       IN const osm_madw_t * const p_madw)
1474{
1475	ib_path_rec_t *p_pr;
1476	const ib_sa_mad_t *p_sa_mad;
1477	ib_net64_t comp_mask;
1478	osm_mgrp_t *mgrp = NULL;
1479
1480	p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw);
1481	p_pr = (ib_path_rec_t *) ib_sa_mad_get_payload_ptr(p_sa_mad);
1482
1483	comp_mask = p_sa_mad->comp_mask;
1484
1485	if ((comp_mask & IB_PR_COMPMASK_DGID) &&
1486	    !(mgrp = osm_get_mgrp_by_mgid(sa, &p_pr->dgid))) {
1487		char gid_str[INET6_ADDRSTRLEN];
1488		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F09: "
1489			"No MC group found for PathRecord destination GID %s\n",
1490			inet_ntop(AF_INET6, p_pr->dgid.raw, gid_str,
1491				  sizeof gid_str));
1492		goto Exit;
1493	}
1494
1495	if (comp_mask & IB_PR_COMPMASK_DLID) {
1496		if (mgrp) {
1497			/* check that the MLID in the MC group is */
1498			/* the same as the DLID in the PathRecord */
1499			if (mgrp->mlid != p_pr->dlid) {
1500				/* Note: perhaps this might be better indicated as an invalid request */
1501				OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F10: "
1502					"MC group MLID 0x%x does not match "
1503					"PathRecord destination LID 0x%x\n",
1504					mgrp->mlid, p_pr->dlid);
1505				mgrp = NULL;
1506				goto Exit;
1507			}
1508		} else if (!(mgrp = osm_get_mgrp_by_mlid(sa->p_subn, p_pr->dlid)))
1509			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F11: "
1510				"No MC group found for PathRecord "
1511				"destination LID 0x%x\n", p_pr->dlid);
1512	}
1513
1514Exit:
1515	return mgrp;
1516}
1517
1518/**********************************************************************
1519 **********************************************************************/
1520static ib_api_status_t
1521__osm_pr_match_mgrp_attributes(IN osm_sa_t * sa,
1522			       IN const osm_madw_t * const p_madw,
1523			       IN const osm_mgrp_t * const p_mgrp)
1524{
1525	const ib_path_rec_t *p_pr;
1526	const ib_sa_mad_t *p_sa_mad;
1527	ib_net64_t comp_mask;
1528	ib_api_status_t status = IB_ERROR;
1529	uint32_t flow_label;
1530	uint8_t sl;
1531	uint8_t hop_limit;
1532
1533	OSM_LOG_ENTER(sa->p_log);
1534
1535	p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw);
1536	p_pr = (ib_path_rec_t *) ib_sa_mad_get_payload_ptr(p_sa_mad);
1537
1538	comp_mask = p_sa_mad->comp_mask;
1539
1540	/* If SGID and/or SLID specified, should validate as member of MC group */
1541	/* Also, MTU, rate, packet lifetime, and raw traffic requested are not currently checked */
1542	if (comp_mask & IB_PR_COMPMASK_PKEY) {
1543		if (p_pr->pkey != p_mgrp->mcmember_rec.pkey)
1544			goto Exit;
1545	}
1546
1547	ib_member_get_sl_flow_hop(p_mgrp->mcmember_rec.sl_flow_hop,
1548				  &sl, &flow_label, &hop_limit);
1549
1550	if (comp_mask & IB_PR_COMPMASK_SL) {
1551		if (ib_path_rec_sl(p_pr) != sl)
1552			goto Exit;
1553	}
1554
1555	/* If SubnAdmGet, assume NumbPaths of 1 (1.2 erratum) */
1556	if ((comp_mask & IB_PR_COMPMASK_NUMBPATH) &&
1557	    (p_sa_mad->method != IB_MAD_METHOD_GET)) {
1558		if (ib_path_rec_num_path(p_pr) == 0)
1559			goto Exit;
1560	}
1561
1562	if (comp_mask & IB_PR_COMPMASK_FLOWLABEL) {
1563		if (ib_path_rec_flow_lbl(p_pr) != flow_label)
1564			goto Exit;
1565	}
1566
1567	if (comp_mask & IB_PR_COMPMASK_HOPLIMIT) {
1568		if (ib_path_rec_hop_limit(p_pr) != hop_limit)
1569			goto Exit;
1570	}
1571
1572	if (comp_mask & IB_PR_COMPMASK_TCLASS) {
1573		if (p_pr->tclass != p_mgrp->mcmember_rec.tclass)
1574			goto Exit;
1575	}
1576
1577	status = IB_SUCCESS;
1578
1579Exit:
1580	OSM_LOG_EXIT(sa->p_log);
1581	return (status);
1582}
1583
1584/**********************************************************************
1585 **********************************************************************/
1586static int
1587__osm_pr_rcv_check_mcast_dest(IN osm_sa_t * sa,
1588			      IN const osm_madw_t * const p_madw)
1589{
1590	const ib_path_rec_t *p_pr;
1591	const ib_sa_mad_t *p_sa_mad;
1592	ib_net64_t comp_mask;
1593	int is_multicast = 0;
1594
1595	OSM_LOG_ENTER(sa->p_log);
1596
1597	p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw);
1598	p_pr = (ib_path_rec_t *) ib_sa_mad_get_payload_ptr(p_sa_mad);
1599
1600	comp_mask = p_sa_mad->comp_mask;
1601
1602	if (comp_mask & IB_PR_COMPMASK_DGID) {
1603		is_multicast = ib_gid_is_multicast(&p_pr->dgid);
1604		if (!is_multicast)
1605			goto Exit;
1606	}
1607
1608	if (comp_mask & IB_PR_COMPMASK_DLID) {
1609		if (cl_ntoh16(p_pr->dlid) >= IB_LID_MCAST_START_HO &&
1610		    cl_ntoh16(p_pr->dlid) <= IB_LID_MCAST_END_HO)
1611			is_multicast = 1;
1612		else if (is_multicast) {
1613			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F12: "
1614				"PathRecord request indicates MGID but not MLID\n");
1615			is_multicast = -1;
1616		}
1617	}
1618
1619Exit:
1620	OSM_LOG_EXIT(sa->p_log);
1621	return (is_multicast);
1622}
1623
1624/**********************************************************************
1625 **********************************************************************/
1626void osm_pr_rcv_process(IN void *context, IN void *data)
1627{
1628	osm_sa_t *sa = context;
1629	osm_madw_t *p_madw = data;
1630	const ib_path_rec_t *p_pr;
1631	const ib_sa_mad_t *p_sa_mad;
1632	const osm_port_t *p_src_port;
1633	const osm_port_t *p_dest_port;
1634	cl_qlist_t pr_list;
1635	ib_gid_t dgid;
1636	ib_net16_t sa_status;
1637	osm_port_t *requester_port;
1638	int ret;
1639
1640	OSM_LOG_ENTER(sa->p_log);
1641
1642	CL_ASSERT(p_madw);
1643
1644	p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw);
1645	p_pr = (ib_path_rec_t *) ib_sa_mad_get_payload_ptr(p_sa_mad);
1646
1647	CL_ASSERT(p_sa_mad->attr_id == IB_MAD_ATTR_PATH_RECORD);
1648
1649	/* we only support SubnAdmGet and SubnAdmGetTable methods */
1650	if (p_sa_mad->method != IB_MAD_METHOD_GET &&
1651	    p_sa_mad->method != IB_MAD_METHOD_GETTABLE) {
1652		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F17: "
1653			"Unsupported Method (%s)\n",
1654			ib_get_sa_method_str(p_sa_mad->method));
1655		osm_sa_send_error(sa, p_madw, IB_MAD_STATUS_UNSUP_METHOD_ATTR);
1656		goto Exit;
1657	}
1658
1659	/* update the requester physical port. */
1660	requester_port = osm_get_port_by_mad_addr(sa->p_log, sa->p_subn,
1661						  osm_madw_get_mad_addr_ptr
1662						  (p_madw));
1663	if (requester_port == NULL) {
1664		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F16: "
1665			"Cannot find requester physical port\n");
1666		goto Exit;
1667	}
1668
1669	if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG))
1670		osm_dump_path_record(sa->p_log, p_pr, OSM_LOG_DEBUG);
1671
1672	cl_qlist_init(&pr_list);
1673
1674	/*
1675	   Most SA functions (including this one) are read-only on the
1676	   subnet object, so we grab the lock non-exclusively.
1677	 */
1678	cl_plock_acquire(sa->p_lock);
1679
1680	/* Handle multicast destinations separately */
1681	if ((ret = __osm_pr_rcv_check_mcast_dest(sa, p_madw)) < 0) {
1682		/* Multicast DGID with unicast DLID */
1683		cl_plock_release(sa->p_lock);
1684		osm_sa_send_error(sa, p_madw, IB_MAD_STATUS_INVALID_FIELD);
1685		goto Exit;
1686	}
1687
1688	if (ret > 0)
1689		goto McastDest;
1690
1691	OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Unicast destination requested\n");
1692
1693	sa_status = __osm_pr_rcv_get_end_points(sa, p_madw,
1694						&p_src_port, &p_dest_port,
1695						&dgid);
1696
1697	if (sa_status == IB_SA_MAD_STATUS_SUCCESS) {
1698		/*
1699		   What happens next depends on the type of endpoint information
1700		   that was specified....
1701		 */
1702		if (p_src_port) {
1703			if (p_dest_port)
1704				__osm_pr_rcv_process_pair(sa, p_madw,
1705							  requester_port,
1706							  p_src_port,
1707							  p_dest_port, &dgid,
1708							  p_sa_mad->comp_mask,
1709							  &pr_list);
1710			else
1711				__osm_pr_rcv_process_half(sa, p_madw,
1712							  requester_port,
1713							  p_src_port, NULL,
1714							  &dgid,
1715							  p_sa_mad->comp_mask,
1716							  &pr_list);
1717		} else {
1718			if (p_dest_port)
1719				__osm_pr_rcv_process_half(sa, p_madw,
1720							  requester_port, NULL,
1721							  p_dest_port, &dgid,
1722							  p_sa_mad->comp_mask,
1723							  &pr_list);
1724			else
1725				/*
1726				   Katie, bar the door!
1727				 */
1728				__osm_pr_rcv_process_world(sa, p_madw,
1729							   requester_port,
1730							   &dgid,
1731							   p_sa_mad->comp_mask,
1732							   &pr_list);
1733		}
1734	}
1735	goto Unlock;
1736
1737McastDest:
1738	OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Multicast destination requested\n");
1739	{
1740		osm_mgrp_t *p_mgrp = NULL;
1741		ib_api_status_t status;
1742		osm_pr_item_t *p_pr_item;
1743		uint32_t flow_label;
1744		uint8_t sl;
1745		uint8_t hop_limit;
1746
1747		/* First, get the MC info */
1748		p_mgrp = pr_get_mgrp(sa, p_madw);
1749
1750		if (!p_mgrp)
1751			goto Unlock;
1752
1753		/* Make sure the rest of the PathRecord matches the MC group attributes */
1754		status = __osm_pr_match_mgrp_attributes(sa, p_madw, p_mgrp);
1755		if (status != IB_SUCCESS) {
1756			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F19: "
1757				"MC group attributes don't match PathRecord request\n");
1758			goto Unlock;
1759		}
1760
1761		p_pr_item = malloc(sizeof(*p_pr_item));
1762		if (p_pr_item == NULL) {
1763			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F18: "
1764				"Unable to allocate path record for MC group\n");
1765			goto Unlock;
1766		}
1767		memset(p_pr_item, 0, sizeof(*p_pr_item));
1768
1769		/* Copy PathRecord request into response */
1770		p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw);
1771		p_pr = (ib_path_rec_t *)
1772		    ib_sa_mad_get_payload_ptr(p_sa_mad);
1773		p_pr_item->path_rec = *p_pr;
1774
1775		/* Now, use the MC info to cruft up the PathRecord response */
1776		p_pr_item->path_rec.dgid = p_mgrp->mcmember_rec.mgid;
1777		p_pr_item->path_rec.dlid = p_mgrp->mcmember_rec.mlid;
1778		p_pr_item->path_rec.tclass = p_mgrp->mcmember_rec.tclass;
1779		p_pr_item->path_rec.num_path = 1;
1780		p_pr_item->path_rec.pkey = p_mgrp->mcmember_rec.pkey;
1781
1782		/* MTU, rate, and packet lifetime should be exactly */
1783		p_pr_item->path_rec.mtu = (2 << 6) | p_mgrp->mcmember_rec.mtu;
1784		p_pr_item->path_rec.rate = (2 << 6) | p_mgrp->mcmember_rec.rate;
1785		p_pr_item->path_rec.pkt_life =
1786		    (2 << 6) | p_mgrp->mcmember_rec.pkt_life;
1787
1788		/* SL, Hop Limit, and Flow Label */
1789		ib_member_get_sl_flow_hop(p_mgrp->mcmember_rec.sl_flow_hop,
1790					  &sl, &flow_label, &hop_limit);
1791		ib_path_rec_set_sl(&p_pr_item->path_rec, sl);
1792		ib_path_rec_set_qos_class(&p_pr_item->path_rec, 0);
1793
1794		/* HopLimit is not yet set in non link local MC groups */
1795		/* If it were, this would not be needed */
1796		if (ib_mgid_get_scope(&p_mgrp->mcmember_rec.mgid) != IB_MC_SCOPE_LINK_LOCAL)
1797			hop_limit = IB_HOPLIMIT_MAX;
1798
1799		p_pr_item->path_rec.hop_flow_raw =
1800			cl_hton32(hop_limit) | (flow_label << 8);
1801
1802		cl_qlist_insert_tail(&pr_list, &p_pr_item->list_item);
1803	}
1804
1805Unlock:
1806	cl_plock_release(sa->p_lock);
1807
1808	/* Now, (finally) respond to the PathRecord request */
1809	osm_sa_respond(sa, p_madw, sizeof(ib_path_rec_t), &pr_list);
1810
1811Exit:
1812	OSM_LOG_EXIT(sa->p_log);
1813}
1814