ibcm_arp_link.c revision 11042:2d6e217af1b4
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#include <sys/types.h>
27#include <net/if.h>
28#include <net/if_types.h>
29#include <inet/ip.h>
30#include <inet/ip_ire.h>
31#include <inet/ip_if.h>
32#include <sys/ib/mgt/ibcm/ibcm_arp.h>
33
34extern char cmlog[];
35
36_NOTE(SCHEME_PROTECTS_DATA("Unshared data", ibcm_arp_streams_t))
37
38static void ibcm_resolver_ack(ip2mac_t *, void *);
39static int ibcm_nce_lookup(ibcm_arp_prwqn_t *wqnp, ill_t *ill, zoneid_t zid);
40
41/*
42 * delete a wait queue node from the list.
43 * assumes mutex is acquired
44 */
45void
46ibcm_arp_delete_prwqn(ibcm_arp_prwqn_t *wqnp)
47{
48	ibcm_arp_streams_t *ib_s;
49
50	IBTF_DPRINTF_L4(cmlog, "ibcm_arp_delete_prwqn(%p)", wqnp);
51
52	ib_s = wqnp->ib_str;
53	ib_s->wqnp = NULL;
54	kmem_free(wqnp, sizeof (ibcm_arp_prwqn_t));
55}
56
57/*
58 * allocate a wait queue node, and insert it in the list
59 */
60static ibcm_arp_prwqn_t *
61ibcm_arp_create_prwqn(ibcm_arp_streams_t *ib_s, ibt_ip_addr_t *dst_addr,
62    ibt_ip_addr_t *src_addr)
63{
64	ibcm_arp_prwqn_t *wqnp;
65
66	IBTF_DPRINTF_L4(cmlog, "ibcm_arp_create_prwqn(ib_s: 0x%p)", ib_s);
67
68	if (dst_addr == NULL) {
69		return (NULL);
70	}
71	if ((wqnp = kmem_zalloc(sizeof (ibcm_arp_prwqn_t), KM_NOSLEEP)) ==
72	    NULL) {
73		return (NULL);
74	}
75	wqnp->dst_addr = *dst_addr;
76
77	if (src_addr) {
78		wqnp->usrc_addr = *src_addr;
79	}
80	wqnp->ib_str = ib_s;
81	wqnp->ifproto = (dst_addr->family == AF_INET) ?
82	    ETHERTYPE_IP : ETHERTYPE_IPV6;
83
84	ib_s->wqnp = wqnp;
85
86	IBTF_DPRINTF_L4(cmlog, "ibcm_arp_create_prwqn: Return wqnp: %p", wqnp);
87
88	return (wqnp);
89}
90
91
92/*
93 * Check if the interface is loopback or IB.
94 */
95static int
96ibcm_arp_check_interface(ill_t *ill)
97{
98	if (IS_LOOPBACK(ill) || ill->ill_type == IFT_IB)
99		return (0);
100
101	return (ETIMEDOUT);
102}
103
104int
105ibcm_resolver_pr_lookup(ibcm_arp_streams_t *ib_s, ibt_ip_addr_t *dst_addr,
106    ibt_ip_addr_t *src_addr)
107{
108	ibcm_arp_prwqn_t *wqnp;
109	ire_t	*ire = NULL;
110	ipif_t	*ipif = NULL;
111	ill_t	*ill = NULL;
112	ill_t	*hwaddr_ill = NULL;
113	ip_stack_t *ipst;
114	int		len;
115	ipaddr_t	setsrcv4;
116	in6_addr_t	setsrcv6;
117
118	IBCM_PRINT_IP("ibcm_arp_pr_lookup: SRC", src_addr);
119	IBCM_PRINT_IP("ibcm_arp_pr_lookup: DST", dst_addr);
120
121	if ((wqnp = ibcm_arp_create_prwqn(ib_s, dst_addr, src_addr)) == NULL) {
122		IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
123		    "ibcm_arp_create_prwqn failed");
124		ib_s->status = ENOMEM;
125		return (1);
126	}
127
128	ipst = netstack_find_by_zoneid(GLOBAL_ZONEID)->netstack_ip;
129	if (dst_addr->family == AF_INET) {
130		/*
131		 * A local address is always specified, and it is used
132		 * to find the zoneid.
133		 */
134		ipif = ipif_lookup_addr(src_addr->un.ip4addr, NULL, ALL_ZONES,
135		    ipst);
136		if (ipif == NULL) {
137			IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
138			    "ipif_lookup_addr failed");
139			ib_s->status = EFAULT;
140			goto fail;
141		}
142
143		/*
144		 * get an ire for the destination adress.
145		 * Note that we can't use MATCH_IRE_ILL since that would
146		 * require that the first ill we find have ire_ill set. Thus
147		 * we compare ire_ill against ipif_ill after the lookup.
148		 */
149		setsrcv4 = INADDR_ANY;
150		ire = ire_route_recursive_v4(dst_addr->un.ip4addr, 0, NULL,
151		    ipif->ipif_zoneid, NULL, MATCH_IRE_DSTONLY, B_TRUE, 0, ipst,
152		    &setsrcv4, NULL, NULL);
153
154		ASSERT(ire != NULL);
155		if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
156			IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
157			    "ire_route_recursive_v4 failed");
158			ib_s->status = EFAULT;
159			goto fail;
160		}
161		ill = ire_nexthop_ill(ire);
162		if (ill == NULL) {
163			IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
164			    "ire_nexthop_ill failed");
165			ib_s->status = EFAULT;
166			goto fail;
167		}
168		if (ill != ipif->ipif_ill) {
169			IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
170			    "wrong ill");
171			ib_s->status = EFAULT;
172			goto fail;
173		}
174
175		wqnp->gateway.un.ip4addr = ire->ire_gateway_addr;
176		wqnp->netmask.un.ip4addr = ire->ire_mask;
177		wqnp->src_addr.un.ip4addr = src_addr->un.ip4addr;
178		wqnp->src_addr.family = wqnp->gateway.family =
179		    wqnp->netmask.family = AF_INET;
180
181	} else if (dst_addr->family == AF_INET6) {
182		/*
183		 * A local address is always specified, and it is used
184		 * to find the zoneid.
185		 * We should really match on scopeid for link locals here.
186		 */
187		ipif = ipif_lookup_addr_v6(&src_addr->un.ip6addr, NULL,
188		    ALL_ZONES, ipst);
189		if (ipif == NULL) {
190			IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
191			    "ipif_lookup_addr_v6 failed");
192			ib_s->status = EFAULT;
193			goto fail;
194		}
195
196		/*
197		 * get an ire for the destination adress.
198		 * Note that we can't use MATCH_IRE_ILL since that would
199		 * require that the first ill we find have ire_ill set. Thus
200		 * we compare ire_ill against ipif_ill after the lookup.
201		 */
202		setsrcv6 = ipv6_all_zeros;
203		ire = ire_route_recursive_v6(&dst_addr->un.ip6addr, 0, NULL,
204		    ipif->ipif_zoneid, NULL, MATCH_IRE_DSTONLY, B_TRUE, 0, ipst,
205		    &setsrcv6, NULL, NULL);
206
207		ASSERT(ire != NULL);
208		if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
209			IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
210			    "ire_route_recursive_v6 failed");
211			ib_s->status = EFAULT;
212			goto fail;
213		}
214		ill = ire_nexthop_ill(ire);
215		if (ill == NULL) {
216			IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
217			    "ire_nexthop_ill failed");
218			ib_s->status = EFAULT;
219			goto fail;
220		}
221
222		if (ill != ipif->ipif_ill) {
223			IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
224			    "wrong ill");
225			ib_s->status = EFAULT;
226			goto fail;
227		}
228
229		wqnp->gateway.un.ip6addr = ire->ire_gateway_addr_v6;
230		wqnp->netmask.un.ip6addr = ire->ire_mask_v6;
231		wqnp->src_addr.un.ip6addr = src_addr->un.ip6addr;
232		wqnp->src_addr.family = wqnp->gateway.family =
233		    wqnp->netmask.family = AF_INET6;
234	}
235
236	(void) strlcpy(wqnp->ifname, ill->ill_name, sizeof (wqnp->ifname));
237
238	/*
239	 * For IPMP data addresses, we need to use the hardware address of the
240	 * interface bound to the given address.
241	 */
242	if (IS_IPMP(ill)) {
243		if ((hwaddr_ill = ipmp_ipif_hold_bound_ill(ipif)) == NULL) {
244			IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
245			    "no bound ill for IPMP interface %s",
246			    ill->ill_name);
247			ib_s->status = EFAULT;
248			goto fail;
249		}
250	} else {
251		hwaddr_ill = ill;
252		ill_refhold(hwaddr_ill);	/* for symmetry */
253	}
254
255	if ((ib_s->status = ibcm_arp_check_interface(hwaddr_ill)) != 0) {
256		IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
257		    "ibcm_arp_check_interface failed");
258		goto fail;
259	}
260
261	bcopy(hwaddr_ill->ill_phys_addr, &wqnp->src_mac,
262	    hwaddr_ill->ill_phys_addr_length);
263
264	IBTF_DPRINTF_L4(cmlog, "ibcm_resolver_pr_lookup: outgoing if:%s",
265	    wqnp->ifname);
266
267	/*
268	 * if the user supplied a address, then verify rts returned
269	 * the same address
270	 */
271	if (wqnp->usrc_addr.family) {
272		len = (wqnp->usrc_addr.family == AF_INET) ?
273		    IP_ADDR_LEN : sizeof (in6_addr_t);
274		if (bcmp(&wqnp->usrc_addr.un, &wqnp->src_addr.un, len)) {
275			IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
276			    "srcaddr mismatch:%d", ENETUNREACH);
277			goto fail;
278		}
279	}
280
281	/*
282	 * at this stage, we have the source address and the IB
283	 * interface, now get the destination mac address from
284	 * arp or ipv6 drivers
285	 */
286	ib_s->status = ibcm_nce_lookup(wqnp, ill, getzoneid());
287	if (ib_s->status != 0) {
288		IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
289		    "ibcm_nce_lookup failed: %d", ib_s->status);
290		goto fail;
291	}
292
293	ill_refrele(hwaddr_ill);
294	ill_refrele(ill);
295	ire_refrele(ire);
296	ipif_refrele(ipif);
297	netstack_rele(ipst->ips_netstack);
298
299	IBTF_DPRINTF_L4(cmlog, "ibcm_resolver_pr_lookup: Return: 0x%p", wqnp);
300	return (0);
301fail:
302	if (hwaddr_ill != NULL)
303		ill_refrele(hwaddr_ill);
304	if (ill != NULL)
305		ill_refrele(ill);
306	if (ire != NULL)
307		ire_refrele(ire);
308	if (ipif != NULL)
309		ipif_refrele(ipif);
310	ibcm_arp_delete_prwqn(wqnp);
311	netstack_rele(ipst->ips_netstack);
312	return (1);
313}
314
315/*
316 * Query the neighbor cache for IPv4/IPv6 to mac address mapping.
317 */
318static int
319ibcm_nce_lookup(ibcm_arp_prwqn_t *wqnp, ill_t *ill, zoneid_t zoneid)
320{
321	ip2mac_t	ip2m;
322	sin_t		*sin;
323	sin6_t		*sin6;
324	ip2mac_id_t	ip2mid;
325	int		err;
326
327	if (wqnp->src_addr.family != wqnp->dst_addr.family) {
328		IBTF_DPRINTF_L2(cmlog, "ibcm_nce_lookup: Mis-match SRC_ADDR "
329		    "Family: %d, DST_ADDR Family %d", wqnp->src_addr.family,
330		    wqnp->dst_addr.family);
331		return (1);
332	}
333	bzero(&ip2m, sizeof (ip2m));
334
335	if (wqnp->dst_addr.family == AF_INET) {
336		sin = (sin_t *)&ip2m.ip2mac_pa;
337		sin->sin_family = AF_INET;
338		sin->sin_addr.s_addr = wqnp->dst_addr.un.ip4addr;
339	} else if (wqnp->dst_addr.family == AF_INET6) {
340		sin6 = (sin6_t *)&ip2m.ip2mac_pa;
341		sin6->sin6_family = AF_INET6;
342		sin6->sin6_addr = wqnp->dst_addr.un.ip6addr;
343	} else {
344		IBTF_DPRINTF_L2(cmlog, "ibcm_nce_lookup: Invalid DST_ADDR "
345		    "Family: %d", wqnp->dst_addr.family);
346		return (1);
347	}
348
349	ip2m.ip2mac_ifindex = ill->ill_phyint->phyint_ifindex;
350
351	wqnp->flags |= IBCM_ARP_PR_RESOLVE_PENDING;
352
353	/*
354	 * issue the request to IP for Neighbor Discovery
355	 */
356	ip2mid = ip2mac(IP2MAC_RESOLVE, &ip2m, ibcm_resolver_ack, wqnp,
357	    zoneid);
358	err = ip2m.ip2mac_err;
359	if (err == EINPROGRESS) {
360		wqnp->ip2mac_id = ip2mid;
361		wqnp->flags |= IBCM_ARP_PR_RESOLVE_PENDING;
362		err = 0;
363	} else if (err == 0) {
364		ibcm_resolver_ack(&ip2m, wqnp);
365	}
366	return (err);
367}
368
369/*
370 * do sanity checks on the link-level sockaddr
371 */
372static boolean_t
373ibcm_check_sockdl(struct sockaddr_dl *sdl)
374{
375
376	if (sdl->sdl_type != IFT_IB || sdl->sdl_alen != IPOIB_ADDRL)
377		return (B_FALSE);
378
379	return (B_TRUE);
380}
381
382/*
383 * callback for resolver lookups, both for success and failure.
384 * If Address resolution was succesful: return GID info.
385 */
386static void
387ibcm_resolver_ack(ip2mac_t *ip2macp, void *arg)
388{
389	ibcm_arp_prwqn_t *wqnp = (ibcm_arp_prwqn_t *)arg;
390	ibcm_arp_streams_t *ib_s;
391	uchar_t *cp;
392	int err = 0;
393
394	IBTF_DPRINTF_L4(cmlog, "ibcm_resolver_ack(%p, %p)", ip2macp, wqnp);
395
396	ib_s = wqnp->ib_str;
397	mutex_enter(&ib_s->lock);
398
399	if (ip2macp->ip2mac_err != 0) {
400		wqnp->flags &= ~IBCM_ARP_PR_RESOLVE_PENDING;
401		cv_broadcast(&ib_s->cv);
402		err = EHOSTUNREACH;
403		goto user_callback;
404	}
405
406	if (!ibcm_check_sockdl(&ip2macp->ip2mac_ha)) {
407		IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_ack: Error: "
408		    "interface %s is not IB\n", wqnp->ifname);
409		err = EHOSTUNREACH;
410		goto user_callback;
411	}
412
413	cp = (uchar_t *)LLADDR(&ip2macp->ip2mac_ha);
414	bcopy(cp, &wqnp->dst_mac, IPOIB_ADDRL);
415
416	/*
417	 * at this point we have src/dst gid's derived from the mac addresses
418	 * now get the hca, port
419	 */
420	bcopy(&wqnp->src_mac.ipoib_gidpref, &wqnp->sgid, sizeof (ib_gid_t));
421	bcopy(&wqnp->dst_mac.ipoib_gidpref, &wqnp->dgid, sizeof (ib_gid_t));
422
423	IBCM_H2N_GID(wqnp->sgid);
424	IBCM_H2N_GID(wqnp->dgid);
425
426user_callback:
427
428	ib_s->status = err;
429	ib_s->done = B_TRUE;
430
431	/* lock is held by the caller. */
432	cv_signal(&ib_s->cv);
433	mutex_exit(&ib_s->lock);
434}
435