ip2mac.c revision 11042:2d6e217af1b4
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*
28 * Functions to implement IP address -> link layer address (PSARC 2006/482)
29 */
30#include <inet/ip2mac.h>
31#include <inet/ip2mac_impl.h>
32#include <sys/zone.h>
33#include <inet/ip_ndp.h>
34#include <inet/ip_if.h>
35#include <inet/ip6.h>
36
37/*
38 * dispatch pending callbacks.
39 */
40void
41ncec_cb_dispatch(ncec_t *ncec)
42{
43	ncec_cb_t *ncec_cb;
44	ip2mac_t ip2m;
45
46	mutex_enter(&ncec->ncec_lock);
47	if (list_is_empty(&ncec->ncec_cb)) {
48		mutex_exit(&ncec->ncec_lock);
49		return;
50	}
51	ncec_ip2mac_response(&ip2m, ncec);
52	ncec_cb_refhold_locked(ncec);
53	/*
54	 * IP does not hold internal locks like nce_lock across calls to
55	 * other subsystems for fear of recursive lock entry and lock
56	 * hierarchy violation. The caller may be holding locks across
57	 * the call to IP. (It would be ideal if no subsystem holds locks
58	 * across calls into another subsystem, especially if calls can
59	 * happen in either direction).
60	 */
61	ncec_cb = list_head(&ncec->ncec_cb);
62	for (; ncec_cb != NULL; ncec_cb = list_next(&ncec->ncec_cb, ncec_cb)) {
63		if (ncec_cb->ncec_cb_flags & NCE_CB_DISPATCHED)
64			continue;
65		ncec_cb->ncec_cb_flags |= NCE_CB_DISPATCHED;
66		mutex_exit(&ncec->ncec_lock);
67		(*ncec_cb->ncec_cb_func)(&ip2m, ncec_cb->ncec_cb_arg);
68		mutex_enter(&ncec->ncec_lock);
69	}
70	ncec_cb_refrele(ncec);
71	mutex_exit(&ncec->ncec_lock);
72}
73
74/*
75 * fill up the ip2m response fields with inforamation from the nce.
76 */
77void
78ncec_ip2mac_response(ip2mac_t *ip2m, ncec_t *ncec)
79{
80	boolean_t isv6 = (ncec->ncec_ipversion == IPV6_VERSION);
81	sin_t	*sin;
82	sin6_t	*sin6;
83	struct sockaddr_dl *sdl;
84
85	ASSERT(MUTEX_HELD(&ncec->ncec_lock));
86	bzero(ip2m, sizeof (*ip2m));
87	if (NCE_ISREACHABLE(ncec) && !NCE_ISCONDEMNED(ncec))
88		ip2m->ip2mac_err = 0;
89	else
90		ip2m->ip2mac_err = ESRCH;
91	if (isv6) {
92		sin6 = (sin6_t *)&ip2m->ip2mac_pa;
93		sin6->sin6_family = AF_INET6;
94		sin6->sin6_addr = ncec->ncec_addr;
95	} else {
96		sin = (sin_t *)&ip2m->ip2mac_pa;
97		sin->sin_family = AF_INET;
98		IN6_V4MAPPED_TO_INADDR(&ncec->ncec_addr, &sin->sin_addr);
99	}
100	if (ip2m->ip2mac_err == 0) {
101		sdl = &ip2m->ip2mac_ha;
102		sdl->sdl_family = AF_LINK;
103		sdl->sdl_type = ncec->ncec_ill->ill_type;
104		/*
105		 * should we put ncec_ill->ill_name in there? why?
106		 * likewise for the sdl_index
107		 */
108		sdl->sdl_nlen = 0;
109		sdl->sdl_alen = ncec->ncec_ill->ill_phys_addr_length;
110		if (ncec->ncec_lladdr != NULL)
111			bcopy(ncec->ncec_lladdr, LLADDR(sdl), sdl->sdl_alen);
112	}
113}
114
115void
116ncec_cb_refhold_locked(ncec_t *ncec)
117{
118	ASSERT(MUTEX_HELD(&ncec->ncec_lock));
119	ncec->ncec_cb_walker_cnt++;
120}
121
122void
123ncec_cb_refrele(ncec_t *ncec)
124{
125	ncec_cb_t *ncec_cb, *ncec_cb_next = NULL;
126
127	ASSERT(MUTEX_HELD(&ncec->ncec_lock));
128	if (--ncec->ncec_cb_walker_cnt == 0) {
129		for (ncec_cb = list_head(&ncec->ncec_cb); ncec_cb != NULL;
130		    ncec_cb = ncec_cb_next) {
131
132			ncec_cb_next = list_next(&ncec->ncec_cb, ncec_cb);
133			if ((ncec_cb->ncec_cb_flags & NCE_CB_DISPATCHED) == 0)
134				continue;
135			list_remove(&ncec->ncec_cb, ncec_cb);
136			kmem_free(ncec_cb, sizeof (*ncec_cb));
137		}
138	}
139}
140
141/*
142 * add a callback to the nce, so that the callback can be invoked
143 * after address resolution succeeds/fails.
144 */
145static ip2mac_id_t
146ncec_add_cb(ncec_t *ncec, ip2mac_callback_t *cb, void *cbarg)
147{
148	ncec_cb_t	*nce_cb;
149	ip2mac_id_t	ip2mid = NULL;
150
151	ASSERT(MUTEX_HELD(&ncec->ncec_lock));
152	if ((nce_cb = kmem_zalloc(sizeof (*nce_cb), KM_NOSLEEP)) == NULL)
153		return (ip2mid);
154	nce_cb->ncec_cb_func = cb;
155	nce_cb->ncec_cb_arg = cbarg;
156	/*
157	 * We identify the ncec_cb_t during cancellation by the address
158	 * of the nce_cb_t itself, and, as a short-cut for eliminating
159	 * clear mismatches, only look in the callback list of ncec's
160	 * whose address is equal to the nce_cb_id.
161	 */
162	nce_cb->ncec_cb_id = ncec; /* no refs! just an address */
163	list_insert_tail(&ncec->ncec_cb, nce_cb);
164	ip2mid = ncec;  /* this is the id to be used in ip2mac_cancel */
165
166	return (nce_cb);
167}
168
169/*
170 * Resolve an IP address to a link-layer address using the data-structures
171 * defined in PSARC 2006/482. If the current link-layer address for the
172 * IP address is not known, the state-machine for resolving the resolution
173 * will be triggered, and the callback function (*cb) will be invoked after
174 * the resolution completes.
175 */
176ip2mac_id_t
177ip2mac(uint_t op, ip2mac_t *ip2m, ip2mac_callback_t *cb, void *cbarg,
178    zoneid_t zoneid)
179{
180	ncec_t		*ncec;
181	nce_t		*nce = NULL;
182	boolean_t	isv6;
183	ill_t		*ill;
184	netstack_t	*ns;
185	ip_stack_t	*ipst;
186	ip2mac_id_t	ip2mid = NULL;
187	sin_t		*sin;
188	sin6_t		*sin6;
189	int		err;
190	uint64_t	delta;
191	boolean_t	need_resolve = B_FALSE;
192
193	isv6 = (ip2m->ip2mac_pa.ss_family == AF_INET6);
194
195	ns = netstack_find_by_zoneid(zoneid);
196	if (ns == NULL) {
197		ip2m->ip2mac_err = EINVAL;
198		return (NULL);
199	}
200	/*
201	 * For exclusive stacks we reset the zoneid to zero
202	 * since IP uses the global zoneid in the exclusive stacks.
203	 */
204	if (ns->netstack_stackid != GLOBAL_NETSTACKID)
205		zoneid = GLOBAL_ZONEID;
206	ipst = ns->netstack_ip;
207	/*
208	 * find the ill from the ip2m->ip2mac_ifindex
209	 */
210	ill = ill_lookup_on_ifindex(ip2m->ip2mac_ifindex, isv6, ipst);
211	if (ill == NULL) {
212		ip2m->ip2mac_err = ENXIO;
213		netstack_rele(ns);
214		return (NULL);
215	}
216	if (isv6) {
217		sin6 = (sin6_t *)&ip2m->ip2mac_pa;
218		if (op == IP2MAC_LOOKUP) {
219			nce = nce_lookup_v6(ill, &sin6->sin6_addr);
220		} else {
221			err = nce_lookup_then_add_v6(ill, NULL,
222			    ill->ill_phys_addr_length,
223			    &sin6->sin6_addr, 0, ND_UNCHANGED, &nce);
224		}
225	} else  {
226		sin = (sin_t *)&ip2m->ip2mac_pa;
227		if (op == IP2MAC_LOOKUP) {
228			nce = nce_lookup_v4(ill, &sin->sin_addr.s_addr);
229		} else {
230			err = nce_lookup_then_add_v4(ill, NULL,
231			    ill->ill_phys_addr_length,
232			    &sin->sin_addr.s_addr, 0, ND_UNCHANGED, &nce);
233		}
234	}
235	if (op == IP2MAC_LOOKUP) {
236		if (nce == NULL) {
237			ip2m->ip2mac_err = ESRCH;
238			goto done;
239		}
240		ncec = nce->nce_common;
241		delta = TICK_TO_MSEC(lbolt64) - ncec->ncec_last;
242		mutex_enter(&ncec->ncec_lock);
243		if (NCE_ISREACHABLE(ncec) &&
244		    delta < (uint64_t)ill->ill_reachable_time) {
245			ncec_ip2mac_response(ip2m, ncec);
246			ip2m->ip2mac_err = 0;
247		} else {
248			ip2m->ip2mac_err = ESRCH;
249		}
250		mutex_exit(&ncec->ncec_lock);
251		goto done;
252	} else {
253		if (err != 0 && err != EEXIST) {
254			ip2m->ip2mac_err = err;
255			goto done;
256		}
257	}
258	ncec = nce->nce_common;
259	delta = TICK_TO_MSEC(lbolt64) - ncec->ncec_last;
260	mutex_enter(&ncec->ncec_lock);
261	if (NCE_ISCONDEMNED(ncec)) {
262		ip2m->ip2mac_err = ESRCH;
263	} else {
264		if (NCE_ISREACHABLE(ncec)) {
265			if (NCE_MYADDR(ncec) ||
266			    delta < (uint64_t)ill->ill_reachable_time) {
267				ncec_ip2mac_response(ip2m, ncec);
268				ip2m->ip2mac_err = 0;
269				mutex_exit(&ncec->ncec_lock);
270				goto done;
271			}
272			/*
273			 * Since we do not control the packet output
274			 * path for ip2mac() callers, we need to verify
275			 * if the existing information in the nce is
276			 * very old, and retrigger resolution if necessary.
277			 * We will not return the existing stale
278			 * information until it is verified through a
279			 * resolver request/response exchange.
280			 *
281			 * In the future, we may want to support extensions
282			 * that do additional callbacks on link-layer updates,
283			 * so that we can return the stale information but
284			 * also update the caller if the lladdr changes.
285			 */
286			ncec->ncec_rcnt = ill->ill_xmit_count;
287			ncec->ncec_state = ND_PROBE;
288			need_resolve = B_TRUE; /* reachable but very old nce */
289		} else if (ncec->ncec_state == ND_INITIAL) {
290			need_resolve = B_TRUE; /* ND_INITIAL nce */
291			ncec->ncec_state = ND_INCOMPLETE;
292		}
293		/*
294		 * NCE not known to be reachable in the recent past. We must
295		 * reconfirm the information before returning it to the caller
296		 */
297		if (ncec->ncec_rcnt > 0) {
298			/*
299			 * Still resolving this ncec, so we can queue the
300			 * callback information in ncec->ncec_cb
301			 */
302			ip2mid = ncec_add_cb(ncec, cb, cbarg);
303			ip2m->ip2mac_err = EINPROGRESS;
304		} else {
305			/*
306			 * No more retransmits allowed -- resolution failed.
307			 */
308			ip2m->ip2mac_err = ESRCH;
309		}
310	}
311	mutex_exit(&ncec->ncec_lock);
312done:
313	/*
314	 * if NCE_ISREACHABLE(ncec) but very old, or if it is ND_INITIAL,
315	 * trigger resolve.
316	 */
317	if (need_resolve)
318		ip_ndp_resolve(ncec);
319	if (nce != NULL)
320		nce_refrele(nce);
321	netstack_rele(ns);
322	ill_refrele(ill);
323	return (ip2mid);
324}
325
/*
 * Data passed to ncec_walk for canceling outstanding callbacks:
 * ip2mac_cancel() fills it in and ip2mac_cancel_callback() consumes it.
 */
typedef struct ip2mac_cancel_data_s {
	/* id of the callback to cancel, as passed to ip2mac_cancel() */
	ip2mac_id_t ip2m_cancel_id;
	/* starts at 0; set to EBUSY by the walk callback if a walker is active */
	int	ip2m_cancel_err;
} ip2mac_cancel_data_t;
333
334/*
335 * callback invoked for each active ncec. If the ip2mac_id_t corresponds
336 * to an active nce_cb_t in the ncec's callback list, we want to remove
337 * the callback (if there are no walkers) or return EBUSY to the caller
338 */
339static int
340ip2mac_cancel_callback(ncec_t *ncec, void *arg)
341{
342	ip2mac_cancel_data_t *ip2m_wdata = arg;
343	ncec_cb_t *ip2m_nce_cb = ip2m_wdata->ip2m_cancel_id;
344	ncec_cb_t *ncec_cb;
345
346	if (ip2m_nce_cb->ncec_cb_id != ncec)
347		return (0);
348
349	mutex_enter(&ncec->ncec_lock);
350	if (list_is_empty(&ncec->ncec_cb)) {
351		mutex_exit(&ncec->ncec_lock);
352		return (0);
353	}
354	/*
355	 * IP does not hold internal locks like nce_lock across calls to
356	 * other subsystems for fear of recursive lock entry and lock
357	 * hierarchy violation. The caller may be holding locks across
358	 * the call to IP. (It would be ideal if no subsystem holds locks
359	 * across calls into another subsystem, especially if calls can
360	 * happen in either direction).
361	 */
362	ncec_cb = list_head(&ncec->ncec_cb);
363	for (; ncec_cb != NULL; ncec_cb = list_next(&ncec->ncec_cb, ncec_cb)) {
364		if (ncec_cb != ip2m_nce_cb)
365			continue;
366		/*
367		 * If there are no walkers we can remove the nce_cb.
368		 * Otherwise the exiting walker will clean up.
369		 */
370		if (ncec->ncec_cb_walker_cnt == 0) {
371			list_remove(&ncec->ncec_cb, ncec_cb);
372		} else {
373			ip2m_wdata->ip2m_cancel_err = EBUSY;
374		}
375		break;
376	}
377	mutex_exit(&ncec->ncec_lock);
378	return (0);
379}
380
381/*
382 * cancel an outstanding timeout set up via ip2mac
383 */
384int
385ip2mac_cancel(ip2mac_id_t ip2mid, zoneid_t zoneid)
386{
387	netstack_t	*ns;
388	ip_stack_t	*ipst;
389	ip2mac_cancel_data_t ip2m_wdata;
390
391	ns = netstack_find_by_zoneid(zoneid);
392	if (ns == NULL) {
393		ip2m_wdata.ip2m_cancel_err = EINVAL;
394		return (ip2m_wdata.ip2m_cancel_err);
395	}
396	/*
397	 * For exclusive stacks we reset the zoneid to zero
398	 * since IP uses the global zoneid in the exclusive stacks.
399	 */
400	if (ns->netstack_stackid != GLOBAL_NETSTACKID)
401		zoneid = GLOBAL_ZONEID;
402	ipst = ns->netstack_ip;
403
404	ip2m_wdata.ip2m_cancel_id = ip2mid;
405	ip2m_wdata.ip2m_cancel_err = 0;
406	ncec_walk(NULL, ip2mac_cancel_callback, &ip2m_wdata, ipst);
407	/*
408	 * We may return EBUSY if a walk to dispatch callbacks is
409	 * in progress, in which case the caller needs to synchronize
410	 * with the registered callback function to make sure the
411	 * module does not exit when there is a callback pending.
412	 */
413	netstack_rele(ns);
414	return (ip2m_wdata.ip2m_cancel_err);
415}
416