ip_rts.c revision 11042:2d6e217af1b4
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
5353358Sdim
/*
 * Copyright (c) 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)rtsock.c	8.6 (Berkeley) 2/11/95
 */
40
/*
 * This file contains routines that process routing socket requests.
 */
44
45#include <sys/types.h>
46#include <sys/stream.h>
47#include <sys/stropts.h>
48#include <sys/ddi.h>
49#include <sys/strsubr.h>
50#include <sys/cmn_err.h>
51#include <sys/debug.h>
52#include <sys/policy.h>
53#include <sys/zone.h>
54
55#include <sys/systm.h>
56#include <sys/param.h>
57#include <sys/socket.h>
58#include <sys/strsun.h>
59#include <net/if.h>
60#include <net/route.h>
61#include <netinet/in.h>
62#include <net/if_dl.h>
63#include <netinet/ip6.h>
64
65#include <inet/common.h>
66#include <inet/ip.h>
67#include <inet/ip6.h>
68#include <inet/ip_if.h>
69#include <inet/ip_ire.h>
70#include <inet/ip_ftable.h>
71#include <inet/ip_rts.h>
72
73#include <inet/ipclassifier.h>
74
75#include <sys/tsol/tndb.h>
76#include <sys/tsol/tnet.h>
77
/*
 * Total size of a routing socket message: the size of the header for
 * message `type' plus the size of the data for the `rtm_addrs' address set.
 */
#define	RTS_MSG_SIZE(type, rtm_addrs, af, sacnt) \
	(rts_data_msg_size(rtm_addrs, af, sacnt) + rts_header_msg_size(type))

/* Forward declarations of the helpers that are private to this file. */
static size_t	rts_copyfromsockaddr(struct sockaddr *sa, in6_addr_t *addrp);
static void	rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst,
    ipaddr_t mask, ipaddr_t gateway, ipaddr_t src_addr, ipaddr_t brd_addr,
    ipaddr_t author, ipaddr_t ifaddr, const ill_t *ill, mblk_t *mp,
    const tsol_gc_t *);
static int	rts_getaddrs(rt_msghdr_t *rtm, in6_addr_t *dst_addrp,
    in6_addr_t *gw_addrp, in6_addr_t *net_maskp, in6_addr_t *authorp,
    in6_addr_t *if_addrp, in6_addr_t *src_addrp, ushort_t *indexp,
    sa_family_t *afp, tsol_rtsecattr_t *rtsecattr, int *error);
static void	rts_getifdata(if_data_t *if_data, const ipif_t *ipif);
static int	rts_getmetrics(ire_t *ire, rt_metrics_t *metrics);
static mblk_t	*rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *ifire,
    const in6_addr_t *setsrc, tsol_ire_gw_secattr_t *attrp, sa_family_t af);
static void	rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics);
static ire_t	*ire_lookup_v4(ipaddr_t dst_addr, ipaddr_t net_mask,
    ipaddr_t gw_addr, const ill_t *ill, zoneid_t zoneid,
    const ts_label_t *tsl, int match_flags, ip_stack_t *ipst, ire_t **pifire,
    ipaddr_t *v4setsrcp, tsol_ire_gw_secattr_t **gwattrp);
static ire_t	*ire_lookup_v6(const in6_addr_t *dst_addr_v6,
    const in6_addr_t *net_mask_v6, const in6_addr_t *gw_addr_v6,
    const ill_t *ill, zoneid_t zoneid, const ts_label_t *tsl, int match_flags,
    ip_stack_t *ipst, ire_t **pifire,
    in6_addr_t *v6setsrcp, tsol_ire_gw_secattr_t **gwattrp);
104
105/*
106 * Send `mp' to all eligible routing queues.  A queue is ineligible if:
107 *
108 *  1. SO_USELOOPBACK is off and it is not the originating queue.
109 *  2. RTA_UNDER_IPMP is on and RTSQ_UNDER_IPMP is not set in `flags'.
110 *  3. RTA_UNDER_IPMP is off and RTSQ_NORMAL is not set in `flags'.
111 *  4. It is not the same address family as `af', and `af' isn't AF_UNSPEC.
112 */
113void
114rts_queue_input(mblk_t *mp, conn_t *o_connp, sa_family_t af, uint_t flags,
115    ip_stack_t *ipst)
116{
117	mblk_t	*mp1;
118	conn_t 	*connp, *next_connp;
119
120	/*
121	 * Since we don't have an ill_t here, RTSQ_DEFAULT must already be
122	 * resolved to one or more of RTSQ_NORMAL|RTSQ_UNDER_IPMP at this point.
123	 */
124	ASSERT(!(flags & RTSQ_DEFAULT));
125
126	mutex_enter(&ipst->ips_rts_clients->connf_lock);
127	connp = ipst->ips_rts_clients->connf_head;
128
129	for (; connp != NULL; connp = next_connp) {
130		next_connp = connp->conn_next;
131		/*
132		 * If there was a family specified when this routing socket was
133		 * created and it doesn't match the family of the message to
134		 * copy, then continue.
135		 */
136		if ((connp->conn_proto != AF_UNSPEC) &&
137		    (connp->conn_proto != af))
138			continue;
139
140		/*
141		 * Queue the message only if the conn_t and flags match.
142		 */
143		if (connp->conn_rtaware & RTAW_UNDER_IPMP) {
144			if (!(flags & RTSQ_UNDER_IPMP))
145				continue;
146		} else {
147			if (!(flags & RTSQ_NORMAL))
148				continue;
149		}
150		/*
151		 * For the originating queue, we only copy the message upstream
152		 * if loopback is set.  For others reading on the routing
153		 * socket, we check if there is room upstream for a copy of the
154		 * message.
155		 */
156		if ((o_connp == connp) && connp->conn_useloopback == 0) {
157			connp = connp->conn_next;
158			continue;
159		}
160		CONN_INC_REF(connp);
161		mutex_exit(&ipst->ips_rts_clients->connf_lock);
162		/* Pass to rts_input */
163		if (IPCL_IS_NONSTR(connp) ? !connp->conn_flow_cntrld :
164		    canputnext(connp->conn_rq)) {
165			mp1 = dupmsg(mp);
166			if (mp1 == NULL)
167				mp1 = copymsg(mp);
168			/* Note that we pass a NULL ira to rts_input */
169			if (mp1 != NULL)
170				(connp->conn_recv)(connp, mp1, NULL, NULL);
171		}
172
173		mutex_enter(&ipst->ips_rts_clients->connf_lock);
174		/* reload next_connp since conn_next may have changed */
175		next_connp = connp->conn_next;
176		CONN_DEC_REF(connp);
177	}
178	mutex_exit(&ipst->ips_rts_clients->connf_lock);
179	freemsg(mp);
180}
181
182/*
183 * Takes an ire and sends an ack to all the routing sockets. This
184 * routine is used
185 * - when a route is created/deleted through the ioctl interface.
186 * - when a stale redirect is deleted
187 */
188void
189ip_rts_rtmsg(int type, ire_t *ire, int error, ip_stack_t *ipst)
190{
191	mblk_t		*mp;
192	rt_msghdr_t	*rtm;
193	int		rtm_addrs = (RTA_DST | RTA_NETMASK | RTA_GATEWAY);
194	sa_family_t	af;
195	in6_addr_t	gw_addr_v6;
196
197	if (ire == NULL)
198		return;
199	ASSERT(ire->ire_ipversion == IPV4_VERSION ||
200	    ire->ire_ipversion == IPV6_VERSION);
201
202	ASSERT(!(ire->ire_type & IRE_IF_CLONE));
203
204	if (ire->ire_flags & RTF_SETSRC)
205		rtm_addrs |= RTA_SRC;
206
207	switch (ire->ire_ipversion) {
208	case IPV4_VERSION:
209		af = AF_INET;
210		mp = rts_alloc_msg(type, rtm_addrs, af, 0);
211		if (mp == NULL)
212			return;
213		rts_fill_msg(type, rtm_addrs, ire->ire_addr, ire->ire_mask,
214		    ire->ire_gateway_addr, ire->ire_setsrc_addr, 0, 0, 0, NULL,
215		    mp, NULL);
216		break;
217	case IPV6_VERSION:
218		af = AF_INET6;
219		mp = rts_alloc_msg(type, rtm_addrs, af, 0);
220		if (mp == NULL)
221			return;
222		mutex_enter(&ire->ire_lock);
223		gw_addr_v6 = ire->ire_gateway_addr_v6;
224		mutex_exit(&ire->ire_lock);
225		rts_fill_msg_v6(type, rtm_addrs, &ire->ire_addr_v6,
226		    &ire->ire_mask_v6, &gw_addr_v6,
227		    &ire->ire_setsrc_addr_v6, &ipv6_all_zeros, &ipv6_all_zeros,
228		    &ipv6_all_zeros, NULL, mp, NULL);
229		break;
230	}
231	rtm = (rt_msghdr_t *)mp->b_rptr;
232	mp->b_wptr = (uchar_t *)&mp->b_rptr[rtm->rtm_msglen];
233	rtm->rtm_addrs = rtm_addrs;
234	rtm->rtm_flags = ire->ire_flags;
235	if (error != 0)
236		rtm->rtm_errno = error;
237	else
238		rtm->rtm_flags |= RTF_DONE;
239	rts_queue_input(mp, NULL, af, RTSQ_ALL, ipst);
240}
241
242/*
243 * This is a call from the RTS module
244 * indicating that this is a Routing Socket
245 * Stream. Insert this conn_t in routing
246 * socket client list.
247 */
248void
249ip_rts_register(conn_t *connp)
250{
251	ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
252
253	connp->conn_useloopback = 1;
254	ipcl_hash_insert_wildcard(ipst->ips_rts_clients, connp);
255}
256
257/*
258 * This is a call from the RTS module indicating that it is closing.
259 */
260void
261ip_rts_unregister(conn_t *connp)
262{
263	ipcl_hash_remove(connp);
264}
265
266/*
267 * Processes requests received on a routing socket. It extracts all the
268 * arguments and calls the appropriate function to process the request.
269 *
270 * RTA_SRC bit flag requests are sent by 'route -setsrc'.
271 *
272 * In general, this function does not consume the message supplied but rather
273 * sends the message upstream with an appropriate UNIX errno.
274 */
275int
276ip_rts_request_common(mblk_t *mp, conn_t *connp, cred_t *ioc_cr)
277{
278	rt_msghdr_t	*rtm = NULL;
279	in6_addr_t	dst_addr_v6;
280	in6_addr_t	src_addr_v6;
281	in6_addr_t	gw_addr_v6;
282	in6_addr_t	net_mask_v6;
283	in6_addr_t	author_v6;
284	in6_addr_t	if_addr_v6;
285	mblk_t		*mp1;
286	ire_t		*ire = NULL;
287	ire_t		*ifire = NULL;
288	ipaddr_t	v4setsrc;
289	in6_addr_t	v6setsrc = ipv6_all_zeros;
290	tsol_ire_gw_secattr_t *gwattr = NULL;
291	int		error = 0;
292	int		match_flags = MATCH_IRE_DSTONLY;
293	int		match_flags_local = MATCH_IRE_TYPE | MATCH_IRE_GW;
294	int		found_addrs;
295	sa_family_t	af;
296	ipaddr_t	dst_addr;
297	ipaddr_t	gw_addr;
298	ipaddr_t	src_addr;
299	ipaddr_t	net_mask;
300	ushort_t	index;
301	boolean_t	gcgrp_xtraref = B_FALSE;
302	tsol_gcgrp_addr_t ga;
303	tsol_rtsecattr_t rtsecattr;
304	struct rtsa_s	*rtsap = NULL;
305	tsol_gcgrp_t	*gcgrp = NULL;
306	tsol_gc_t	*gc = NULL;
307	ts_label_t	*tsl = NULL;
308	zoneid_t	zoneid;
309	ip_stack_t	*ipst;
310	ill_t   	*ill = NULL;
311
312	zoneid = connp->conn_zoneid;
313	ipst = connp->conn_netstack->netstack_ip;
314
315	if (mp->b_cont != NULL && !pullupmsg(mp, -1)) {
316		freemsg(mp);
317		error =  EINVAL;
318		goto done;
319	}
320	if ((mp->b_wptr - mp->b_rptr) < sizeof (rt_msghdr_t)) {
321		freemsg(mp);
322		error = EINVAL;
323		goto done;
324	}
325
326	/*
327	 * Check the routing message for basic consistency including the
328	 * version number and that the number of octets written is the same
329	 * as specified by the rtm_msglen field.
330	 *
331	 * At this point, an error can be delivered back via rtm_errno.
332	 */
333	rtm = (rt_msghdr_t *)mp->b_rptr;
334	if ((mp->b_wptr - mp->b_rptr) != rtm->rtm_msglen) {
335		error = EINVAL;
336		goto done;
337	}
338	if (rtm->rtm_version != RTM_VERSION) {
339		error = EPROTONOSUPPORT;
340		goto done;
341	}
342
343	/* Only allow RTM_GET or RTM_RESOLVE for unprivileged process */
344	if (rtm->rtm_type != RTM_GET &&
345	    rtm->rtm_type != RTM_RESOLVE &&
346	    (ioc_cr == NULL ||
347	    secpolicy_ip_config(ioc_cr, B_FALSE) != 0)) {
348		error = EPERM;
349		goto done;
350	}
351
352	found_addrs = rts_getaddrs(rtm, &dst_addr_v6, &gw_addr_v6, &net_mask_v6,
353	    &author_v6, &if_addr_v6, &src_addr_v6, &index, &af, &rtsecattr,
354	    &error);
355
356	if (error != 0)
357		goto done;
358
359	if ((found_addrs & RTA_DST) == 0) {
360		error = EINVAL;
361		goto done;
362	}
363
364	/*
365	 * Based on the address family of the destination address, determine
366	 * the destination, gateway and netmask and return the appropriate error
367	 * if an unknown address family was specified (following the errno
368	 * values that 4.4BSD-Lite2 returns.)
369	 */
370	switch (af) {
371	case AF_INET:
372		IN6_V4MAPPED_TO_IPADDR(&dst_addr_v6, dst_addr);
373		IN6_V4MAPPED_TO_IPADDR(&src_addr_v6, src_addr);
374		IN6_V4MAPPED_TO_IPADDR(&gw_addr_v6, gw_addr);
375		if (((found_addrs & RTA_NETMASK) == 0) ||
376		    (rtm->rtm_flags & RTF_HOST))
377			net_mask = IP_HOST_MASK;
378		else
379			IN6_V4MAPPED_TO_IPADDR(&net_mask_v6, net_mask);
380		break;
381	case AF_INET6:
382		if (((found_addrs & RTA_NETMASK) == 0) ||
383		    (rtm->rtm_flags & RTF_HOST))
384			net_mask_v6 = ipv6_all_ones;
385		break;
386	default:
387		/*
388		 * These errno values are meant to be compatible with
389		 * 4.4BSD-Lite2 for the given message types.
390		 */
391		switch (rtm->rtm_type) {
392		case RTM_ADD:
393		case RTM_DELETE:
394			error = ESRCH;
395			goto done;
396		case RTM_GET:
397		case RTM_CHANGE:
398			error = EAFNOSUPPORT;
399			goto done;
400		default:
401			error = EOPNOTSUPP;
402			goto done;
403		}
404	}
405
406	/*
407	 * At this point, the address family must be something known.
408	 */
409	ASSERT(af == AF_INET || af == AF_INET6);
410
411	/* Handle RTA_IFP */
412	if (index != 0) {
413		ipif_t		*ipif;
414lookup:
415		ill = ill_lookup_on_ifindex(index, af == AF_INET6, ipst);
416		if (ill == NULL) {
417			error = EINVAL;
418			goto done;
419		}
420
421		/*
422		 * Since all interfaces in an IPMP group must be equivalent,
423		 * we prevent changes to a specific underlying interface's
424		 * routing configuration.  However, for backward compatibility,
425		 * we intepret a request to add a route on an underlying
426		 * interface as a request to add a route on its IPMP interface.
427		 */
428		if (IS_UNDER_IPMP(ill)) {
429			switch (rtm->rtm_type) {
430			case RTM_CHANGE:
431			case RTM_DELETE:
432				error = EINVAL;
433				goto done;
434			case RTM_ADD:
435				index = ipmp_ill_get_ipmp_ifindex(ill);
436				ill_refrele(ill);
437				if (index == 0) {
438					ill = NULL; /* already refrele'd */
439					error = EINVAL;
440					goto done;
441				}
442				goto lookup;
443			}
444		}
445
446		match_flags |= MATCH_IRE_ILL;
447		/*
448		 * This provides the same zoneid as in Solaris 10
449		 * that -ifp picks the zoneid from the first ipif on the ill.
450		 * But it might not be useful since the first ipif will always
451		 * have the same zoneid as the ill.
452		 */
453		ipif = ipif_get_next_ipif(NULL, ill);
454		if (ipif != NULL) {
455			zoneid = ipif->ipif_zoneid;
456			ipif_refrele(ipif);
457		}
458	}
459
460	/*
461	 * If a netmask was supplied in the message, then subsequent route
462	 * lookups will attempt to match on the netmask as well.
463	 */
464	if ((found_addrs & RTA_NETMASK) != 0)
465		match_flags |= MATCH_IRE_MASK;
466
467	/*
468	 * We only process any passed-in route security attributes for
469	 * either RTM_ADD or RTM_CHANGE message; We overload them
470	 * to do an RTM_GET as a different label; ignore otherwise.
471	 */
472	if (rtm->rtm_type == RTM_ADD || rtm->rtm_type == RTM_CHANGE ||
473	    rtm->rtm_type == RTM_GET) {
474		ASSERT(rtsecattr.rtsa_cnt <= TSOL_RTSA_REQUEST_MAX);
475		if (rtsecattr.rtsa_cnt > 0)
476			rtsap = &rtsecattr.rtsa_attr[0];
477	}
478
479	switch (rtm->rtm_type) {
480	case RTM_ADD:
481		/* if we are adding a route, gateway is a must */
482		if ((found_addrs & RTA_GATEWAY) == 0) {
483			error = EINVAL;
484			goto done;
485		}
486
487		/* Multirouting does not support net routes. */
488		if ((rtm->rtm_flags & (RTF_MULTIRT | RTF_HOST)) ==
489		    RTF_MULTIRT) {
490			error = EADDRNOTAVAIL;
491			goto done;
492		}
493
494		/*
495		 * Multirouting and user-specified source addresses
496		 * do not support interface based routing.
497		 * Assigning a source address to an interface based
498		 * route is achievable by plumbing a new ipif and
499		 * setting up the interface route via this ipif,
500		 * though.
501		 */
502		if (rtm->rtm_flags & (RTF_MULTIRT | RTF_SETSRC)) {
503			if ((rtm->rtm_flags & RTF_GATEWAY) == 0) {
504				error = EADDRNOTAVAIL;
505				goto done;
506			}
507		}
508
509		switch (af) {
510		case AF_INET:
511			if (src_addr != INADDR_ANY) {
512				uint_t type;
513
514				/*
515				 * The RTF_SETSRC flag is present, check that
516				 * the supplied src address is not the loopback
517				 * address. This would produce martian packets.
518				 */
519				if (src_addr == htonl(INADDR_LOOPBACK)) {
520					error = EINVAL;
521					goto done;
522				}
523				/*
524				 * Also check that the supplied address is a
525				 * valid, local one. Only allow IFF_UP ones
526				 */
527				type = ip_type_v4(src_addr, ipst);
528				if (!(type & (IRE_LOCAL|IRE_LOOPBACK))) {
529					error = EADDRNOTAVAIL;
530					goto done;
531				}
532			} else {
533				/*
534				 * The RTF_SETSRC modifier must be associated
535				 * to a non-null source address.
536				 */
537				if (rtm->rtm_flags & RTF_SETSRC) {
538					error = EINVAL;
539					goto done;
540				}
541			}
542
543			error = ip_rt_add(dst_addr, net_mask, gw_addr, src_addr,
544			    rtm->rtm_flags, ill, &ire, B_FALSE,
545			    rtsap, ipst, zoneid);
546			if (ill != NULL)
547				ASSERT(!MUTEX_HELD(&ill->ill_lock));
548			break;
549		case AF_INET6:
550			if (!IN6_IS_ADDR_UNSPECIFIED(&src_addr_v6)) {
551				uint_t type;
552
553				/*
554				 * The RTF_SETSRC flag is present, check that
555				 * the supplied src address is not the loopback
556				 * address. This would produce martian packets.
557				 */
558				if (IN6_IS_ADDR_LOOPBACK(&src_addr_v6)) {
559					error = EINVAL;
560					goto done;
561				}
562				/*
563				 * Also check that the supplied address is a
564				 * valid, local one. Only allow UP ones.
565				 */
566				type = ip_type_v6(&src_addr_v6, ipst);
567				if (!(type & (IRE_LOCAL|IRE_LOOPBACK))) {
568					error = EADDRNOTAVAIL;
569					goto done;
570				}
571
572				error = ip_rt_add_v6(&dst_addr_v6, &net_mask_v6,
573				    &gw_addr_v6, &src_addr_v6, rtm->rtm_flags,
574				    ill, &ire, rtsap, ipst, zoneid);
575				break;
576			}
577			/*
578			 * The RTF_SETSRC modifier must be associated
579			 * to a non-null source address.
580			 */
581			if (rtm->rtm_flags & RTF_SETSRC) {
582				error = EINVAL;
583				goto done;
584			}
585			error = ip_rt_add_v6(&dst_addr_v6, &net_mask_v6,
586			    &gw_addr_v6, NULL, rtm->rtm_flags,
587			    ill, &ire, rtsap, ipst, zoneid);
588			if (ill != NULL)
589				ASSERT(!MUTEX_HELD(&ill->ill_lock));
590			break;
591		}
592		if (error != 0)
593			goto done;
594		ASSERT(ire != NULL);
595		rts_setmetrics(ire, rtm->rtm_inits, &rtm->rtm_rmx);
596		break;
597	case RTM_DELETE:
598		/* if we are deleting a route, gateway is a must */
599		if ((found_addrs & RTA_GATEWAY) == 0) {
600			error = EINVAL;
601			goto done;
602		}
603		/*
604		 * The RTF_SETSRC modifier does not make sense
605		 * when deleting a route.
606		 */
607		if (rtm->rtm_flags & RTF_SETSRC) {
608			error = EINVAL;
609			goto done;
610		}
611
612		switch (af) {
613		case AF_INET:
614			error = ip_rt_delete(dst_addr, net_mask, gw_addr,
615			    found_addrs, rtm->rtm_flags, ill, B_FALSE,
616			    ipst, zoneid);
617			break;
618		case AF_INET6:
619			error = ip_rt_delete_v6(&dst_addr_v6, &net_mask_v6,
620			    &gw_addr_v6, found_addrs, rtm->rtm_flags, ill,
621			    ipst, zoneid);
622			break;
623		}
624		break;
625	case RTM_GET:
626	case RTM_CHANGE:
627		/*
628		 * In the case of RTM_GET, the forwarding table should be
629		 * searched recursively.  Also, if a gateway was
630		 * specified then the gateway address must also be matched.
631		 *
632		 * In the case of RTM_CHANGE, the gateway address (if supplied)
633		 * is the new gateway address so matching on the gateway address
634		 * is not done.  This can lead to ambiguity when looking up the
635		 * route to change as usually only the destination (and netmask,
636		 * if supplied) is used for the lookup.  However if a RTA_IFP
637		 * sockaddr is also supplied, it can disambiguate which route to
638		 * change provided the ambigous routes are tied to distinct
639		 * ill's (or interface indices).  If the routes are not tied to
640		 * any particular interfaces (for example, with traditional
641		 * gateway routes), then a RTA_IFP sockaddr will be of no use as
642		 * it won't match any such routes.
643		 * RTA_SRC is not supported for RTM_GET and RTM_CHANGE,
644		 * except when RTM_CHANGE is combined to RTF_SETSRC.
645		 */
646		if (((found_addrs & RTA_SRC) != 0) &&
647		    ((rtm->rtm_type == RTM_GET) ||
648		    !(rtm->rtm_flags & RTF_SETSRC))) {
649			error = EOPNOTSUPP;
650			goto done;
651		}
652
653		if (rtm->rtm_type == RTM_GET) {
654			match_flags |= MATCH_IRE_SECATTR;
655			match_flags_local |= MATCH_IRE_SECATTR;
656			if ((found_addrs & RTA_GATEWAY) != 0)
657				match_flags |= MATCH_IRE_GW;
658			if (ioc_cr)
659				tsl = crgetlabel(ioc_cr);
660			if (rtsap != NULL) {
661				if (rtsa_validate(rtsap) != 0) {
662					error = EINVAL;
663					goto done;
664				}
665				if (tsl != NULL &&
666				    crgetzoneid(ioc_cr) != GLOBAL_ZONEID &&
667				    (tsl->tsl_doi != rtsap->rtsa_doi ||
668				    !bldominates(&tsl->tsl_label,
669				    &rtsap->rtsa_slrange.lower_bound))) {
670					error = EPERM;
671					goto done;
672				}
673				tsl = labelalloc(
674				    &rtsap->rtsa_slrange.lower_bound,
675				    rtsap->rtsa_doi, KM_NOSLEEP);
676			}
677		}
678		if (rtm->rtm_type == RTM_CHANGE) {
679			if ((found_addrs & RTA_GATEWAY) &&
680			    (rtm->rtm_flags & RTF_SETSRC)) {
681				/*
682				 * Do not want to change the gateway,
683				 * but rather the source address.
684				 */
685				match_flags |= MATCH_IRE_GW;
686			}
687		}
688
689		/*
690		 * If the netmask is all ones (either as supplied or as derived
691		 * above), then first check for an IRE_LOOPBACK or
692		 * IRE_LOCAL entry.
693		 *
694		 * If we didn't check for or find an IRE_LOOPBACK or IRE_LOCAL
695		 * entry, then look for any other type of IRE.
696		 */
697		switch (af) {
698		case AF_INET:
699			if (net_mask == IP_HOST_MASK) {
700				ire = ire_ftable_lookup_v4(dst_addr, 0, gw_addr,
701				    IRE_LOCAL | IRE_LOOPBACK, NULL, zoneid,
702				    tsl, match_flags_local, 0, ipst, NULL);
703			}
704			if (ire == NULL) {
705				ire = ire_lookup_v4(dst_addr, net_mask,
706				    gw_addr, ill, zoneid, tsl, match_flags,
707				    ipst, &ifire, &v4setsrc, &gwattr);
708				IN6_IPADDR_TO_V4MAPPED(v4setsrc, &v6setsrc);
709			}
710			break;
711		case AF_INET6:
712			if (IN6_ARE_ADDR_EQUAL(&net_mask_v6, &ipv6_all_ones)) {
713				ire = ire_ftable_lookup_v6(&dst_addr_v6, NULL,
714				    &gw_addr_v6, IRE_LOCAL | IRE_LOOPBACK, NULL,
715				    zoneid, tsl, match_flags_local, 0, ipst,
716				    NULL);
717			}
718			if (ire == NULL) {
719				ire = ire_lookup_v6(&dst_addr_v6,
720				    &net_mask_v6, &gw_addr_v6, ill, zoneid,
721				    tsl, match_flags, ipst, &ifire, &v6setsrc,
722				    &gwattr);
723			}
724			break;
725		}
726		if (tsl != NULL && tsl != crgetlabel(ioc_cr))
727			label_rele(tsl);
728
729		if (ire == NULL) {
730			error = ESRCH;
731			goto done;
732		}
733		/*
734		 * Want to return failure if we get an IRE_NOROUTE from
735		 * ire_route_recursive
736		 */
737		if (ire->ire_type & IRE_NOROUTE) {
738			ire_refrele(ire);
739			ire = NULL;
740			error = ESRCH;
741			goto done;
742		}
743
744		/* we know the IRE before we come here */
745		switch (rtm->rtm_type) {
746		case RTM_GET:
747			mp1 = rts_rtmget(mp, ire, ifire, &v6setsrc, gwattr, af);
748			if (mp1 == NULL) {
749				error = ENOBUFS;
750				goto done;
751			}
752			freemsg(mp);
753			mp = mp1;
754			rtm = (rt_msghdr_t *)mp->b_rptr;
755			break;
756		case RTM_CHANGE:
757			/*
758			 * Do not allow to the multirouting state of a route
759			 * to be changed. This aims to prevent undesirable
760			 * stages where both multirt and non-multirt routes
761			 * for the same destination are declared.
762			 */
763			if ((ire->ire_flags & RTF_MULTIRT) !=
764			    (rtm->rtm_flags & RTF_MULTIRT)) {
765				error = EINVAL;
766				goto done;
767			}
768			/*
769			 * Note that we do not need to do
770			 * ire_flush_cache_*(IRE_FLUSH_ADD) as a change
771			 * in metrics or gateway will not affect existing
772			 * routes since it does not create a more specific
773			 * route.
774			 */
775			switch (af) {
776			case AF_INET:
777				if ((found_addrs & RTA_GATEWAY) != 0 &&
778				    (ire->ire_gateway_addr != gw_addr)) {
779					ire->ire_gateway_addr = gw_addr;
780				}
781
782				if (rtsap != NULL) {
783					ga.ga_af = AF_INET;
784					IN6_IPADDR_TO_V4MAPPED(
785					    ire->ire_gateway_addr, &ga.ga_addr);
786
787					gcgrp = gcgrp_lookup(&ga, B_TRUE);
788					if (gcgrp == NULL) {
789						error = ENOMEM;
790						goto done;
791					}
792				}
793
794				if ((found_addrs & RTA_SRC) != 0 &&
795				    (rtm->rtm_flags & RTF_SETSRC) != 0 &&
796				    (ire->ire_setsrc_addr != src_addr)) {
797					if (src_addr != INADDR_ANY) {
798						uint_t type;
799
800						/*
801						 * The RTF_SETSRC flag is
802						 * present, check that the
803						 * supplied src address is not
804						 * the loopback address. This
805						 * would produce martian
806						 * packets.
807						 */
808						if (src_addr ==
809						    htonl(INADDR_LOOPBACK)) {
810							error = EINVAL;
811							goto done;
812						}
813						/*
814						 * Also check that the
815						 * supplied addr is a valid
816						 * local address.
817						 */
818						type = ip_type_v4(src_addr,
819						    ipst);
820						if (!(type &
821						    (IRE_LOCAL|IRE_LOOPBACK))) {
822							error = EADDRNOTAVAIL;
823							goto done;
824						}
825						ire->ire_flags |= RTF_SETSRC;
826						ire->ire_setsrc_addr =
827						    src_addr;
828					} else {
829						ire->ire_flags &= ~RTF_SETSRC;
830						ire->ire_setsrc_addr =
831						    INADDR_ANY;
832					}
833					/*
834					 * Let conn_ixa caching know that
835					 * source address selection changed
836					 */
837					ip_update_source_selection(ipst);
838				}
839				ire_flush_cache_v4(ire, IRE_FLUSH_GWCHANGE);
840				break;
841			case AF_INET6:
842				mutex_enter(&ire->ire_lock);
843				if ((found_addrs & RTA_GATEWAY) != 0 &&
844				    !IN6_ARE_ADDR_EQUAL(
845				    &ire->ire_gateway_addr_v6, &gw_addr_v6)) {
846					ire->ire_gateway_addr_v6 = gw_addr_v6;
847				}
848				mutex_exit(&ire->ire_lock);
849
850				if (rtsap != NULL) {
851					ga.ga_af = AF_INET6;
852					mutex_enter(&ire->ire_lock);
853					ga.ga_addr = ire->ire_gateway_addr_v6;
854					mutex_exit(&ire->ire_lock);
855
856					gcgrp = gcgrp_lookup(&ga, B_TRUE);
857					if (gcgrp == NULL) {
858						error = ENOMEM;
859						goto done;
860					}
861				}
862
863				if ((found_addrs & RTA_SRC) != 0 &&
864				    (rtm->rtm_flags & RTF_SETSRC) != 0 &&
865				    !IN6_ARE_ADDR_EQUAL(
866				    &ire->ire_setsrc_addr_v6, &src_addr_v6)) {
867					if (!IN6_IS_ADDR_UNSPECIFIED(
868					    &src_addr_v6)) {
869						uint_t type;
870
871						/*
872						 * The RTF_SETSRC flag is
873						 * present, check that the
874						 * supplied src address is not
875						 * the loopback address. This
876						 * would produce martian
877						 * packets.
878						 */
879						if (IN6_IS_ADDR_LOOPBACK(
880						    &src_addr_v6)) {
881							error = EINVAL;
882							goto done;
883						}
884						/*
885						 * Also check that the
886						 * supplied addr is a valid
887						 * local address.
888						 */
889						type = ip_type_v6(&src_addr_v6,
890						    ipst);
891						if (!(type &
892						    (IRE_LOCAL|IRE_LOOPBACK))) {
893							error = EADDRNOTAVAIL;
894							goto done;
895						}
896						mutex_enter(&ire->ire_lock);
897						ire->ire_flags |= RTF_SETSRC;
898						ire->ire_setsrc_addr_v6 =
899						    src_addr_v6;
900						mutex_exit(&ire->ire_lock);
901					} else {
902						mutex_enter(&ire->ire_lock);
903						ire->ire_flags &= ~RTF_SETSRC;
904						ire->ire_setsrc_addr_v6 =
905						    ipv6_all_zeros;
906						mutex_exit(&ire->ire_lock);
907					}
908					/*
909					 * Let conn_ixa caching know that
910					 * source address selection changed
911					 */
912					ip_update_source_selection(ipst);
913				}
914				ire_flush_cache_v6(ire, IRE_FLUSH_GWCHANGE);
915				break;
916			}
917
918			if (rtsap != NULL) {
919				ASSERT(gcgrp != NULL);
920
921				/*
922				 * Create and add the security attribute to
923				 * prefix IRE; it will add a reference to the
924				 * group upon allocating a new entry.  If it
925				 * finds an already-existing entry for the
926				 * security attribute, it simply returns it
927				 * and no new group reference is made.
928				 */
929				gc = gc_create(rtsap, gcgrp, &gcgrp_xtraref);
930				if (gc == NULL ||
931				    (error = tsol_ire_init_gwattr(ire,
932				    ire->ire_ipversion, gc)) != 0) {
933					if (gc != NULL) {
934						GC_REFRELE(gc);
935					} else {
936						/* gc_create failed */
937						error = ENOMEM;
938					}
939					goto done;
940				}
941			}
942			rts_setmetrics(ire, rtm->rtm_inits, &rtm->rtm_rmx);
943			break;
944		}
945		break;
946	default:
947		error = EOPNOTSUPP;
948		break;
949	}
950done:
951	if (ire != NULL)
952		ire_refrele(ire);
953	if (ifire != NULL)
954		ire_refrele(ifire);
955	if (ill != NULL)
956		ill_refrele(ill);
957
958	if (gcgrp_xtraref)
959		GCGRP_REFRELE(gcgrp);
960
961	if (rtm != NULL) {
962		ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
963		if (error != 0) {
964			rtm->rtm_errno = error;
965			/* Send error ACK */
966			ip1dbg(("ip_rts_request: error %d\n", error));
967		} else {
968			rtm->rtm_flags |= RTF_DONE;
969			/* OK ACK already set up by caller except this */
970			ip2dbg(("ip_rts_request: OK ACK\n"));
971		}
972		rts_queue_input(mp, connp, af, RTSQ_ALL, ipst);
973	}
974	return (error);
975}
976
977/*
978 * Helper function that can do recursive lookups including when
979 * MATCH_IRE_GW and/or MATCH_IRE_MASK is set.
980 */
981static ire_t *
982ire_lookup_v4(ipaddr_t dst_addr, ipaddr_t net_mask, ipaddr_t gw_addr,
983    const ill_t *ill, zoneid_t zoneid, const ts_label_t *tsl,
984    int match_flags, ip_stack_t *ipst, ire_t **pifire, ipaddr_t *v4setsrcp,
985    tsol_ire_gw_secattr_t **gwattrp)
986{
987	ire_t		*ire;
988	ire_t		*ifire = NULL;
989	uint_t		ire_type;
990
991	*pifire = NULL;
992	*v4setsrcp = INADDR_ANY;
993	*gwattrp = NULL;
994
995	/* Skip IRE_IF_CLONE */
996	match_flags |= MATCH_IRE_TYPE;
997	ire_type = (IRE_ONLINK|IRE_OFFLINK) & ~IRE_IF_CLONE;
998
999	/*
1000	 * ire_route_recursive can't match gateway or mask thus if they are
1001	 * set we have to do two steps of lookups
1002	 */
1003	if (match_flags & (MATCH_IRE_GW|MATCH_IRE_MASK)) {
1004		ire = ire_ftable_lookup_v4(dst_addr, net_mask, gw_addr,
1005		    ire_type, ill, zoneid, tsl, match_flags, 0, ipst, NULL);
1006
1007		if (ire == NULL ||(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)))
1008			return (ire);
1009
1010		if (ire->ire_type & IRE_ONLINK)
1011			return (ire);
1012
1013		if (ire->ire_flags & RTF_SETSRC) {
1014			ASSERT(ire->ire_setsrc_addr != INADDR_ANY);
1015			*v4setsrcp = ire->ire_setsrc_addr;
1016			v4setsrcp = NULL;
1017		}
1018
1019		/* The first ire_gw_secattr is passed back */
1020		if (ire->ire_gw_secattr != NULL) {
1021			*gwattrp = ire->ire_gw_secattr;
1022			gwattrp = NULL;
1023		}
1024
1025		/* Look for an interface ire recursively based on the gateway */
1026		dst_addr = ire->ire_gateway_addr;
1027		match_flags &= ~(MATCH_IRE_GW|MATCH_IRE_MASK);
1028		ifire = ire_route_recursive_v4(dst_addr, ire_type, ill, zoneid,
1029		    tsl, match_flags, B_FALSE, 0, ipst, v4setsrcp, gwattrp,
1030		    NULL);
1031	} else {
1032		ire = ire_route_recursive_v4(dst_addr, ire_type, ill, zoneid,
1033		    tsl, match_flags, B_FALSE, 0, ipst, v4setsrcp, gwattrp,
1034		    NULL);
1035	}
1036	*pifire = ifire;
1037	return (ire);
1038}
1039
1040static ire_t *
1041ire_lookup_v6(const in6_addr_t *dst_addr_v6,
1042    const in6_addr_t *net_mask_v6, const in6_addr_t *gw_addr_v6,
1043    const ill_t *ill, zoneid_t zoneid, const ts_label_t *tsl, int match_flags,
1044    ip_stack_t *ipst, ire_t **pifire,
1045    in6_addr_t *v6setsrcp, tsol_ire_gw_secattr_t **gwattrp)
1046{
1047	ire_t		*ire;
1048	ire_t		*ifire = NULL;
1049	uint_t		ire_type;
1050
1051	*pifire = NULL;
1052	*v6setsrcp = ipv6_all_zeros;
1053	*gwattrp = NULL;
1054
1055	/* Skip IRE_IF_CLONE */
1056	match_flags |= MATCH_IRE_TYPE;
1057	ire_type = (IRE_ONLINK|IRE_OFFLINK) & ~IRE_IF_CLONE;
1058
1059	/*
1060	 * ire_route_recursive can't match gateway or mask thus if they are
1061	 * set we have to do two steps of lookups
1062	 */
1063	if (match_flags & (MATCH_IRE_GW|MATCH_IRE_MASK)) {
1064		in6_addr_t dst;
1065
1066		ire = ire_ftable_lookup_v6(dst_addr_v6, net_mask_v6,
1067		    gw_addr_v6, ire_type, ill, zoneid, tsl, match_flags, 0,
1068		    ipst, NULL);
1069
1070		if (ire == NULL ||(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)))
1071			return (ire);
1072
1073		if (ire->ire_type & IRE_ONLINK)
1074			return (ire);
1075
1076		if (ire->ire_flags & RTF_SETSRC) {
1077			ASSERT(!IN6_IS_ADDR_UNSPECIFIED(
1078			    &ire->ire_setsrc_addr_v6));
1079			*v6setsrcp = ire->ire_setsrc_addr_v6;
1080			v6setsrcp = NULL;
1081		}
1082
1083		/* The first ire_gw_secattr is passed back */
1084		if (ire->ire_gw_secattr != NULL) {
1085			*gwattrp = ire->ire_gw_secattr;
1086			gwattrp = NULL;
1087		}
1088
1089		mutex_enter(&ire->ire_lock);
1090		dst = ire->ire_gateway_addr_v6;
1091		mutex_exit(&ire->ire_lock);
1092		match_flags &= ~(MATCH_IRE_GW|MATCH_IRE_MASK);
1093		ifire = ire_route_recursive_v6(&dst, ire_type, ill, zoneid, tsl,
1094		    match_flags, B_FALSE, 0, ipst, v6setsrcp, gwattrp, NULL);
1095	} else {
1096		ire = ire_route_recursive_v6(dst_addr_v6, ire_type, ill, zoneid,
1097		    tsl, match_flags, B_FALSE, 0, ipst, v6setsrcp, gwattrp,
1098		    NULL);
1099	}
1100	*pifire = ifire;
1101	return (ire);
1102}
1103
1104
1105/*
1106 * Handle IP_IOC_RTS_REQUEST ioctls
1107 */
1108int
1109ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr)
1110{
1111	conn_t	*connp = Q_TO_CONN(q);
1112	IOCP	iocp = (IOCP)mp->b_rptr;
1113	mblk_t	*mp1, *ioc_mp = mp;
1114	int	error = 0;
1115	ip_stack_t	*ipst;
1116
1117	ipst = connp->conn_netstack->netstack_ip;
1118
1119	ASSERT(mp->b_cont != NULL);
1120	/* ioc_mp holds mp */
1121	mp = mp->b_cont;
1122
1123	/*
1124	 * The Routing Socket data starts on
1125	 * next block. If there is no next block
1126	 * this is an indication from routing module
1127	 * that it is a routing socket stream queue.
1128	 * We need to support that for compatibility with SDP since
1129	 * it has a contract private interface to use IP_IOC_RTS_REQUEST.
1130	 * Note: SDP no longer uses IP_IOC_RTS_REQUEST - we can remove this.
1131	 */
1132	if (mp->b_cont == NULL) {
1133		/*
1134		 * This is a message from SDP
1135		 * indicating that this is a Routing Socket
1136		 * Stream. Insert this conn_t in routing
1137		 * socket client list.
1138		 */
1139		connp->conn_useloopback = 1;
1140		ipcl_hash_insert_wildcard(ipst->ips_rts_clients, connp);
1141		goto done;
1142	}
1143	mp1 = dupmsg(mp->b_cont);
1144	if (mp1 == NULL) {
1145		error  = ENOBUFS;
1146		goto done;
1147	}
1148	mp = mp1;
1149
1150	error = ip_rts_request_common(mp, connp, ioc_cr);
1151done:
1152	iocp->ioc_error = error;
1153	ioc_mp->b_datap->db_type = M_IOCACK;
1154	if (iocp->ioc_error != 0)
1155		iocp->ioc_count = 0;
1156	/* Note that we pass a NULL ira to rts_input */
1157	(connp->conn_recv)(connp, ioc_mp, NULL, NULL);
1158
1159	/* conn was refheld in ip_wput_ioctl. */
1160	CONN_OPER_PENDING_DONE(connp);
1161
1162	return (error);
1163}
1164
1165/*
1166 * Build a reply to the RTM_GET request contained in the given message block
1167 * using the retrieved IRE of the destination address, the parent IRE (if it
1168 * exists) and the address family.
1169 *
1170 * Returns a pointer to a message block containing the reply if successful,
1171 * otherwise NULL is returned.
1172 */
1173static mblk_t *
1174rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *ifire, const in6_addr_t *setsrc,
1175    tsol_ire_gw_secattr_t *attrp, sa_family_t af)
1176{
1177	rt_msghdr_t	*rtm;
1178	rt_msghdr_t	*new_rtm;
1179	mblk_t		*new_mp;
1180	int		rtm_addrs;
1181	int		rtm_flags;
1182	tsol_gc_t	*gc = NULL;
1183	tsol_gcgrp_t	*gcgrp = NULL;
1184	ill_t		*ill;
1185	ipif_t		*ipif = NULL;
1186	ipaddr_t	brdaddr;	/* IFF_POINTOPOINT destination */
1187	ipaddr_t	ifaddr;
1188	in6_addr_t	brdaddr6;	/* IFF_POINTOPOINT destination */
1189	in6_addr_t	ifaddr6;
1190	ipaddr_t	v4setsrc;
1191
1192	rtm = (rt_msghdr_t *)mp->b_rptr;
1193
1194	/*
1195	 * Find the ill used to send packets. This will be NULL in case
1196	 * of a reject or blackhole.
1197	 */
1198	if (ifire != NULL)
1199		ill = ire_nexthop_ill(ifire);
1200	else
1201		ill = ire_nexthop_ill(ire);
1202
1203	if (attrp != NULL) {
1204		mutex_enter(&attrp->igsa_lock);
1205		if ((gc = attrp->igsa_gc) != NULL) {
1206			gcgrp = gc->gc_grp;
1207			ASSERT(gcgrp != NULL);
1208			rw_enter(&gcgrp->gcgrp_rwlock, RW_READER);
1209		}
1210		mutex_exit(&attrp->igsa_lock);
1211	}
1212
1213	/*
1214	 * Always return RTA_DST, RTA_GATEWAY and RTA_NETMASK.
1215	 *
1216	 * The 4.4BSD-Lite2 code (net/rtsock.c) returns both
1217	 * RTA_IFP and RTA_IFA if either is defined, and also
1218	 * returns RTA_BRD if the appropriate interface is
1219	 * point-to-point.
1220	 */
1221	rtm_addrs = (RTA_DST | RTA_GATEWAY | RTA_NETMASK);
1222	if ((rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) && ill != NULL) {
1223		rtm_addrs |= (RTA_IFP | RTA_IFA);
1224		/*
1225		 * We associate an IRE with an ILL, hence we don't exactly
1226		 * know what might make sense for RTA_IFA and RTA_BRD. We
1227		 * pick the first ipif on the ill.
1228		 */
1229		ipif = ipif_get_next_ipif(NULL, ill);
1230		if (ipif != NULL) {
1231			if (ipif->ipif_isv6)
1232				ifaddr6 = ipif->ipif_v6lcl_addr;
1233			else
1234				ifaddr = ipif->ipif_lcl_addr;
1235			if (ipif->ipif_flags & IPIF_POINTOPOINT) {
1236				rtm_addrs |= RTA_BRD;
1237				if (ipif->ipif_isv6)
1238					brdaddr6 = ipif->ipif_v6pp_dst_addr;
1239				else
1240					brdaddr = ipif->ipif_pp_dst_addr;
1241			}
1242			ipif_refrele(ipif);
1243		}
1244	}
1245
1246	new_mp = rts_alloc_msg(RTM_GET, rtm_addrs, af, gc != NULL ? 1 : 0);
1247	if (new_mp == NULL) {
1248		if (gcgrp != NULL)
1249			rw_exit(&gcgrp->gcgrp_rwlock);
1250		if (ill != NULL)
1251			ill_refrele(ill);
1252		return (NULL);
1253	}
1254
1255	/*
1256	 * We set the destination address, gateway address,
1257	 * netmask and flags in the RTM_GET response depending
1258	 * on whether we found a parent IRE or not.
1259	 * In particular, if we did find a parent IRE during the
1260	 * recursive search, use that IRE's gateway address.
1261	 * Otherwise, we use the IRE's source address for the
1262	 * gateway address.
1263	 */
1264	ASSERT(af == AF_INET || af == AF_INET6);
1265	switch (af) {
1266	case AF_INET:
1267		IN6_V4MAPPED_TO_IPADDR(setsrc, v4setsrc);
1268		if (v4setsrc != INADDR_ANY)
1269			rtm_addrs |= RTA_SRC;
1270
1271		rtm_flags = ire->ire_flags;
1272		rts_fill_msg(RTM_GET, rtm_addrs, ire->ire_addr,
1273		    ire->ire_mask, ire->ire_gateway_addr, v4setsrc,
1274		    brdaddr, 0, ifaddr, ill, new_mp, gc);
1275		break;
1276	case AF_INET6:
1277		if (!IN6_IS_ADDR_UNSPECIFIED(setsrc))
1278			rtm_addrs |= RTA_SRC;
1279
1280		rtm_flags = ire->ire_flags;
1281		rts_fill_msg_v6(RTM_GET, rtm_addrs, &ire->ire_addr_v6,
1282		    &ire->ire_mask_v6, &ire->ire_gateway_addr_v6,
1283		    setsrc, &brdaddr6, &ipv6_all_zeros,
1284		    &ifaddr6, ill, new_mp, gc);
1285		break;
1286	}
1287
1288	if (gcgrp != NULL)
1289		rw_exit(&gcgrp->gcgrp_rwlock);
1290
1291	new_rtm = (rt_msghdr_t *)new_mp->b_rptr;
1292
1293	/*
1294	 * The rtm_msglen, rtm_version and rtm_type fields in
1295	 * RTM_GET response are filled in by rts_fill_msg.
1296	 *
1297	 * rtm_addrs and rtm_flags are filled in based on what
1298	 * was requested and the state of the IREs looked up
1299	 * above.
1300	 *
1301	 * rtm_inits and rtm_rmx are filled in with metrics
1302	 * based on whether a parent IRE was found or not.
1303	 *
1304	 * TODO: rtm_index and rtm_use should probably be
1305	 * filled in with something resonable here and not just
1306	 * copied from the request.
1307	 */
1308	new_rtm->rtm_index = rtm->rtm_index;
1309	new_rtm->rtm_pid = rtm->rtm_pid;
1310	new_rtm->rtm_seq = rtm->rtm_seq;
1311	new_rtm->rtm_use = rtm->rtm_use;
1312	new_rtm->rtm_addrs = rtm_addrs;
1313	new_rtm->rtm_flags = rtm_flags;
1314	new_rtm->rtm_inits = rts_getmetrics(ire, &new_rtm->rtm_rmx);
1315	if (ill != NULL)
1316		ill_refrele(ill);
1317	return (new_mp);
1318}
1319
1320/*
1321 * Fill the given if_data_t with interface statistics.
1322 */
1323static void
1324rts_getifdata(if_data_t *if_data, const ipif_t *ipif)
1325{
1326	if_data->ifi_type = ipif->ipif_ill->ill_type;
1327						/* ethernet, tokenring, etc */
1328	if_data->ifi_addrlen = 0;		/* media address length */
1329	if_data->ifi_hdrlen = 0;		/* media header length */
1330	if_data->ifi_mtu = ipif->ipif_ill->ill_mtu;	/* mtu */
1331	if_data->ifi_metric = ipif->ipif_metric; /* metric (external only) */
1332	if_data->ifi_baudrate = 0;		/* linespeed */
1333
1334	if_data->ifi_ipackets = 0;		/* packets received on if */
1335	if_data->ifi_ierrors = 0;		/* input errors on interface */
1336	if_data->ifi_opackets = 0;		/* packets sent on interface */
1337	if_data->ifi_oerrors = 0;		/* output errors on if */
1338	if_data->ifi_collisions = 0;		/* collisions on csma if */
1339	if_data->ifi_ibytes = 0;		/* total number received */
1340	if_data->ifi_obytes = 0;		/* total number sent */
1341	if_data->ifi_imcasts = 0;		/* multicast packets received */
1342	if_data->ifi_omcasts = 0;		/* multicast packets sent */
1343	if_data->ifi_iqdrops = 0;		/* dropped on input */
1344	if_data->ifi_noproto = 0;		/* destined for unsupported */
1345						/* protocol. */
1346}
1347
1348/*
1349 * Set the metrics on a forwarding table route.
1350 */
1351static void
1352rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics)
1353{
1354	clock_t		rtt;
1355	clock_t		rtt_sd;
1356	ill_t		*ill;
1357	ifrt_t		*ifrt;
1358	mblk_t		*mp;
1359	in6_addr_t	gw_addr_v6;
1360
1361	/* Need to add back some metrics to the IRE? */
1362	/*
1363	 * Bypass obtaining the lock and searching ill_saved_ire_mp in the
1364	 * common case of no metrics.
1365	 */
1366	if (which == 0)
1367		return;
1368	ire->ire_metrics.iulp_set = B_TRUE;
1369
1370	/*
1371	 * iulp_rtt and iulp_rtt_sd are in milliseconds, but 4.4BSD-Lite2's
1372	 * <net/route.h> says: rmx_rtt and rmx_rttvar are stored as
1373	 * microseconds.
1374	 */
1375	if (which & RTV_RTT)
1376		rtt = metrics->rmx_rtt / 1000;
1377	if (which & RTV_RTTVAR)
1378		rtt_sd = metrics->rmx_rttvar / 1000;
1379
1380	/*
1381	 * Update the metrics in the IRE itself.
1382	 */
1383	mutex_enter(&ire->ire_lock);
1384	if (which & RTV_MTU)
1385		ire->ire_metrics.iulp_mtu = metrics->rmx_mtu;
1386	if (which & RTV_RTT)
1387		ire->ire_metrics.iulp_rtt = rtt;
1388	if (which & RTV_SSTHRESH)
1389		ire->ire_metrics.iulp_ssthresh = metrics->rmx_ssthresh;
1390	if (which & RTV_RTTVAR)
1391		ire->ire_metrics.iulp_rtt_sd = rtt_sd;
1392	if (which & RTV_SPIPE)
1393		ire->ire_metrics.iulp_spipe = metrics->rmx_sendpipe;
1394	if (which & RTV_RPIPE)
1395		ire->ire_metrics.iulp_rpipe = metrics->rmx_recvpipe;
1396	mutex_exit(&ire->ire_lock);
1397
1398	/*
1399	 * Search through the ifrt_t chain hanging off the ILL in order to
1400	 * reflect the metric change there.
1401	 */
1402	ill = ire->ire_ill;
1403	if (ill == NULL)
1404		return;
1405	ASSERT((ill->ill_isv6 && ire->ire_ipversion == IPV6_VERSION) ||
1406	    ((!ill->ill_isv6 && ire->ire_ipversion == IPV4_VERSION)));
1407	if (ill->ill_isv6) {
1408		mutex_enter(&ire->ire_lock);
1409		gw_addr_v6 = ire->ire_gateway_addr_v6;
1410		mutex_exit(&ire->ire_lock);
1411	}
1412	mutex_enter(&ill->ill_saved_ire_lock);
1413	for (mp = ill->ill_saved_ire_mp; mp != NULL; mp = mp->b_cont) {
1414		/*
1415		 * On a given ill, the tuple of address, gateway, mask,
1416		 * ire_type and zoneid unique for each saved IRE.
1417		 */
1418		ifrt = (ifrt_t *)mp->b_rptr;
1419		if (ill->ill_isv6) {
1420			if (!IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6addr,
1421			    &ire->ire_addr_v6) ||
1422			    !IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6gateway_addr,
1423			    &gw_addr_v6) ||
1424			    !IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6mask,
1425			    &ire->ire_mask_v6))
1426				continue;
1427		} else {
1428			if (ifrt->ifrt_addr != ire->ire_addr ||
1429			    ifrt->ifrt_gateway_addr != ire->ire_gateway_addr ||
1430			    ifrt->ifrt_mask != ire->ire_mask)
1431				continue;
1432		}
1433		if (ifrt->ifrt_zoneid != ire->ire_zoneid ||
1434		    ifrt->ifrt_type != ire->ire_type)
1435			continue;
1436
1437		if (which & RTV_MTU)
1438			ifrt->ifrt_metrics.iulp_mtu = metrics->rmx_mtu;
1439		if (which & RTV_RTT)
1440			ifrt->ifrt_metrics.iulp_rtt = rtt;
1441		if (which & RTV_SSTHRESH) {
1442			ifrt->ifrt_metrics.iulp_ssthresh =
1443			    metrics->rmx_ssthresh;
1444		}
1445		if (which & RTV_RTTVAR)
1446			ifrt->ifrt_metrics.iulp_rtt_sd = metrics->rmx_rttvar;
1447		if (which & RTV_SPIPE)
1448			ifrt->ifrt_metrics.iulp_spipe = metrics->rmx_sendpipe;
1449		if (which & RTV_RPIPE)
1450			ifrt->ifrt_metrics.iulp_rpipe = metrics->rmx_recvpipe;
1451		break;
1452	}
1453	mutex_exit(&ill->ill_saved_ire_lock);
1454
1455	/*
1456	 * Update any IRE_IF_CLONE hanging created from this IRE_IF so they
1457	 * get any new iulp_mtu.
1458	 * We do that by deleting them; ire_create_if_clone will pick
1459	 * up the new metrics.
1460	 */
1461	if ((ire->ire_type & IRE_INTERFACE) && ire->ire_dep_children != 0)
1462		ire_dep_delete_if_clone(ire);
1463}
1464
1465/*
1466 * Get the metrics from a forwarding table route.
1467 */
1468static int
1469rts_getmetrics(ire_t *ire, rt_metrics_t *metrics)
1470{
1471	int	metrics_set = 0;
1472
1473	bzero(metrics, sizeof (rt_metrics_t));
1474
1475	/*
1476	 * iulp_rtt and iulp_rtt_sd are in milliseconds, but 4.4BSD-Lite2's
1477	 * <net/route.h> says: rmx_rtt and rmx_rttvar are stored as
1478	 * microseconds.
1479	 */
1480	metrics->rmx_rtt = ire->ire_metrics.iulp_rtt * 1000;
1481	metrics_set |= RTV_RTT;
1482	metrics->rmx_mtu = ire->ire_metrics.iulp_mtu;
1483	metrics_set |= RTV_MTU;
1484	metrics->rmx_ssthresh = ire->ire_metrics.iulp_ssthresh;
1485	metrics_set |= RTV_SSTHRESH;
1486	metrics->rmx_rttvar = ire->ire_metrics.iulp_rtt_sd * 1000;
1487	metrics_set |= RTV_RTTVAR;
1488	metrics->rmx_sendpipe = ire->ire_metrics.iulp_spipe;
1489	metrics_set |= RTV_SPIPE;
1490	metrics->rmx_recvpipe = ire->ire_metrics.iulp_rpipe;
1491	metrics_set |= RTV_RPIPE;
1492	return (metrics_set);
1493}
1494
1495/*
1496 * Given two sets of metrics (src and dst), use the dst values if they are
1497 * set. If a dst value is not set but the src value is set, then we use
1498 * the src value.
1499 * dst is updated with the new values.
1500 * This is used to merge information from a dce_t and ire_metrics, where the
1501 * dce values takes precedence.
1502 */
1503void
1504rts_merge_metrics(iulp_t *dst, const iulp_t *src)
1505{
1506	if (!src->iulp_set)
1507		return;
1508
1509	if (dst->iulp_ssthresh == 0)
1510		dst->iulp_ssthresh = src->iulp_ssthresh;
1511	if (dst->iulp_rtt == 0)
1512		dst->iulp_rtt = src->iulp_rtt;
1513	if (dst->iulp_rtt_sd == 0)
1514		dst->iulp_rtt_sd = src->iulp_rtt_sd;
1515	if (dst->iulp_spipe == 0)
1516		dst->iulp_spipe = src->iulp_spipe;
1517	if (dst->iulp_rpipe == 0)
1518		dst->iulp_rpipe = src->iulp_rpipe;
1519	if (dst->iulp_rtomax == 0)
1520		dst->iulp_rtomax = src->iulp_rtomax;
1521	if (dst->iulp_sack == 0)
1522		dst->iulp_sack = src->iulp_sack;
1523	if (dst->iulp_tstamp_ok == 0)
1524		dst->iulp_tstamp_ok = src->iulp_tstamp_ok;
1525	if (dst->iulp_wscale_ok == 0)
1526		dst->iulp_wscale_ok = src->iulp_wscale_ok;
1527	if (dst->iulp_ecn_ok == 0)
1528		dst->iulp_ecn_ok = src->iulp_ecn_ok;
1529	if (dst->iulp_pmtud_ok == 0)
1530		dst->iulp_pmtud_ok = src->iulp_pmtud_ok;
1531	if (dst->iulp_mtu == 0)
1532		dst->iulp_mtu = src->iulp_mtu;
1533}
1534
1535
1536/*
1537 * Takes a pointer to a routing message and extracts necessary info by looking
1538 * at the rtm->rtm_addrs bits and store the requested sockaddrs in the pointers
1539 * passed (all of which must be valid).
1540 *
1541 * The bitmask of sockaddrs actually found in the message is returned, or zero
1542 * is returned in the case of an error.
1543 */
1544static int
1545rts_getaddrs(rt_msghdr_t *rtm, in6_addr_t *dst_addrp, in6_addr_t *gw_addrp,
1546    in6_addr_t *net_maskp, in6_addr_t *authorp, in6_addr_t *if_addrp,
1547    in6_addr_t *in_src_addrp, ushort_t *indexp, sa_family_t *afp,
1548    tsol_rtsecattr_t *rtsecattr, int *error)
1549{
1550	struct sockaddr *sa;
1551	int	i;
1552	int	addr_bits;
1553	int	length;
1554	int	found_addrs = 0;
1555	caddr_t	cp;
1556	size_t	size;
1557	struct sockaddr_dl *sdl;
1558
1559	*dst_addrp = ipv6_all_zeros;
1560	*gw_addrp = ipv6_all_zeros;
1561	*net_maskp = ipv6_all_zeros;
1562	*authorp = ipv6_all_zeros;
1563	*if_addrp = ipv6_all_zeros;
1564	*in_src_addrp = ipv6_all_zeros;
1565	*indexp = 0;
1566	*afp = AF_UNSPEC;
1567	rtsecattr->rtsa_cnt = 0;
1568	*error = 0;
1569
1570	/*
1571	 * At present we handle only RTA_DST, RTA_GATEWAY, RTA_NETMASK, RTA_IFP,
1572	 * RTA_IFA and RTA_AUTHOR.  The rest will be added as we need them.
1573	 */
1574	cp = (caddr_t)&rtm[1];
1575	length = rtm->rtm_msglen;
1576	for (i = 0; (i < RTA_NUMBITS) && ((cp - (caddr_t)rtm) < length); i++) {
1577		/*
1578		 * The address family we are working with starts out as
1579		 * AF_UNSPEC, but is set to the one specified with the
1580		 * destination address.
1581		 *
1582		 * If the "working" address family that has been set to
1583		 * something other than AF_UNSPEC, then the address family of
1584		 * subsequent sockaddrs must either be AF_UNSPEC (for
1585		 * compatibility with older programs) or must be the same as our
1586		 * "working" one.
1587		 *
1588		 * This code assumes that RTA_DST (1) comes first in the loop.
1589		 */
1590		sa = (struct sockaddr *)cp;
1591		addr_bits = (rtm->rtm_addrs & (1 << i));
1592		if (addr_bits == 0)
1593			continue;
1594		switch (addr_bits) {
1595		case RTA_DST:
1596			size = rts_copyfromsockaddr(sa, dst_addrp);
1597			*afp = sa->sa_family;
1598			break;
1599		case RTA_GATEWAY:
1600			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
1601				return (0);
1602			size = rts_copyfromsockaddr(sa, gw_addrp);
1603			break;
1604		case RTA_NETMASK:
1605			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
1606				return (0);
1607			size = rts_copyfromsockaddr(sa, net_maskp);
1608			break;
1609		case RTA_IFP:
1610			if (sa->sa_family != AF_LINK &&
1611			    sa->sa_family != AF_UNSPEC)
1612				return (0);
1613			sdl = (struct sockaddr_dl *)cp;
1614			*indexp = sdl->sdl_index;
1615			size = sizeof (struct sockaddr_dl);
1616			break;
1617		case RTA_SRC:
1618			/* Source address of the incoming packet */
1619			size = rts_copyfromsockaddr(sa, in_src_addrp);
1620			*afp = sa->sa_family;
1621			break;
1622		case RTA_IFA:
1623			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
1624				return (0);
1625			size = rts_copyfromsockaddr(sa, if_addrp);
1626			break;
1627		case RTA_AUTHOR:
1628			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
1629				return (0);
1630			size = rts_copyfromsockaddr(sa, authorp);
1631			break;
1632		default:
1633			return (0);
1634		}
1635		if (size == 0)
1636			return (0);
1637		cp += size;
1638		found_addrs |= addr_bits;
1639	}
1640
1641	/*
1642	 * Parse the routing message and look for any security-
1643	 * related attributes for the route.  For each valid
1644	 * attribute, allocate/obtain the corresponding kernel
1645	 * route security attributes.
1646	 */
1647	if (((cp - (caddr_t)rtm) < length) && is_system_labeled()) {
1648		*error = tsol_rtsa_init(rtm, rtsecattr, cp);
1649		ASSERT(rtsecattr->rtsa_cnt <= TSOL_RTSA_REQUEST_MAX);
1650	}
1651
1652	return (found_addrs);
1653}
1654
1655/*
1656 * Fills the message with the given info.
1657 */
1658static void
1659rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst, ipaddr_t mask,
1660    ipaddr_t gateway, ipaddr_t src_addr, ipaddr_t brd_addr, ipaddr_t author,
1661    ipaddr_t ifaddr, const ill_t *ill, mblk_t *mp,
1662    const tsol_gc_t *gc)
1663{
1664	rt_msghdr_t	*rtm;
1665	sin_t		*sin;
1666	size_t		data_size, header_size;
1667	uchar_t		*cp;
1668	int		i;
1669
1670	ASSERT(mp != NULL);
1671	/*
1672	 * First find the type of the message
1673	 * and its length.
1674	 */
1675	header_size = rts_header_msg_size(type);
1676	/*
1677	 * Now find the size of the data
1678	 * that follows the message header.
1679	 */
1680	data_size = rts_data_msg_size(rtm_addrs, AF_INET, gc != NULL ? 1 : 0);
1681
1682	rtm = (rt_msghdr_t *)mp->b_rptr;
1683	mp->b_wptr = &mp->b_rptr[header_size];
1684	cp = mp->b_wptr;
1685	bzero(cp, data_size);
1686	for (i = 0; i < RTA_NUMBITS; i++) {
1687		sin = (sin_t *)cp;
1688		switch (rtm_addrs & (1 << i)) {
1689		case RTA_DST:
1690			sin->sin_addr.s_addr = dst;
1691			sin->sin_family = AF_INET;
1692			cp += sizeof (sin_t);
1693			break;
1694		case RTA_GATEWAY:
1695			sin->sin_addr.s_addr = gateway;
1696			sin->sin_family = AF_INET;
1697			cp += sizeof (sin_t);
1698			break;
1699		case RTA_NETMASK:
1700			sin->sin_addr.s_addr = mask;
1701			sin->sin_family = AF_INET;
1702			cp += sizeof (sin_t);
1703			break;
1704		case RTA_IFP:
1705			cp += ill_dls_info((struct sockaddr_dl *)cp, ill);
1706			break;
1707		case RTA_IFA:
1708			sin->sin_addr.s_addr = ifaddr;
1709			sin->sin_family = AF_INET;
1710			cp += sizeof (sin_t);
1711			break;
1712		case RTA_SRC:
1713			sin->sin_addr.s_addr = src_addr;
1714			sin->sin_family = AF_INET;
1715			cp += sizeof (sin_t);
1716			break;
1717		case RTA_AUTHOR:
1718			sin->sin_addr.s_addr = author;
1719			sin->sin_family = AF_INET;
1720			cp += sizeof (sin_t);
1721			break;
1722		case RTA_BRD:
1723			/*
1724			 * RTA_BRD is used typically to specify a point-to-point
1725			 * destination address.
1726			 */
1727			sin->sin_addr.s_addr = brd_addr;
1728			sin->sin_family = AF_INET;
1729			cp += sizeof (sin_t);
1730			break;
1731		}
1732	}
1733
1734	if (gc != NULL) {
1735		rtm_ext_t *rtm_ext;
1736		struct rtsa_s *rp_dst;
1737		tsol_rtsecattr_t *rsap;
1738
1739		ASSERT(gc->gc_grp != NULL);
1740		ASSERT(RW_LOCK_HELD(&gc->gc_grp->gcgrp_rwlock));
1741
1742		rtm_ext = (rtm_ext_t *)cp;
1743		rtm_ext->rtmex_type = RTMEX_GATEWAY_SECATTR;
1744		rtm_ext->rtmex_len = TSOL_RTSECATTR_SIZE(1);
1745
1746		rsap = (tsol_rtsecattr_t *)(rtm_ext + 1);
1747		rsap->rtsa_cnt = 1;
1748		rp_dst = rsap->rtsa_attr;
1749
1750		ASSERT(gc->gc_db != NULL);
1751		bcopy(&gc->gc_db->gcdb_attr, rp_dst, sizeof (*rp_dst));
1752		cp = (uchar_t *)rp_dst;
1753	}
1754
1755	mp->b_wptr = cp;
1756	mp->b_cont = NULL;
1757	/*
1758	 * set the fields that are common to
1759	 * to different messages.
1760	 */
1761	rtm->rtm_msglen = (short)(header_size + data_size);
1762	rtm->rtm_version = RTM_VERSION;
1763	rtm->rtm_type = (uchar_t)type;
1764}
1765
1766/*
1767 * Allocates and initializes a routing socket message.
1768 * Note that sacnt is either zero or one.
1769 */
1770mblk_t *
1771rts_alloc_msg(int type, int rtm_addrs, sa_family_t af, uint_t sacnt)
1772{
1773	size_t	length;
1774	mblk_t	*mp;
1775
1776	length = RTS_MSG_SIZE(type, rtm_addrs, af, sacnt);
1777	mp = allocb(length, BPRI_MED);
1778	if (mp == NULL)
1779		return (mp);
1780	bzero(mp->b_rptr, length);
1781	return (mp);
1782}
1783
1784/*
1785 * Returns the size of the routing
1786 * socket message header size.
1787 */
1788size_t
1789rts_header_msg_size(int type)
1790{
1791	switch (type) {
1792	case RTM_DELADDR:
1793	case RTM_NEWADDR:
1794		return (sizeof (ifa_msghdr_t));
1795	case RTM_IFINFO:
1796		return (sizeof (if_msghdr_t));
1797	default:
1798		return (sizeof (rt_msghdr_t));
1799	}
1800}
1801
1802/*
1803 * Returns the size of the message needed with the given rtm_addrs and family.
1804 *
1805 * It is assumed that all of the sockaddrs (with the exception of RTA_IFP) are
1806 * of the same family (currently either AF_INET or AF_INET6).
1807 */
1808size_t
1809rts_data_msg_size(int rtm_addrs, sa_family_t af, uint_t sacnt)
1810{
1811	int	i;
1812	size_t	length = 0;
1813
1814	for (i = 0; i < RTA_NUMBITS; i++) {
1815		switch (rtm_addrs & (1 << i)) {
1816		case RTA_IFP:
1817			length += sizeof (struct sockaddr_dl);
1818			break;
1819		case RTA_DST:
1820		case RTA_GATEWAY:
1821		case RTA_NETMASK:
1822		case RTA_SRC:
1823		case RTA_IFA:
1824		case RTA_AUTHOR:
1825		case RTA_BRD:
1826			ASSERT(af == AF_INET || af == AF_INET6);
1827			switch (af) {
1828			case AF_INET:
1829				length += sizeof (sin_t);
1830				break;
1831			case AF_INET6:
1832				length += sizeof (sin6_t);
1833				break;
1834			}
1835			break;
1836		}
1837	}
1838	if (sacnt > 0)
1839		length += sizeof (rtm_ext_t) + TSOL_RTSECATTR_SIZE(sacnt);
1840
1841	return (length);
1842}
1843
1844/*
1845 * This routine is called to generate a message to the routing
1846 * socket indicating that a redirect has occured, a routing lookup
1847 * has failed, or that a protocol has detected timeouts to a particular
1848 * destination. This routine is called for message types RTM_LOSING,
1849 * RTM_REDIRECT, and RTM_MISS.
1850 */
1851void
1852ip_rts_change(int type, ipaddr_t dst_addr, ipaddr_t gw_addr, ipaddr_t net_mask,
1853    ipaddr_t source, ipaddr_t author, int flags, int error, int rtm_addrs,
1854    ip_stack_t *ipst)
1855{
1856	rt_msghdr_t	*rtm;
1857	mblk_t		*mp;
1858
1859	if (rtm_addrs == 0)
1860		return;
1861	mp = rts_alloc_msg(type, rtm_addrs, AF_INET, 0);
1862	if (mp == NULL)
1863		return;
1864	rts_fill_msg(type, rtm_addrs, dst_addr, net_mask, gw_addr, source, 0,
1865	    author, 0, NULL, mp, NULL);
1866	rtm = (rt_msghdr_t *)mp->b_rptr;
1867	rtm->rtm_flags = flags;
1868	rtm->rtm_errno = error;
1869	rtm->rtm_flags |= RTF_DONE;
1870	rtm->rtm_addrs = rtm_addrs;
1871	rts_queue_input(mp, NULL, AF_INET, RTSQ_ALL, ipst);
1872}
1873
1874/*
1875 * This routine is called to generate a message to the routing
1876 * socket indicating that the status of a network interface has changed.
1877 * Message type generated RTM_IFINFO.
1878 */
1879void
1880ip_rts_ifmsg(const ipif_t *ipif, uint_t flags)
1881{
1882	ip_rts_xifmsg(ipif, 0, 0, flags);
1883}
1884
1885void
1886ip_rts_xifmsg(const ipif_t *ipif, uint64_t set, uint64_t clear, uint_t flags)
1887{
1888	if_msghdr_t	*ifm;
1889	mblk_t		*mp;
1890	sa_family_t	af;
1891	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
1892
1893	/*
1894	 * This message should be generated only
1895	 * when the physical device is changing
1896	 * state.
1897	 */
1898	if (ipif->ipif_id != 0)
1899		return;
1900	if (ipif->ipif_isv6) {
1901		af = AF_INET6;
1902		mp = rts_alloc_msg(RTM_IFINFO, RTA_IFP, af, 0);
1903		if (mp == NULL)
1904			return;
1905		rts_fill_msg_v6(RTM_IFINFO, RTA_IFP, &ipv6_all_zeros,
1906		    &ipv6_all_zeros, &ipv6_all_zeros, &ipv6_all_zeros,
1907		    &ipv6_all_zeros, &ipv6_all_zeros, &ipv6_all_zeros,
1908		    ipif->ipif_ill, mp, NULL);
1909	} else {
1910		af = AF_INET;
1911		mp = rts_alloc_msg(RTM_IFINFO, RTA_IFP, af, 0);
1912		if (mp == NULL)
1913			return;
1914		rts_fill_msg(RTM_IFINFO, RTA_IFP, 0, 0, 0, 0, 0, 0, 0,
1915		    ipif->ipif_ill, mp, NULL);
1916	}
1917	ifm = (if_msghdr_t *)mp->b_rptr;
1918	ifm->ifm_index = ipif->ipif_ill->ill_phyint->phyint_ifindex;
1919	ifm->ifm_flags = (ipif->ipif_flags | ipif->ipif_ill->ill_flags |
1920	    ipif->ipif_ill->ill_phyint->phyint_flags | set) & ~clear;
1921	rts_getifdata(&ifm->ifm_data, ipif);
1922	ifm->ifm_addrs = RTA_IFP;
1923
1924	if (flags & RTSQ_DEFAULT) {
1925		flags = RTSQ_ALL;
1926		/*
1927		 * If this message is for an underlying interface, prevent
1928		 * "normal" (IPMP-unaware) routing sockets from seeing it.
1929		 */
1930		if (IS_UNDER_IPMP(ipif->ipif_ill))
1931			flags &= ~RTSQ_NORMAL;
1932	}
1933
1934	rts_queue_input(mp, NULL, af, flags, ipst);
1935}
1936
1937/*
1938 * This is called to generate messages to the routing socket
1939 * indicating a network interface has had addresses associated with it.
1940 * The structure of the code is based on the 4.4BSD-Lite2 <net/rtsock.c>.
1941 */
1942void
1943ip_rts_newaddrmsg(int cmd, int error, const ipif_t *ipif, uint_t flags)
1944{
1945	int		pass;
1946	int		ncmd;
1947	int		rtm_addrs;
1948	mblk_t		*mp;
1949	ifa_msghdr_t	*ifam;
1950	rt_msghdr_t	*rtm;
1951	sa_family_t	af;
1952	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
1953
1954	/*
1955	 * Let conn_ixa caching know that source address selection
1956	 * changed
1957	 */
1958	ip_update_source_selection(ipst);
1959
1960	if (ipif->ipif_isv6)
1961		af = AF_INET6;
1962	else
1963		af = AF_INET;
1964
1965	if (flags & RTSQ_DEFAULT) {
1966		flags = RTSQ_ALL;
1967		/*
1968		 * If this message is for an underlying interface, prevent
1969		 * "normal" (IPMP-unaware) routing sockets from seeing it.
1970		 */
1971		if (IS_UNDER_IPMP(ipif->ipif_ill))
1972			flags &= ~RTSQ_NORMAL;
1973	}
1974
1975	/*
1976	 * If the request is DELETE, send RTM_DELETE and RTM_DELADDR.
1977	 * if the request is ADD, send RTM_NEWADDR and RTM_ADD.
1978	 */
1979	for (pass = 1; pass < 3; pass++) {
1980		if ((cmd == RTM_ADD && pass == 1) ||
1981		    (cmd == RTM_DELETE && pass == 2)) {
1982			ncmd = ((cmd == RTM_ADD) ? RTM_NEWADDR : RTM_DELADDR);
1983
1984			rtm_addrs = (RTA_IFA | RTA_NETMASK | RTA_BRD | RTA_IFP);
1985			mp = rts_alloc_msg(ncmd, rtm_addrs, af, 0);
1986			if (mp == NULL)
1987				continue;
1988			switch (af) {
1989			case AF_INET:
1990				rts_fill_msg(ncmd, rtm_addrs, 0,
1991				    ipif->ipif_net_mask, 0, ipif->ipif_lcl_addr,
1992				    ipif->ipif_pp_dst_addr, 0,
1993				    ipif->ipif_lcl_addr, ipif->ipif_ill,
1994				    mp, NULL);
1995				break;
1996			case AF_INET6:
1997				rts_fill_msg_v6(ncmd, rtm_addrs,
1998				    &ipv6_all_zeros, &ipif->ipif_v6net_mask,
1999				    &ipv6_all_zeros, &ipif->ipif_v6lcl_addr,
2000				    &ipif->ipif_v6pp_dst_addr, &ipv6_all_zeros,
2001				    &ipif->ipif_v6lcl_addr, ipif->ipif_ill,
2002				    mp, NULL);
2003				break;
2004			}
2005			ifam = (ifa_msghdr_t *)mp->b_rptr;
2006			ifam->ifam_index =
2007			    ipif->ipif_ill->ill_phyint->phyint_ifindex;
2008			ifam->ifam_metric = ipif->ipif_metric;
2009			ifam->ifam_flags = ((cmd == RTM_ADD) ? RTF_UP : 0);
2010			ifam->ifam_addrs = rtm_addrs;
2011			rts_queue_input(mp, NULL, af, flags, ipst);
2012		}
2013		if ((cmd == RTM_ADD && pass == 2) ||
2014		    (cmd == RTM_DELETE && pass == 1)) {
2015			rtm_addrs = (RTA_DST | RTA_NETMASK);
2016			mp = rts_alloc_msg(cmd, rtm_addrs, af, 0);
2017			if (mp == NULL)
2018				continue;
2019			switch (af) {
2020			case AF_INET:
2021				rts_fill_msg(cmd, rtm_addrs,
2022				    ipif->ipif_lcl_addr, ipif->ipif_net_mask, 0,
2023				    0, 0, 0, 0, NULL, mp, NULL);
2024				break;
2025			case AF_INET6:
2026				rts_fill_msg_v6(cmd, rtm_addrs,
2027				    &ipif->ipif_v6lcl_addr,
2028				    &ipif->ipif_v6net_mask, &ipv6_all_zeros,
2029				    &ipv6_all_zeros, &ipv6_all_zeros,
2030				    &ipv6_all_zeros, &ipv6_all_zeros,
2031				    NULL, mp, NULL);
2032				break;
2033			}
2034			rtm = (rt_msghdr_t *)mp->b_rptr;
2035			rtm->rtm_index =
2036			    ipif->ipif_ill->ill_phyint->phyint_ifindex;
2037			rtm->rtm_flags = ((cmd == RTM_ADD) ? RTF_UP : 0);
2038			rtm->rtm_errno = error;
2039			if (error == 0)
2040				rtm->rtm_flags |= RTF_DONE;
2041			rtm->rtm_addrs = rtm_addrs;
2042			rts_queue_input(mp, NULL, af, flags, ipst);
2043		}
2044	}
2045}
2046
2047/*
2048 * Based on the address family specified in a sockaddr, copy the address field
2049 * into an in6_addr_t.
2050 *
2051 * In the case of AF_UNSPEC, we assume the family is actually AF_INET for
2052 * compatibility with programs that leave the family cleared in the sockaddr.
2053 * Callers of rts_copyfromsockaddr should check the family themselves if they
2054 * wish to verify its value.
2055 *
2056 * In the case of AF_INET6, a check is made to ensure that address is not an
2057 * IPv4-mapped address.
2058 */
2059size_t
2060rts_copyfromsockaddr(struct sockaddr *sa, in6_addr_t *addrp)
2061{
2062	switch (sa->sa_family) {
2063	case AF_INET:
2064	case AF_UNSPEC:
2065		IN6_IPADDR_TO_V4MAPPED(((sin_t *)sa)->sin_addr.s_addr, addrp);
2066		return (sizeof (sin_t));
2067	case AF_INET6:
2068		*addrp = ((sin6_t *)sa)->sin6_addr;
2069		if (IN6_IS_ADDR_V4MAPPED(addrp))
2070			return (0);
2071		return (sizeof (sin6_t));
2072	default:
2073		return (0);
2074	}
2075}
2076