rtsock.c revision 331722
1/*-
2 * Copyright (c) 1988, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	@(#)rtsock.c	8.7 (Berkeley) 10/12/95
30 * $FreeBSD: stable/11/sys/net/rtsock.c 331722 2018-03-29 02:50:57Z eadler $
31 */
32#include "opt_compat.h"
33#include "opt_mpath.h"
34#include "opt_inet.h"
35#include "opt_inet6.h"
36
37#include <sys/param.h>
38#include <sys/jail.h>
39#include <sys/kernel.h>
40#include <sys/domain.h>
41#include <sys/lock.h>
42#include <sys/malloc.h>
43#include <sys/mbuf.h>
44#include <sys/priv.h>
45#include <sys/proc.h>
46#include <sys/protosw.h>
47#include <sys/rwlock.h>
48#include <sys/signalvar.h>
49#include <sys/socket.h>
50#include <sys/socketvar.h>
51#include <sys/sysctl.h>
52#include <sys/systm.h>
53
54#include <net/if.h>
55#include <net/if_var.h>
56#include <net/if_dl.h>
57#include <net/if_llatbl.h>
58#include <net/if_types.h>
59#include <net/netisr.h>
60#include <net/raw_cb.h>
61#include <net/route.h>
62#include <net/route_var.h>
63#include <net/vnet.h>
64
65#include <netinet/in.h>
66#include <netinet/if_ether.h>
67#include <netinet/ip_carp.h>
68#ifdef INET6
69#include <netinet6/ip6_var.h>
70#include <netinet6/scope6_var.h>
71#endif
72
73#ifdef COMPAT_FREEBSD32
74#include <sys/mount.h>
75#include <compat/freebsd32/freebsd32.h>
76
77struct if_msghdr32 {
78	uint16_t ifm_msglen;
79	uint8_t	ifm_version;
80	uint8_t	ifm_type;
81	int32_t	ifm_addrs;
82	int32_t	ifm_flags;
83	uint16_t ifm_index;
84	struct	if_data ifm_data;
85};
86
87struct if_msghdrl32 {
88	uint16_t ifm_msglen;
89	uint8_t	ifm_version;
90	uint8_t	ifm_type;
91	int32_t	ifm_addrs;
92	int32_t	ifm_flags;
93	uint16_t ifm_index;
94	uint16_t _ifm_spare1;
95	uint16_t ifm_len;
96	uint16_t ifm_data_off;
97	struct	if_data ifm_data;
98};
99
100struct ifa_msghdrl32 {
101	uint16_t ifam_msglen;
102	uint8_t	ifam_version;
103	uint8_t	ifam_type;
104	int32_t	ifam_addrs;
105	int32_t	ifam_flags;
106	uint16_t ifam_index;
107	uint16_t _ifam_spare1;
108	uint16_t ifam_len;
109	uint16_t ifam_data_off;
110	int32_t	ifam_metric;
111	struct	if_data ifam_data;
112};
113
/*
 * Space occupied by a sockaddr in a 32-bit compat message: sa_len rounded
 * up to a multiple of sizeof(int), or sizeof(int) when sa_len is zero.
 * The argument is evaluated more than once.
 */
#define SA_SIZE32(sa)							\
    ((((struct sockaddr *)(sa))->sa_len != 0) ?				\
	((((struct sockaddr *)(sa))->sa_len + (sizeof(int) - 1)) &	\
	    ~(sizeof(int) - 1)) :					\
	sizeof(int))
118
119#endif /* COMPAT_FREEBSD32 */
120
121MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables");
122
123/* NB: these are not modified */
124static struct	sockaddr route_src = { 2, PF_ROUTE, };
125static struct	sockaddr sa_zero   = { sizeof(sa_zero), AF_INET, };
126
127/* These are external hooks for CARP. */
128int	(*carp_get_vhid_p)(struct ifaddr *);
129
130/*
131 * Used by rtsock/raw_input callback code to decide whether to filter the update
132 * notification to a socket bound to a particular FIB.
133 */
134#define	RTS_FILTER_FIB	M_PROTO8
135
136typedef struct {
137	int	ip_count;	/* attached w/ AF_INET */
138	int	ip6_count;	/* attached w/ AF_INET6 */
139	int	any_count;	/* total attached */
140} route_cb_t;
141static VNET_DEFINE(route_cb_t, route_cb);
142#define	V_route_cb VNET(route_cb)
143
144struct mtx rtsock_mtx;
145MTX_SYSINIT(rtsock, &rtsock_mtx, "rtsock route_cb lock", MTX_DEF);
146
147#define	RTSOCK_LOCK()	mtx_lock(&rtsock_mtx)
148#define	RTSOCK_UNLOCK()	mtx_unlock(&rtsock_mtx)
149#define	RTSOCK_LOCK_ASSERT()	mtx_assert(&rtsock_mtx, MA_OWNED)
150
151static SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RD, 0, "");
152
153struct walkarg {
154	int	w_tmemsize;
155	int	w_op, w_arg;
156	caddr_t	w_tmem;
157	struct sysctl_req *w_req;
158};
159
160static void	rts_input(struct mbuf *m);
161static struct mbuf *rtsock_msg_mbuf(int type, struct rt_addrinfo *rtinfo);
162static int	rtsock_msg_buffer(int type, struct rt_addrinfo *rtinfo,
163			struct walkarg *w, int *plen);
164static int	rt_xaddrs(caddr_t cp, caddr_t cplim,
165			struct rt_addrinfo *rtinfo);
166static int	sysctl_dumpentry(struct radix_node *rn, void *vw);
167static int	sysctl_iflist(int af, struct walkarg *w);
168static int	sysctl_ifmalist(int af, struct walkarg *w);
169static int	route_output(struct mbuf *m, struct socket *so, ...);
170static void	rt_getmetrics(const struct rtentry *rt, struct rt_metrics *out);
171static void	rt_dispatch(struct mbuf *, sa_family_t);
172static struct sockaddr	*rtsock_fix_netmask(struct sockaddr *dst,
173			struct sockaddr *smask, struct sockaddr_storage *dmask);
174
175static struct netisr_handler rtsock_nh = {
176	.nh_name = "rtsock",
177	.nh_handler = rts_input,
178	.nh_proto = NETISR_ROUTE,
179	.nh_policy = NETISR_POLICY_SOURCE,
180};
181
182static int
183sysctl_route_netisr_maxqlen(SYSCTL_HANDLER_ARGS)
184{
185	int error, qlimit;
186
187	netisr_getqlimit(&rtsock_nh, &qlimit);
188	error = sysctl_handle_int(oidp, &qlimit, 0, req);
189        if (error || !req->newptr)
190                return (error);
191	if (qlimit < 1)
192		return (EINVAL);
193	return (netisr_setqlimit(&rtsock_nh, qlimit));
194}
195SYSCTL_PROC(_net_route, OID_AUTO, netisr_maxqlen, CTLTYPE_INT|CTLFLAG_RW,
196    0, 0, sysctl_route_netisr_maxqlen, "I",
197    "maximum routing socket dispatch queue length");
198
199static void
200vnet_rts_init(void)
201{
202	int tmp;
203
204	if (IS_DEFAULT_VNET(curvnet)) {
205		if (TUNABLE_INT_FETCH("net.route.netisr_maxqlen", &tmp))
206			rtsock_nh.nh_qlimit = tmp;
207		netisr_register(&rtsock_nh);
208	}
209#ifdef VIMAGE
210	 else
211		netisr_register_vnet(&rtsock_nh);
212#endif
213}
214VNET_SYSINIT(vnet_rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
215    vnet_rts_init, 0);
216
217#ifdef VIMAGE
218static void
219vnet_rts_uninit(void)
220{
221
222	netisr_unregister_vnet(&rtsock_nh);
223}
224VNET_SYSUNINIT(vnet_rts_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
225    vnet_rts_uninit, 0);
226#endif
227
228static int
229raw_input_rts_cb(struct mbuf *m, struct sockproto *proto, struct sockaddr *src,
230    struct rawcb *rp)
231{
232	int fibnum;
233
234	KASSERT(m != NULL, ("%s: m is NULL", __func__));
235	KASSERT(proto != NULL, ("%s: proto is NULL", __func__));
236	KASSERT(rp != NULL, ("%s: rp is NULL", __func__));
237
238	/* No filtering requested. */
239	if ((m->m_flags & RTS_FILTER_FIB) == 0)
240		return (0);
241
242	/* Check if it is a rts and the fib matches the one of the socket. */
243	fibnum = M_GETFIB(m);
244	if (proto->sp_family != PF_ROUTE ||
245	    rp->rcb_socket == NULL ||
246	    rp->rcb_socket->so_fibnum == fibnum)
247		return (0);
248
249	/* Filtering requested and no match, the socket shall be skipped. */
250	return (1);
251}
252
253static void
254rts_input(struct mbuf *m)
255{
256	struct sockproto route_proto;
257	unsigned short *family;
258	struct m_tag *tag;
259
260	route_proto.sp_family = PF_ROUTE;
261	tag = m_tag_find(m, PACKET_TAG_RTSOCKFAM, NULL);
262	if (tag != NULL) {
263		family = (unsigned short *)(tag + 1);
264		route_proto.sp_protocol = *family;
265		m_tag_delete(m, tag);
266	} else
267		route_proto.sp_protocol = 0;
268
269	raw_input_ext(m, &route_proto, &route_src, raw_input_rts_cb);
270}
271
272/*
273 * It really doesn't make any sense at all for this code to share much
274 * with raw_usrreq.c, since its functionality is so restricted.  XXX
275 */
276static void
277rts_abort(struct socket *so)
278{
279
280	raw_usrreqs.pru_abort(so);
281}
282
283static void
284rts_close(struct socket *so)
285{
286
287	raw_usrreqs.pru_close(so);
288}
289
290/* pru_accept is EOPNOTSUPP */
291
292static int
293rts_attach(struct socket *so, int proto, struct thread *td)
294{
295	struct rawcb *rp;
296	int error;
297
298	KASSERT(so->so_pcb == NULL, ("rts_attach: so_pcb != NULL"));
299
300	/* XXX */
301	rp = malloc(sizeof *rp, M_PCB, M_WAITOK | M_ZERO);
302
303	so->so_pcb = (caddr_t)rp;
304	so->so_fibnum = td->td_proc->p_fibnum;
305	error = raw_attach(so, proto);
306	rp = sotorawcb(so);
307	if (error) {
308		so->so_pcb = NULL;
309		free(rp, M_PCB);
310		return error;
311	}
312	RTSOCK_LOCK();
313	switch(rp->rcb_proto.sp_protocol) {
314	case AF_INET:
315		V_route_cb.ip_count++;
316		break;
317	case AF_INET6:
318		V_route_cb.ip6_count++;
319		break;
320	}
321	V_route_cb.any_count++;
322	RTSOCK_UNLOCK();
323	soisconnected(so);
324	so->so_options |= SO_USELOOPBACK;
325	return 0;
326}
327
328static int
329rts_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
330{
331
332	return (raw_usrreqs.pru_bind(so, nam, td)); /* xxx just EINVAL */
333}
334
335static int
336rts_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
337{
338
339	return (raw_usrreqs.pru_connect(so, nam, td)); /* XXX just EINVAL */
340}
341
342/* pru_connect2 is EOPNOTSUPP */
343/* pru_control is EOPNOTSUPP */
344
345static void
346rts_detach(struct socket *so)
347{
348	struct rawcb *rp = sotorawcb(so);
349
350	KASSERT(rp != NULL, ("rts_detach: rp == NULL"));
351
352	RTSOCK_LOCK();
353	switch(rp->rcb_proto.sp_protocol) {
354	case AF_INET:
355		V_route_cb.ip_count--;
356		break;
357	case AF_INET6:
358		V_route_cb.ip6_count--;
359		break;
360	}
361	V_route_cb.any_count--;
362	RTSOCK_UNLOCK();
363	raw_usrreqs.pru_detach(so);
364}
365
366static int
367rts_disconnect(struct socket *so)
368{
369
370	return (raw_usrreqs.pru_disconnect(so));
371}
372
373/* pru_listen is EOPNOTSUPP */
374
375static int
376rts_peeraddr(struct socket *so, struct sockaddr **nam)
377{
378
379	return (raw_usrreqs.pru_peeraddr(so, nam));
380}
381
382/* pru_rcvd is EOPNOTSUPP */
383/* pru_rcvoob is EOPNOTSUPP */
384
385static int
386rts_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
387	 struct mbuf *control, struct thread *td)
388{
389
390	return (raw_usrreqs.pru_send(so, flags, m, nam, control, td));
391}
392
393/* pru_sense is null */
394
395static int
396rts_shutdown(struct socket *so)
397{
398
399	return (raw_usrreqs.pru_shutdown(so));
400}
401
402static int
403rts_sockaddr(struct socket *so, struct sockaddr **nam)
404{
405
406	return (raw_usrreqs.pru_sockaddr(so, nam));
407}
408
409static struct pr_usrreqs route_usrreqs = {
410	.pru_abort =		rts_abort,
411	.pru_attach =		rts_attach,
412	.pru_bind =		rts_bind,
413	.pru_connect =		rts_connect,
414	.pru_detach =		rts_detach,
415	.pru_disconnect =	rts_disconnect,
416	.pru_peeraddr =		rts_peeraddr,
417	.pru_send =		rts_send,
418	.pru_shutdown =		rts_shutdown,
419	.pru_sockaddr =		rts_sockaddr,
420	.pru_close =		rts_close,
421};
422
423#ifndef _SOCKADDR_UNION_DEFINED
424#define	_SOCKADDR_UNION_DEFINED
425/*
426 * The union of all possible address formats we handle.
427 */
428union sockaddr_union {
429	struct sockaddr		sa;
430	struct sockaddr_in	sin;
431	struct sockaddr_in6	sin6;
432};
433#endif /* _SOCKADDR_UNION_DEFINED */
434
435static int
436rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
437    struct rtentry *rt, union sockaddr_union *saun, struct ucred *cred)
438{
439
440	/* First, see if the returned address is part of the jail. */
441	if (prison_if(cred, rt->rt_ifa->ifa_addr) == 0) {
442		info->rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
443		return (0);
444	}
445
446	switch (info->rti_info[RTAX_DST]->sa_family) {
447#ifdef INET
448	case AF_INET:
449	{
450		struct in_addr ia;
451		struct ifaddr *ifa;
452		int found;
453
454		found = 0;
455		/*
456		 * Try to find an address on the given outgoing interface
457		 * that belongs to the jail.
458		 */
459		IF_ADDR_RLOCK(ifp);
460		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
461			struct sockaddr *sa;
462			sa = ifa->ifa_addr;
463			if (sa->sa_family != AF_INET)
464				continue;
465			ia = ((struct sockaddr_in *)sa)->sin_addr;
466			if (prison_check_ip4(cred, &ia) == 0) {
467				found = 1;
468				break;
469			}
470		}
471		IF_ADDR_RUNLOCK(ifp);
472		if (!found) {
473			/*
474			 * As a last resort return the 'default' jail address.
475			 */
476			ia = ((struct sockaddr_in *)rt->rt_ifa->ifa_addr)->
477			    sin_addr;
478			if (prison_get_ip4(cred, &ia) != 0)
479				return (ESRCH);
480		}
481		bzero(&saun->sin, sizeof(struct sockaddr_in));
482		saun->sin.sin_len = sizeof(struct sockaddr_in);
483		saun->sin.sin_family = AF_INET;
484		saun->sin.sin_addr.s_addr = ia.s_addr;
485		info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin;
486		break;
487	}
488#endif
489#ifdef INET6
490	case AF_INET6:
491	{
492		struct in6_addr ia6;
493		struct ifaddr *ifa;
494		int found;
495
496		found = 0;
497		/*
498		 * Try to find an address on the given outgoing interface
499		 * that belongs to the jail.
500		 */
501		IF_ADDR_RLOCK(ifp);
502		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
503			struct sockaddr *sa;
504			sa = ifa->ifa_addr;
505			if (sa->sa_family != AF_INET6)
506				continue;
507			bcopy(&((struct sockaddr_in6 *)sa)->sin6_addr,
508			    &ia6, sizeof(struct in6_addr));
509			if (prison_check_ip6(cred, &ia6) == 0) {
510				found = 1;
511				break;
512			}
513		}
514		IF_ADDR_RUNLOCK(ifp);
515		if (!found) {
516			/*
517			 * As a last resort return the 'default' jail address.
518			 */
519			ia6 = ((struct sockaddr_in6 *)rt->rt_ifa->ifa_addr)->
520			    sin6_addr;
521			if (prison_get_ip6(cred, &ia6) != 0)
522				return (ESRCH);
523		}
524		bzero(&saun->sin6, sizeof(struct sockaddr_in6));
525		saun->sin6.sin6_len = sizeof(struct sockaddr_in6);
526		saun->sin6.sin6_family = AF_INET6;
527		bcopy(&ia6, &saun->sin6.sin6_addr, sizeof(struct in6_addr));
528		if (sa6_recoverscope(&saun->sin6) != 0)
529			return (ESRCH);
530		info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin6;
531		break;
532	}
533#endif
534	default:
535		return (ESRCH);
536	}
537	return (0);
538}
539
540/*ARGSUSED*/
541static int
542route_output(struct mbuf *m, struct socket *so, ...)
543{
544	struct rt_msghdr *rtm = NULL;
545	struct rtentry *rt = NULL;
546	struct rib_head *rnh;
547	struct rt_addrinfo info;
548	struct sockaddr_storage ss;
549#ifdef INET6
550	struct sockaddr_in6 *sin6;
551	int i, rti_need_deembed = 0;
552#endif
553	int alloc_len = 0, len, error = 0, fibnum;
554	struct ifnet *ifp = NULL;
555	union sockaddr_union saun;
556	sa_family_t saf = AF_UNSPEC;
557	struct rawcb *rp = NULL;
558	struct walkarg w;
559
560	fibnum = so->so_fibnum;
561
562#define senderr(e) { error = e; goto flush;}
563	if (m == NULL || ((m->m_len < sizeof(long)) &&
564		       (m = m_pullup(m, sizeof(long))) == NULL))
565		return (ENOBUFS);
566	if ((m->m_flags & M_PKTHDR) == 0)
567		panic("route_output");
568	len = m->m_pkthdr.len;
569	if (len < sizeof(*rtm) ||
570	    len != mtod(m, struct rt_msghdr *)->rtm_msglen)
571		senderr(EINVAL);
572
573	/*
574	 * Most of current messages are in range 200-240 bytes,
575	 * minimize possible re-allocation on reply using larger size
576	 * buffer aligned on 1k boundaty.
577	 */
578	alloc_len = roundup2(len, 1024);
579	if ((rtm = malloc(alloc_len, M_TEMP, M_NOWAIT)) == NULL)
580		senderr(ENOBUFS);
581
582	m_copydata(m, 0, len, (caddr_t)rtm);
583	bzero(&info, sizeof(info));
584	bzero(&w, sizeof(w));
585
586	if (rtm->rtm_version != RTM_VERSION) {
587		/* Do not touch message since format is unknown */
588		free(rtm, M_TEMP);
589		rtm = NULL;
590		senderr(EPROTONOSUPPORT);
591	}
592
593	/*
594	 * Starting from here, it is possible
595	 * to alter original message and insert
596	 * caller PID and error value.
597	 */
598
599	rtm->rtm_pid = curproc->p_pid;
600	info.rti_addrs = rtm->rtm_addrs;
601
602	info.rti_mflags = rtm->rtm_inits;
603	info.rti_rmx = &rtm->rtm_rmx;
604
605	/*
606	 * rt_xaddrs() performs s6_addr[2] := sin6_scope_id for AF_INET6
607	 * link-local address because rtrequest requires addresses with
608	 * embedded scope id.
609	 */
610	if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info))
611		senderr(EINVAL);
612
613	info.rti_flags = rtm->rtm_flags;
614	if (info.rti_info[RTAX_DST] == NULL ||
615	    info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
616	    (info.rti_info[RTAX_GATEWAY] != NULL &&
617	     info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX))
618		senderr(EINVAL);
619	saf = info.rti_info[RTAX_DST]->sa_family;
620	/*
621	 * Verify that the caller has the appropriate privilege; RTM_GET
622	 * is the only operation the non-superuser is allowed.
623	 */
624	if (rtm->rtm_type != RTM_GET) {
625		error = priv_check(curthread, PRIV_NET_ROUTE);
626		if (error)
627			senderr(error);
628	}
629
630	/*
631	 * The given gateway address may be an interface address.
632	 * For example, issuing a "route change" command on a route
633	 * entry that was created from a tunnel, and the gateway
634	 * address given is the local end point. In this case the
635	 * RTF_GATEWAY flag must be cleared or the destination will
636	 * not be reachable even though there is no error message.
637	 */
638	if (info.rti_info[RTAX_GATEWAY] != NULL &&
639	    info.rti_info[RTAX_GATEWAY]->sa_family != AF_LINK) {
640		struct rt_addrinfo ginfo;
641		struct sockaddr *gdst;
642
643		bzero(&ginfo, sizeof(ginfo));
644		bzero(&ss, sizeof(ss));
645		ss.ss_len = sizeof(ss);
646
647		ginfo.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&ss;
648		gdst = info.rti_info[RTAX_GATEWAY];
649
650		/*
651		 * A host route through the loopback interface is
652		 * installed for each interface adddress. In pre 8.0
653		 * releases the interface address of a PPP link type
654		 * is not reachable locally. This behavior is fixed as
655		 * part of the new L2/L3 redesign and rewrite work. The
656		 * signature of this interface address route is the
657		 * AF_LINK sa_family type of the rt_gateway, and the
658		 * rt_ifp has the IFF_LOOPBACK flag set.
659		 */
660		if (rib_lookup_info(fibnum, gdst, NHR_REF, 0, &ginfo) == 0) {
661			if (ss.ss_family == AF_LINK &&
662			    ginfo.rti_ifp->if_flags & IFF_LOOPBACK) {
663				info.rti_flags &= ~RTF_GATEWAY;
664				info.rti_flags |= RTF_GWFLAG_COMPAT;
665			}
666			rib_free_info(&ginfo);
667		}
668	}
669
670	switch (rtm->rtm_type) {
671		struct rtentry *saved_nrt;
672
673	case RTM_ADD:
674	case RTM_CHANGE:
675		if (info.rti_info[RTAX_GATEWAY] == NULL)
676			senderr(EINVAL);
677		saved_nrt = NULL;
678
679		/* support for new ARP code */
680		if (info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK &&
681		    (rtm->rtm_flags & RTF_LLDATA) != 0) {
682			error = lla_rt_output(rtm, &info);
683#ifdef INET6
684			if (error == 0)
685				rti_need_deembed = (V_deembed_scopeid) ? 1 : 0;
686#endif
687			break;
688		}
689		error = rtrequest1_fib(rtm->rtm_type, &info, &saved_nrt,
690		    fibnum);
691		if (error == 0 && saved_nrt != NULL) {
692#ifdef INET6
693			rti_need_deembed = (V_deembed_scopeid) ? 1 : 0;
694#endif
695			RT_LOCK(saved_nrt);
696			rtm->rtm_index = saved_nrt->rt_ifp->if_index;
697			RT_REMREF(saved_nrt);
698			RT_UNLOCK(saved_nrt);
699		}
700		break;
701
702	case RTM_DELETE:
703		saved_nrt = NULL;
704		/* support for new ARP code */
705		if (info.rti_info[RTAX_GATEWAY] &&
706		    (info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK) &&
707		    (rtm->rtm_flags & RTF_LLDATA) != 0) {
708			error = lla_rt_output(rtm, &info);
709#ifdef INET6
710			if (error == 0)
711				rti_need_deembed = (V_deembed_scopeid) ? 1 : 0;
712#endif
713			break;
714		}
715		error = rtrequest1_fib(RTM_DELETE, &info, &saved_nrt, fibnum);
716		if (error == 0) {
717			RT_LOCK(saved_nrt);
718			rt = saved_nrt;
719			goto report;
720		}
721#ifdef INET6
722		/* rt_msg2() will not be used when RTM_DELETE fails. */
723		rti_need_deembed = (V_deembed_scopeid) ? 1 : 0;
724#endif
725		break;
726
727	case RTM_GET:
728		rnh = rt_tables_get_rnh(fibnum, saf);
729		if (rnh == NULL)
730			senderr(EAFNOSUPPORT);
731
732		RIB_RLOCK(rnh);
733
734		if (info.rti_info[RTAX_NETMASK] == NULL &&
735		    rtm->rtm_type == RTM_GET) {
736			/*
737			 * Provide longest prefix match for
738			 * address lookup (no mask).
739			 * 'route -n get addr'
740			 */
741			rt = (struct rtentry *) rnh->rnh_matchaddr(
742			    info.rti_info[RTAX_DST], &rnh->head);
743		} else
744			rt = (struct rtentry *) rnh->rnh_lookup(
745			    info.rti_info[RTAX_DST],
746			    info.rti_info[RTAX_NETMASK], &rnh->head);
747
748		if (rt == NULL) {
749			RIB_RUNLOCK(rnh);
750			senderr(ESRCH);
751		}
752#ifdef RADIX_MPATH
753		/*
754		 * for RTM_CHANGE/LOCK, if we got multipath routes,
755		 * we require users to specify a matching RTAX_GATEWAY.
756		 *
757		 * for RTM_GET, gate is optional even with multipath.
758		 * if gate == NULL the first match is returned.
759		 * (no need to call rt_mpath_matchgate if gate == NULL)
760		 */
761		if (rt_mpath_capable(rnh) &&
762		    (rtm->rtm_type != RTM_GET || info.rti_info[RTAX_GATEWAY])) {
763			rt = rt_mpath_matchgate(rt, info.rti_info[RTAX_GATEWAY]);
764			if (!rt) {
765				RIB_RUNLOCK(rnh);
766				senderr(ESRCH);
767			}
768		}
769#endif
770		/*
771		 * If performing proxied L2 entry insertion, and
772		 * the actual PPP host entry is found, perform
773		 * another search to retrieve the prefix route of
774		 * the local end point of the PPP link.
775		 */
776		if (rtm->rtm_flags & RTF_ANNOUNCE) {
777			struct sockaddr laddr;
778
779			if (rt->rt_ifp != NULL &&
780			    rt->rt_ifp->if_type == IFT_PROPVIRTUAL) {
781				struct ifaddr *ifa;
782
783				ifa = ifa_ifwithnet(info.rti_info[RTAX_DST], 1,
784						RT_ALL_FIBS);
785				if (ifa != NULL)
786					rt_maskedcopy(ifa->ifa_addr,
787						      &laddr,
788						      ifa->ifa_netmask);
789			} else
790				rt_maskedcopy(rt->rt_ifa->ifa_addr,
791					      &laddr,
792					      rt->rt_ifa->ifa_netmask);
793			/*
794			 * refactor rt and no lock operation necessary
795			 */
796			rt = (struct rtentry *)rnh->rnh_matchaddr(&laddr,
797			    &rnh->head);
798			if (rt == NULL) {
799				RIB_RUNLOCK(rnh);
800				senderr(ESRCH);
801			}
802		}
803		RT_LOCK(rt);
804		RT_ADDREF(rt);
805		RIB_RUNLOCK(rnh);
806
807report:
808		RT_LOCK_ASSERT(rt);
809		if ((rt->rt_flags & RTF_HOST) == 0
810		    ? jailed_without_vnet(curthread->td_ucred)
811		    : prison_if(curthread->td_ucred,
812		    rt_key(rt)) != 0) {
813			RT_UNLOCK(rt);
814			senderr(ESRCH);
815		}
816		info.rti_info[RTAX_DST] = rt_key(rt);
817		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
818		info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(rt_key(rt),
819		    rt_mask(rt), &ss);
820		info.rti_info[RTAX_GENMASK] = 0;
821		if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
822			ifp = rt->rt_ifp;
823			if (ifp) {
824				info.rti_info[RTAX_IFP] =
825				    ifp->if_addr->ifa_addr;
826				error = rtm_get_jailed(&info, ifp, rt,
827				    &saun, curthread->td_ucred);
828				if (error != 0) {
829					RT_UNLOCK(rt);
830					senderr(error);
831				}
832				if (ifp->if_flags & IFF_POINTOPOINT)
833					info.rti_info[RTAX_BRD] =
834					    rt->rt_ifa->ifa_dstaddr;
835				rtm->rtm_index = ifp->if_index;
836			} else {
837				info.rti_info[RTAX_IFP] = NULL;
838				info.rti_info[RTAX_IFA] = NULL;
839			}
840		} else if ((ifp = rt->rt_ifp) != NULL) {
841			rtm->rtm_index = ifp->if_index;
842		}
843
844		/* Check if we need to realloc storage */
845		rtsock_msg_buffer(rtm->rtm_type, &info, NULL, &len);
846		if (len > alloc_len) {
847			struct rt_msghdr *new_rtm;
848			new_rtm = malloc(len, M_TEMP, M_NOWAIT);
849			if (new_rtm == NULL) {
850				RT_UNLOCK(rt);
851				senderr(ENOBUFS);
852			}
853			bcopy(rtm, new_rtm, rtm->rtm_msglen);
854			free(rtm, M_TEMP);
855			rtm = new_rtm;
856			alloc_len = len;
857		}
858
859		w.w_tmem = (caddr_t)rtm;
860		w.w_tmemsize = alloc_len;
861		rtsock_msg_buffer(rtm->rtm_type, &info, &w, &len);
862
863		if (rt->rt_flags & RTF_GWFLAG_COMPAT)
864			rtm->rtm_flags = RTF_GATEWAY |
865				(rt->rt_flags & ~RTF_GWFLAG_COMPAT);
866		else
867			rtm->rtm_flags = rt->rt_flags;
868		rt_getmetrics(rt, &rtm->rtm_rmx);
869		rtm->rtm_addrs = info.rti_addrs;
870
871		RT_UNLOCK(rt);
872		break;
873
874	default:
875		senderr(EOPNOTSUPP);
876	}
877
878flush:
879	if (rt != NULL)
880		RTFREE(rt);
881	/*
882	 * Check to see if we don't want our own messages.
883	 */
884	if ((so->so_options & SO_USELOOPBACK) == 0) {
885		if (V_route_cb.any_count <= 1) {
886			if (rtm != NULL)
887				free(rtm, M_TEMP);
888			m_freem(m);
889			return (error);
890		}
891		/* There is another listener, so construct message */
892		rp = sotorawcb(so);
893	}
894
895	if (rtm != NULL) {
896#ifdef INET6
897		if (rti_need_deembed) {
898			/* sin6_scope_id is recovered before sending rtm. */
899			sin6 = (struct sockaddr_in6 *)&ss;
900			for (i = 0; i < RTAX_MAX; i++) {
901				if (info.rti_info[i] == NULL)
902					continue;
903				if (info.rti_info[i]->sa_family != AF_INET6)
904					continue;
905				bcopy(info.rti_info[i], sin6, sizeof(*sin6));
906				if (sa6_recoverscope(sin6) == 0)
907					bcopy(sin6, info.rti_info[i],
908						    sizeof(*sin6));
909			}
910		}
911#endif
912		if (error != 0)
913			rtm->rtm_errno = error;
914		else
915			rtm->rtm_flags |= RTF_DONE;
916
917		m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm);
918		if (m->m_pkthdr.len < rtm->rtm_msglen) {
919			m_freem(m);
920			m = NULL;
921		} else if (m->m_pkthdr.len > rtm->rtm_msglen)
922			m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
923
924		free(rtm, M_TEMP);
925	}
926	if (m != NULL) {
927		M_SETFIB(m, fibnum);
928		m->m_flags |= RTS_FILTER_FIB;
929		if (rp) {
930			/*
931			 * XXX insure we don't get a copy by
932			 * invalidating our protocol
933			 */
934			unsigned short family = rp->rcb_proto.sp_family;
935			rp->rcb_proto.sp_family = 0;
936			rt_dispatch(m, saf);
937			rp->rcb_proto.sp_family = family;
938		} else
939			rt_dispatch(m, saf);
940	}
941
942	return (error);
943}
944
945static void
946rt_getmetrics(const struct rtentry *rt, struct rt_metrics *out)
947{
948
949	bzero(out, sizeof(*out));
950	out->rmx_mtu = rt->rt_mtu;
951	out->rmx_weight = rt->rt_weight;
952	out->rmx_pksent = counter_u64_fetch(rt->rt_pksent);
953	/* Kernel -> userland timebase conversion. */
954	out->rmx_expire = rt->rt_expire ?
955	    rt->rt_expire - time_uptime + time_second : 0;
956}
957
958/*
959 * Extract the addresses of the passed sockaddrs.
960 * Do a little sanity checking so as to avoid bad memory references.
961 * This data is derived straight from userland.
962 */
963static int
964rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
965{
966	struct sockaddr *sa;
967	int i;
968
969	for (i = 0; i < RTAX_MAX && cp < cplim; i++) {
970		if ((rtinfo->rti_addrs & (1 << i)) == 0)
971			continue;
972		sa = (struct sockaddr *)cp;
973		/*
974		 * It won't fit.
975		 */
976		if (cp + sa->sa_len > cplim)
977			return (EINVAL);
978		/*
979		 * there are no more.. quit now
980		 * If there are more bits, they are in error.
981		 * I've seen this. route(1) can evidently generate these.
982		 * This causes kernel to core dump.
983		 * for compatibility, If we see this, point to a safe address.
984		 */
985		if (sa->sa_len == 0) {
986			rtinfo->rti_info[i] = &sa_zero;
987			return (0); /* should be EINVAL but for compat */
988		}
989		/* accept it */
990#ifdef INET6
991		if (sa->sa_family == AF_INET6)
992			sa6_embedscope((struct sockaddr_in6 *)sa,
993			    V_ip6_use_defzone);
994#endif
995		rtinfo->rti_info[i] = sa;
996		cp += SA_SIZE(sa);
997	}
998	return (0);
999}
1000
1001/*
1002 * Fill in @dmask with valid netmask leaving original @smask
1003 * intact. Mostly used with radix netmasks.
1004 */
1005static struct sockaddr *
1006rtsock_fix_netmask(struct sockaddr *dst, struct sockaddr *smask,
1007    struct sockaddr_storage *dmask)
1008{
1009	if (dst == NULL || smask == NULL)
1010		return (NULL);
1011
1012	memset(dmask, 0, dst->sa_len);
1013	memcpy(dmask, smask, smask->sa_len);
1014	dmask->ss_len = dst->sa_len;
1015	dmask->ss_family = dst->sa_family;
1016
1017	return ((struct sockaddr *)dmask);
1018}
1019
1020/*
1021 * Writes information related to @rtinfo object to newly-allocated mbuf.
1022 * Assumes MCLBYTES is enough to construct any message.
1023 * Used for OS notifications of vaious events (if/ifa announces,etc)
1024 *
1025 * Returns allocated mbuf or NULL on failure.
1026 */
1027static struct mbuf *
1028rtsock_msg_mbuf(int type, struct rt_addrinfo *rtinfo)
1029{
1030	struct rt_msghdr *rtm;
1031	struct mbuf *m;
1032	int i;
1033	struct sockaddr *sa;
1034#ifdef INET6
1035	struct sockaddr_storage ss;
1036	struct sockaddr_in6 *sin6;
1037#endif
1038	int len, dlen;
1039
1040	switch (type) {
1041
1042	case RTM_DELADDR:
1043	case RTM_NEWADDR:
1044		len = sizeof(struct ifa_msghdr);
1045		break;
1046
1047	case RTM_DELMADDR:
1048	case RTM_NEWMADDR:
1049		len = sizeof(struct ifma_msghdr);
1050		break;
1051
1052	case RTM_IFINFO:
1053		len = sizeof(struct if_msghdr);
1054		break;
1055
1056	case RTM_IFANNOUNCE:
1057	case RTM_IEEE80211:
1058		len = sizeof(struct if_announcemsghdr);
1059		break;
1060
1061	default:
1062		len = sizeof(struct rt_msghdr);
1063	}
1064
1065	/* XXXGL: can we use MJUMPAGESIZE cluster here? */
1066	KASSERT(len <= MCLBYTES, ("%s: message too big", __func__));
1067	if (len > MHLEN)
1068		m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
1069	else
1070		m = m_gethdr(M_NOWAIT, MT_DATA);
1071	if (m == NULL)
1072		return (m);
1073
1074	m->m_pkthdr.len = m->m_len = len;
1075	rtm = mtod(m, struct rt_msghdr *);
1076	bzero((caddr_t)rtm, len);
1077	for (i = 0; i < RTAX_MAX; i++) {
1078		if ((sa = rtinfo->rti_info[i]) == NULL)
1079			continue;
1080		rtinfo->rti_addrs |= (1 << i);
1081		dlen = SA_SIZE(sa);
1082#ifdef INET6
1083		if (V_deembed_scopeid && sa->sa_family == AF_INET6) {
1084			sin6 = (struct sockaddr_in6 *)&ss;
1085			bcopy(sa, sin6, sizeof(*sin6));
1086			if (sa6_recoverscope(sin6) == 0)
1087				sa = (struct sockaddr *)sin6;
1088		}
1089#endif
1090		m_copyback(m, len, dlen, (caddr_t)sa);
1091		len += dlen;
1092	}
1093	if (m->m_pkthdr.len != len) {
1094		m_freem(m);
1095		return (NULL);
1096	}
1097	rtm->rtm_msglen = len;
1098	rtm->rtm_version = RTM_VERSION;
1099	rtm->rtm_type = type;
1100	return (m);
1101}
1102
1103/*
1104 * Writes information related to @rtinfo object to preallocated buffer.
1105 * Stores needed size in @plen. If @w is NULL, calculates size without
1106 * writing.
1107 * Used for sysctl dumps and rtsock answers (RTM_DEL/RTM_GET) generation.
1108 *
1109 * Returns 0 on success.
1110 *
1111 */
1112static int
1113rtsock_msg_buffer(int type, struct rt_addrinfo *rtinfo, struct walkarg *w, int *plen)
1114{
1115	int i;
1116	int len, buflen = 0, dlen;
1117	caddr_t cp = NULL;
1118	struct rt_msghdr *rtm = NULL;
1119#ifdef INET6
1120	struct sockaddr_storage ss;
1121	struct sockaddr_in6 *sin6;
1122#endif
1123#ifdef COMPAT_FREEBSD32
1124	bool compat32 = false;
1125#endif
1126
1127	switch (type) {
1128
1129	case RTM_DELADDR:
1130	case RTM_NEWADDR:
1131		if (w != NULL && w->w_op == NET_RT_IFLISTL) {
1132#ifdef COMPAT_FREEBSD32
1133			if (w->w_req->flags & SCTL_MASK32) {
1134				len = sizeof(struct ifa_msghdrl32);
1135				compat32 = true;
1136			} else
1137#endif
1138				len = sizeof(struct ifa_msghdrl);
1139		} else
1140			len = sizeof(struct ifa_msghdr);
1141		break;
1142
1143	case RTM_IFINFO:
1144#ifdef COMPAT_FREEBSD32
1145		if (w != NULL && w->w_req->flags & SCTL_MASK32) {
1146			if (w->w_op == NET_RT_IFLISTL)
1147				len = sizeof(struct if_msghdrl32);
1148			else
1149				len = sizeof(struct if_msghdr32);
1150			compat32 = true;
1151			break;
1152		}
1153#endif
1154		if (w != NULL && w->w_op == NET_RT_IFLISTL)
1155			len = sizeof(struct if_msghdrl);
1156		else
1157			len = sizeof(struct if_msghdr);
1158		break;
1159
1160	case RTM_NEWMADDR:
1161		len = sizeof(struct ifma_msghdr);
1162		break;
1163
1164	default:
1165		len = sizeof(struct rt_msghdr);
1166	}
1167
1168	if (w != NULL) {
1169		rtm = (struct rt_msghdr *)w->w_tmem;
1170		buflen = w->w_tmemsize - len;
1171		cp = (caddr_t)w->w_tmem + len;
1172	}
1173
1174	rtinfo->rti_addrs = 0;
1175	for (i = 0; i < RTAX_MAX; i++) {
1176		struct sockaddr *sa;
1177
1178		if ((sa = rtinfo->rti_info[i]) == NULL)
1179			continue;
1180		rtinfo->rti_addrs |= (1 << i);
1181#ifdef COMPAT_FREEBSD32
1182		if (compat32)
1183			dlen = SA_SIZE32(sa);
1184		else
1185#endif
1186			dlen = SA_SIZE(sa);
1187		if (cp != NULL && buflen >= dlen) {
1188#ifdef INET6
1189			if (V_deembed_scopeid && sa->sa_family == AF_INET6) {
1190				sin6 = (struct sockaddr_in6 *)&ss;
1191				bcopy(sa, sin6, sizeof(*sin6));
1192				if (sa6_recoverscope(sin6) == 0)
1193					sa = (struct sockaddr *)sin6;
1194			}
1195#endif
1196			bcopy((caddr_t)sa, cp, (unsigned)dlen);
1197			cp += dlen;
1198			buflen -= dlen;
1199		} else if (cp != NULL) {
1200			/*
1201			 * Buffer too small. Count needed size
1202			 * and return with error.
1203			 */
1204			cp = NULL;
1205		}
1206
1207		len += dlen;
1208	}
1209
1210	if (cp != NULL) {
1211		dlen = ALIGN(len) - len;
1212		if (buflen < dlen)
1213			cp = NULL;
1214		else
1215			buflen -= dlen;
1216	}
1217	len = ALIGN(len);
1218
1219	if (cp != NULL) {
1220		/* fill header iff buffer is large enough */
1221		rtm->rtm_version = RTM_VERSION;
1222		rtm->rtm_type = type;
1223		rtm->rtm_msglen = len;
1224	}
1225
1226	*plen = len;
1227
1228	if (w != NULL && cp == NULL)
1229		return (ENOBUFS);
1230
1231	return (0);
1232}
1233
1234/*
1235 * This routine is called to generate a message from the routing
1236 * socket indicating that a redirect has occurred, a routing lookup
1237 * has failed, or that a protocol has detected timeouts to a particular
1238 * destination.
1239 */
1240void
1241rt_missmsg_fib(int type, struct rt_addrinfo *rtinfo, int flags, int error,
1242    int fibnum)
1243{
1244	struct rt_msghdr *rtm;
1245	struct mbuf *m;
1246	struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
1247
1248	if (V_route_cb.any_count == 0)
1249		return;
1250	m = rtsock_msg_mbuf(type, rtinfo);
1251	if (m == NULL)
1252		return;
1253
1254	if (fibnum != RT_ALL_FIBS) {
1255		KASSERT(fibnum >= 0 && fibnum < rt_numfibs, ("%s: fibnum out "
1256		    "of range 0 <= %d < %d", __func__, fibnum, rt_numfibs));
1257		M_SETFIB(m, fibnum);
1258		m->m_flags |= RTS_FILTER_FIB;
1259	}
1260
1261	rtm = mtod(m, struct rt_msghdr *);
1262	rtm->rtm_flags = RTF_DONE | flags;
1263	rtm->rtm_errno = error;
1264	rtm->rtm_addrs = rtinfo->rti_addrs;
1265	rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC);
1266}
1267
1268void
1269rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
1270{
1271
1272	rt_missmsg_fib(type, rtinfo, flags, error, RT_ALL_FIBS);
1273}
1274
1275/*
1276 * This routine is called to generate a message from the routing
1277 * socket indicating that the status of a network interface has changed.
1278 */
1279void
1280rt_ifmsg(struct ifnet *ifp)
1281{
1282	struct if_msghdr *ifm;
1283	struct mbuf *m;
1284	struct rt_addrinfo info;
1285
1286	if (V_route_cb.any_count == 0)
1287		return;
1288	bzero((caddr_t)&info, sizeof(info));
1289	m = rtsock_msg_mbuf(RTM_IFINFO, &info);
1290	if (m == NULL)
1291		return;
1292	ifm = mtod(m, struct if_msghdr *);
1293	ifm->ifm_index = ifp->if_index;
1294	ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
1295	if_data_copy(ifp, &ifm->ifm_data);
1296	ifm->ifm_addrs = 0;
1297	rt_dispatch(m, AF_UNSPEC);
1298}
1299
1300/*
1301 * Announce interface address arrival/withdraw.
1302 * Please do not call directly, use rt_addrmsg().
1303 * Assume input data to be valid.
1304 * Returns 0 on success.
1305 */
1306int
1307rtsock_addrmsg(int cmd, struct ifaddr *ifa, int fibnum)
1308{
1309	struct rt_addrinfo info;
1310	struct sockaddr *sa;
1311	int ncmd;
1312	struct mbuf *m;
1313	struct ifa_msghdr *ifam;
1314	struct ifnet *ifp = ifa->ifa_ifp;
1315	struct sockaddr_storage ss;
1316
1317	if (V_route_cb.any_count == 0)
1318		return (0);
1319
1320	ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;
1321
1322	bzero((caddr_t)&info, sizeof(info));
1323	info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr;
1324	info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr;
1325	info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(
1326	    info.rti_info[RTAX_IFP], ifa->ifa_netmask, &ss);
1327	info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1328	if ((m = rtsock_msg_mbuf(ncmd, &info)) == NULL)
1329		return (ENOBUFS);
1330	ifam = mtod(m, struct ifa_msghdr *);
1331	ifam->ifam_index = ifp->if_index;
1332	ifam->ifam_metric = ifa->ifa_ifp->if_metric;
1333	ifam->ifam_flags = ifa->ifa_flags;
1334	ifam->ifam_addrs = info.rti_addrs;
1335
1336	if (fibnum != RT_ALL_FIBS) {
1337		M_SETFIB(m, fibnum);
1338		m->m_flags |= RTS_FILTER_FIB;
1339	}
1340
1341	rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC);
1342
1343	return (0);
1344}
1345
1346/*
1347 * Announce route addition/removal.
1348 * Please do not call directly, use rt_routemsg().
1349 * Note that @rt data MAY be inconsistent/invalid:
1350 * if some userland app sends us "invalid" route message (invalid mask,
1351 * no dst, wrong address families, etc...) we need to pass it back
1352 * to app (and any other rtsock consumers) with rtm_errno field set to
1353 * non-zero value.
1354 *
1355 * Returns 0 on success.
1356 */
1357int
1358rtsock_routemsg(int cmd, struct ifnet *ifp, int error, struct rtentry *rt,
1359    int fibnum)
1360{
1361	struct rt_addrinfo info;
1362	struct sockaddr *sa;
1363	struct mbuf *m;
1364	struct rt_msghdr *rtm;
1365	struct sockaddr_storage ss;
1366
1367	if (V_route_cb.any_count == 0)
1368		return (0);
1369
1370	bzero((caddr_t)&info, sizeof(info));
1371	info.rti_info[RTAX_DST] = sa = rt_key(rt);
1372	info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(sa, rt_mask(rt), &ss);
1373	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1374	if ((m = rtsock_msg_mbuf(cmd, &info)) == NULL)
1375		return (ENOBUFS);
1376	rtm = mtod(m, struct rt_msghdr *);
1377	rtm->rtm_index = ifp->if_index;
1378	rtm->rtm_flags |= rt->rt_flags;
1379	rtm->rtm_errno = error;
1380	rtm->rtm_addrs = info.rti_addrs;
1381
1382	if (fibnum != RT_ALL_FIBS) {
1383		M_SETFIB(m, fibnum);
1384		m->m_flags |= RTS_FILTER_FIB;
1385	}
1386
1387	rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC);
1388
1389	return (0);
1390}
1391
1392/*
1393 * This is the analogue to the rt_newaddrmsg which performs the same
1394 * function but for multicast group memberhips.  This is easier since
1395 * there is no route state to worry about.
1396 */
1397void
1398rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma)
1399{
1400	struct rt_addrinfo info;
1401	struct mbuf *m = NULL;
1402	struct ifnet *ifp = ifma->ifma_ifp;
1403	struct ifma_msghdr *ifmam;
1404
1405	if (V_route_cb.any_count == 0)
1406		return;
1407
1408	bzero((caddr_t)&info, sizeof(info));
1409	info.rti_info[RTAX_IFA] = ifma->ifma_addr;
1410	info.rti_info[RTAX_IFP] = ifp ? ifp->if_addr->ifa_addr : NULL;
1411	/*
1412	 * If a link-layer address is present, present it as a ``gateway''
1413	 * (similarly to how ARP entries, e.g., are presented).
1414	 */
1415	info.rti_info[RTAX_GATEWAY] = ifma->ifma_lladdr;
1416	m = rtsock_msg_mbuf(cmd, &info);
1417	if (m == NULL)
1418		return;
1419	ifmam = mtod(m, struct ifma_msghdr *);
1420	KASSERT(ifp != NULL, ("%s: link-layer multicast address w/o ifp\n",
1421	    __func__));
1422	ifmam->ifmam_index = ifp->if_index;
1423	ifmam->ifmam_addrs = info.rti_addrs;
1424	rt_dispatch(m, ifma->ifma_addr ? ifma->ifma_addr->sa_family : AF_UNSPEC);
1425}
1426
1427static struct mbuf *
1428rt_makeifannouncemsg(struct ifnet *ifp, int type, int what,
1429	struct rt_addrinfo *info)
1430{
1431	struct if_announcemsghdr *ifan;
1432	struct mbuf *m;
1433
1434	if (V_route_cb.any_count == 0)
1435		return NULL;
1436	bzero((caddr_t)info, sizeof(*info));
1437	m = rtsock_msg_mbuf(type, info);
1438	if (m != NULL) {
1439		ifan = mtod(m, struct if_announcemsghdr *);
1440		ifan->ifan_index = ifp->if_index;
1441		strlcpy(ifan->ifan_name, ifp->if_xname,
1442			sizeof(ifan->ifan_name));
1443		ifan->ifan_what = what;
1444	}
1445	return m;
1446}
1447
1448/*
1449 * This is called to generate routing socket messages indicating
1450 * IEEE80211 wireless events.
1451 * XXX we piggyback on the RTM_IFANNOUNCE msg format in a clumsy way.
1452 */
1453void
1454rt_ieee80211msg(struct ifnet *ifp, int what, void *data, size_t data_len)
1455{
1456	struct mbuf *m;
1457	struct rt_addrinfo info;
1458
1459	m = rt_makeifannouncemsg(ifp, RTM_IEEE80211, what, &info);
1460	if (m != NULL) {
1461		/*
1462		 * Append the ieee80211 data.  Try to stick it in the
1463		 * mbuf containing the ifannounce msg; otherwise allocate
1464		 * a new mbuf and append.
1465		 *
1466		 * NB: we assume m is a single mbuf.
1467		 */
1468		if (data_len > M_TRAILINGSPACE(m)) {
1469			struct mbuf *n = m_get(M_NOWAIT, MT_DATA);
1470			if (n == NULL) {
1471				m_freem(m);
1472				return;
1473			}
1474			bcopy(data, mtod(n, void *), data_len);
1475			n->m_len = data_len;
1476			m->m_next = n;
1477		} else if (data_len > 0) {
1478			bcopy(data, mtod(m, u_int8_t *) + m->m_len, data_len);
1479			m->m_len += data_len;
1480		}
1481		if (m->m_flags & M_PKTHDR)
1482			m->m_pkthdr.len += data_len;
1483		mtod(m, struct if_announcemsghdr *)->ifan_msglen += data_len;
1484		rt_dispatch(m, AF_UNSPEC);
1485	}
1486}
1487
1488/*
1489 * This is called to generate routing socket messages indicating
1490 * network interface arrival and departure.
1491 */
1492void
1493rt_ifannouncemsg(struct ifnet *ifp, int what)
1494{
1495	struct mbuf *m;
1496	struct rt_addrinfo info;
1497
1498	m = rt_makeifannouncemsg(ifp, RTM_IFANNOUNCE, what, &info);
1499	if (m != NULL)
1500		rt_dispatch(m, AF_UNSPEC);
1501}
1502
1503static void
1504rt_dispatch(struct mbuf *m, sa_family_t saf)
1505{
1506	struct m_tag *tag;
1507
1508	/*
1509	 * Preserve the family from the sockaddr, if any, in an m_tag for
1510	 * use when injecting the mbuf into the routing socket buffer from
1511	 * the netisr.
1512	 */
1513	if (saf != AF_UNSPEC) {
1514		tag = m_tag_get(PACKET_TAG_RTSOCKFAM, sizeof(unsigned short),
1515		    M_NOWAIT);
1516		if (tag == NULL) {
1517			m_freem(m);
1518			return;
1519		}
1520		*(unsigned short *)(tag + 1) = saf;
1521		m_tag_prepend(m, tag);
1522	}
1523#ifdef VIMAGE
1524	if (V_loif)
1525		m->m_pkthdr.rcvif = V_loif;
1526	else {
1527		m_freem(m);
1528		return;
1529	}
1530#endif
1531	netisr_queue(NETISR_ROUTE, m);	/* mbuf is free'd on failure. */
1532}
1533
1534/*
1535 * This is used in dumping the kernel table via sysctl().
1536 */
1537static int
1538sysctl_dumpentry(struct radix_node *rn, void *vw)
1539{
1540	struct walkarg *w = vw;
1541	struct rtentry *rt = (struct rtentry *)rn;
1542	int error = 0, size;
1543	struct rt_addrinfo info;
1544	struct sockaddr_storage ss;
1545
1546	if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
1547		return 0;
1548	if ((rt->rt_flags & RTF_HOST) == 0
1549	    ? jailed_without_vnet(w->w_req->td->td_ucred)
1550	    : prison_if(w->w_req->td->td_ucred, rt_key(rt)) != 0)
1551		return (0);
1552	bzero((caddr_t)&info, sizeof(info));
1553	info.rti_info[RTAX_DST] = rt_key(rt);
1554	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1555	info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(rt_key(rt),
1556	    rt_mask(rt), &ss);
1557	info.rti_info[RTAX_GENMASK] = 0;
1558	if (rt->rt_ifp) {
1559		info.rti_info[RTAX_IFP] = rt->rt_ifp->if_addr->ifa_addr;
1560		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1561		if (rt->rt_ifp->if_flags & IFF_POINTOPOINT)
1562			info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
1563	}
1564	if ((error = rtsock_msg_buffer(RTM_GET, &info, w, &size)) != 0)
1565		return (error);
1566	if (w->w_req && w->w_tmem) {
1567		struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
1568
1569		if (rt->rt_flags & RTF_GWFLAG_COMPAT)
1570			rtm->rtm_flags = RTF_GATEWAY |
1571				(rt->rt_flags & ~RTF_GWFLAG_COMPAT);
1572		else
1573			rtm->rtm_flags = rt->rt_flags;
1574		rt_getmetrics(rt, &rtm->rtm_rmx);
1575		rtm->rtm_index = rt->rt_ifp->if_index;
1576		rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0;
1577		rtm->rtm_addrs = info.rti_addrs;
1578		error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size);
1579		return (error);
1580	}
1581	return (error);
1582}
1583
1584static int
1585sysctl_iflist_ifml(struct ifnet *ifp, const struct if_data *src_ifd,
1586    struct rt_addrinfo *info, struct walkarg *w, int len)
1587{
1588	struct if_msghdrl *ifm;
1589	struct if_data *ifd;
1590
1591	ifm = (struct if_msghdrl *)w->w_tmem;
1592
1593#ifdef COMPAT_FREEBSD32
1594	if (w->w_req->flags & SCTL_MASK32) {
1595		struct if_msghdrl32 *ifm32;
1596
1597		ifm32 = (struct if_msghdrl32 *)ifm;
1598		ifm32->ifm_addrs = info->rti_addrs;
1599		ifm32->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
1600		ifm32->ifm_index = ifp->if_index;
1601		ifm32->_ifm_spare1 = 0;
1602		ifm32->ifm_len = sizeof(*ifm32);
1603		ifm32->ifm_data_off = offsetof(struct if_msghdrl32, ifm_data);
1604		ifd = &ifm32->ifm_data;
1605	} else
1606#endif
1607	{
1608		ifm->ifm_addrs = info->rti_addrs;
1609		ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
1610		ifm->ifm_index = ifp->if_index;
1611		ifm->_ifm_spare1 = 0;
1612		ifm->ifm_len = sizeof(*ifm);
1613		ifm->ifm_data_off = offsetof(struct if_msghdrl, ifm_data);
1614		ifd = &ifm->ifm_data;
1615	}
1616
1617	memcpy(ifd, src_ifd, sizeof(*ifd));
1618
1619	return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len));
1620}
1621
1622static int
1623sysctl_iflist_ifm(struct ifnet *ifp, const struct if_data *src_ifd,
1624    struct rt_addrinfo *info, struct walkarg *w, int len)
1625{
1626	struct if_msghdr *ifm;
1627	struct if_data *ifd;
1628
1629	ifm = (struct if_msghdr *)w->w_tmem;
1630
1631#ifdef COMPAT_FREEBSD32
1632	if (w->w_req->flags & SCTL_MASK32) {
1633		struct if_msghdr32 *ifm32;
1634
1635		ifm32 = (struct if_msghdr32 *)ifm;
1636		ifm32->ifm_addrs = info->rti_addrs;
1637		ifm32->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
1638		ifm32->ifm_index = ifp->if_index;
1639		ifd = &ifm32->ifm_data;
1640	} else
1641#endif
1642	{
1643		ifm->ifm_addrs = info->rti_addrs;
1644		ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
1645		ifm->ifm_index = ifp->if_index;
1646		ifd = &ifm->ifm_data;
1647	}
1648
1649	memcpy(ifd, src_ifd, sizeof(*ifd));
1650
1651	return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len));
1652}
1653
1654static int
1655sysctl_iflist_ifaml(struct ifaddr *ifa, struct rt_addrinfo *info,
1656    struct walkarg *w, int len)
1657{
1658	struct ifa_msghdrl *ifam;
1659	struct if_data *ifd;
1660
1661	ifam = (struct ifa_msghdrl *)w->w_tmem;
1662
1663#ifdef COMPAT_FREEBSD32
1664	if (w->w_req->flags & SCTL_MASK32) {
1665		struct ifa_msghdrl32 *ifam32;
1666
1667		ifam32 = (struct ifa_msghdrl32 *)ifam;
1668		ifam32->ifam_addrs = info->rti_addrs;
1669		ifam32->ifam_flags = ifa->ifa_flags;
1670		ifam32->ifam_index = ifa->ifa_ifp->if_index;
1671		ifam32->_ifam_spare1 = 0;
1672		ifam32->ifam_len = sizeof(*ifam32);
1673		ifam32->ifam_data_off =
1674		    offsetof(struct ifa_msghdrl32, ifam_data);
1675		ifam32->ifam_metric = ifa->ifa_ifp->if_metric;
1676		ifd = &ifam32->ifam_data;
1677	} else
1678#endif
1679	{
1680		ifam->ifam_addrs = info->rti_addrs;
1681		ifam->ifam_flags = ifa->ifa_flags;
1682		ifam->ifam_index = ifa->ifa_ifp->if_index;
1683		ifam->_ifam_spare1 = 0;
1684		ifam->ifam_len = sizeof(*ifam);
1685		ifam->ifam_data_off = offsetof(struct ifa_msghdrl, ifam_data);
1686		ifam->ifam_metric = ifa->ifa_ifp->if_metric;
1687		ifd = &ifam->ifam_data;
1688	}
1689
1690	bzero(ifd, sizeof(*ifd));
1691	ifd->ifi_datalen = sizeof(struct if_data);
1692	ifd->ifi_ipackets = counter_u64_fetch(ifa->ifa_ipackets);
1693	ifd->ifi_opackets = counter_u64_fetch(ifa->ifa_opackets);
1694	ifd->ifi_ibytes = counter_u64_fetch(ifa->ifa_ibytes);
1695	ifd->ifi_obytes = counter_u64_fetch(ifa->ifa_obytes);
1696
1697	/* Fixup if_data carp(4) vhid. */
1698	if (carp_get_vhid_p != NULL)
1699		ifd->ifi_vhid = (*carp_get_vhid_p)(ifa);
1700
1701	return (SYSCTL_OUT(w->w_req, w->w_tmem, len));
1702}
1703
1704static int
1705sysctl_iflist_ifam(struct ifaddr *ifa, struct rt_addrinfo *info,
1706    struct walkarg *w, int len)
1707{
1708	struct ifa_msghdr *ifam;
1709
1710	ifam = (struct ifa_msghdr *)w->w_tmem;
1711	ifam->ifam_addrs = info->rti_addrs;
1712	ifam->ifam_flags = ifa->ifa_flags;
1713	ifam->ifam_index = ifa->ifa_ifp->if_index;
1714	ifam->ifam_metric = ifa->ifa_ifp->if_metric;
1715
1716	return (SYSCTL_OUT(w->w_req, w->w_tmem, len));
1717}
1718
1719static int
1720sysctl_iflist(int af, struct walkarg *w)
1721{
1722	struct ifnet *ifp;
1723	struct ifaddr *ifa;
1724	struct if_data ifd;
1725	struct rt_addrinfo info;
1726	int len, error = 0;
1727	struct sockaddr_storage ss;
1728
1729	bzero((caddr_t)&info, sizeof(info));
1730	bzero(&ifd, sizeof(ifd));
1731	IFNET_RLOCK_NOSLEEP();
1732	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1733		if (w->w_arg && w->w_arg != ifp->if_index)
1734			continue;
1735		if_data_copy(ifp, &ifd);
1736		IF_ADDR_RLOCK(ifp);
1737		ifa = ifp->if_addr;
1738		info.rti_info[RTAX_IFP] = ifa->ifa_addr;
1739		error = rtsock_msg_buffer(RTM_IFINFO, &info, w, &len);
1740		if (error != 0)
1741			goto done;
1742		info.rti_info[RTAX_IFP] = NULL;
1743		if (w->w_req && w->w_tmem) {
1744			if (w->w_op == NET_RT_IFLISTL)
1745				error = sysctl_iflist_ifml(ifp, &ifd, &info, w,
1746				    len);
1747			else
1748				error = sysctl_iflist_ifm(ifp, &ifd, &info, w,
1749				    len);
1750			if (error)
1751				goto done;
1752		}
1753		while ((ifa = TAILQ_NEXT(ifa, ifa_link)) != NULL) {
1754			if (af && af != ifa->ifa_addr->sa_family)
1755				continue;
1756			if (prison_if(w->w_req->td->td_ucred,
1757			    ifa->ifa_addr) != 0)
1758				continue;
1759			info.rti_info[RTAX_IFA] = ifa->ifa_addr;
1760			info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(
1761			    ifa->ifa_addr, ifa->ifa_netmask, &ss);
1762			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1763			error = rtsock_msg_buffer(RTM_NEWADDR, &info, w, &len);
1764			if (error != 0)
1765				goto done;
1766			if (w->w_req && w->w_tmem) {
1767				if (w->w_op == NET_RT_IFLISTL)
1768					error = sysctl_iflist_ifaml(ifa, &info,
1769					    w, len);
1770				else
1771					error = sysctl_iflist_ifam(ifa, &info,
1772					    w, len);
1773				if (error)
1774					goto done;
1775			}
1776		}
1777		IF_ADDR_RUNLOCK(ifp);
1778		info.rti_info[RTAX_IFA] = NULL;
1779		info.rti_info[RTAX_NETMASK] = NULL;
1780		info.rti_info[RTAX_BRD] = NULL;
1781	}
1782done:
1783	if (ifp != NULL)
1784		IF_ADDR_RUNLOCK(ifp);
1785	IFNET_RUNLOCK_NOSLEEP();
1786	return (error);
1787}
1788
1789static int
1790sysctl_ifmalist(int af, struct walkarg *w)
1791{
1792	struct rt_addrinfo info;
1793	struct ifaddr *ifa;
1794	struct ifmultiaddr *ifma;
1795	struct ifnet *ifp;
1796	int error, len;
1797
1798	error = 0;
1799	bzero((caddr_t)&info, sizeof(info));
1800
1801	IFNET_RLOCK_NOSLEEP();
1802	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1803		if (w->w_arg && w->w_arg != ifp->if_index)
1804			continue;
1805		ifa = ifp->if_addr;
1806		info.rti_info[RTAX_IFP] = ifa ? ifa->ifa_addr : NULL;
1807		IF_ADDR_RLOCK(ifp);
1808		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1809			if (af && af != ifma->ifma_addr->sa_family)
1810				continue;
1811			if (prison_if(w->w_req->td->td_ucred,
1812			    ifma->ifma_addr) != 0)
1813				continue;
1814			info.rti_info[RTAX_IFA] = ifma->ifma_addr;
1815			info.rti_info[RTAX_GATEWAY] =
1816			    (ifma->ifma_addr->sa_family != AF_LINK) ?
1817			    ifma->ifma_lladdr : NULL;
1818			error = rtsock_msg_buffer(RTM_NEWMADDR, &info, w, &len);
1819			if (error != 0)
1820				break;
1821			if (w->w_req && w->w_tmem) {
1822				struct ifma_msghdr *ifmam;
1823
1824				ifmam = (struct ifma_msghdr *)w->w_tmem;
1825				ifmam->ifmam_index = ifma->ifma_ifp->if_index;
1826				ifmam->ifmam_flags = 0;
1827				ifmam->ifmam_addrs = info.rti_addrs;
1828				error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
1829				if (error != 0)
1830					break;
1831			}
1832		}
1833		IF_ADDR_RUNLOCK(ifp);
1834		if (error != 0)
1835			break;
1836	}
1837	IFNET_RUNLOCK_NOSLEEP();
1838	return (error);
1839}
1840
1841static int
1842sysctl_rtsock(SYSCTL_HANDLER_ARGS)
1843{
1844	int	*name = (int *)arg1;
1845	u_int	namelen = arg2;
1846	struct rib_head *rnh = NULL; /* silence compiler. */
1847	int	i, lim, error = EINVAL;
1848	int	fib = 0;
1849	u_char	af;
1850	struct	walkarg w;
1851
1852	name ++;
1853	namelen--;
1854	if (req->newptr)
1855		return (EPERM);
1856	if (name[1] == NET_RT_DUMP) {
1857		if (namelen == 3)
1858			fib = req->td->td_proc->p_fibnum;
1859		else if (namelen == 4)
1860			fib = (name[3] == RT_ALL_FIBS) ?
1861			    req->td->td_proc->p_fibnum : name[3];
1862		else
1863			return ((namelen < 3) ? EISDIR : ENOTDIR);
1864		if (fib < 0 || fib >= rt_numfibs)
1865			return (EINVAL);
1866	} else if (namelen != 3)
1867		return ((namelen < 3) ? EISDIR : ENOTDIR);
1868	af = name[0];
1869	if (af > AF_MAX)
1870		return (EINVAL);
1871	bzero(&w, sizeof(w));
1872	w.w_op = name[1];
1873	w.w_arg = name[2];
1874	w.w_req = req;
1875
1876	error = sysctl_wire_old_buffer(req, 0);
1877	if (error)
1878		return (error);
1879
1880	/*
1881	 * Allocate reply buffer in advance.
1882	 * All rtsock messages has maximum length of u_short.
1883	 */
1884	w.w_tmemsize = 65536;
1885	w.w_tmem = malloc(w.w_tmemsize, M_TEMP, M_WAITOK);
1886
1887	switch (w.w_op) {
1888
1889	case NET_RT_DUMP:
1890	case NET_RT_FLAGS:
1891		if (af == 0) {			/* dump all tables */
1892			i = 1;
1893			lim = AF_MAX;
1894		} else				/* dump only one table */
1895			i = lim = af;
1896
1897		/*
1898		 * take care of llinfo entries, the caller must
1899		 * specify an AF
1900		 */
1901		if (w.w_op == NET_RT_FLAGS &&
1902		    (w.w_arg == 0 || w.w_arg & RTF_LLINFO)) {
1903			if (af != 0)
1904				error = lltable_sysctl_dumparp(af, w.w_req);
1905			else
1906				error = EINVAL;
1907			break;
1908		}
1909		/*
1910		 * take care of routing entries
1911		 */
1912		for (error = 0; error == 0 && i <= lim; i++) {
1913			rnh = rt_tables_get_rnh(fib, i);
1914			if (rnh != NULL) {
1915				RIB_RLOCK(rnh);
1916			    	error = rnh->rnh_walktree(&rnh->head,
1917				    sysctl_dumpentry, &w);
1918				RIB_RUNLOCK(rnh);
1919			} else if (af != 0)
1920				error = EAFNOSUPPORT;
1921		}
1922		break;
1923
1924	case NET_RT_IFLIST:
1925	case NET_RT_IFLISTL:
1926		error = sysctl_iflist(af, &w);
1927		break;
1928
1929	case NET_RT_IFMALIST:
1930		error = sysctl_ifmalist(af, &w);
1931		break;
1932	}
1933
1934	free(w.w_tmem, M_TEMP);
1935	return (error);
1936}
1937
1938static SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD, sysctl_rtsock, "");
1939
1940/*
1941 * Definitions of protocols supported in the ROUTE domain.
1942 */
1943
1944static struct domain routedomain;		/* or at least forward */
1945
1946static struct protosw routesw[] = {
1947{
1948	.pr_type =		SOCK_RAW,
1949	.pr_domain =		&routedomain,
1950	.pr_flags =		PR_ATOMIC|PR_ADDR,
1951	.pr_output =		route_output,
1952	.pr_ctlinput =		raw_ctlinput,
1953	.pr_init =		raw_init,
1954	.pr_usrreqs =		&route_usrreqs
1955}
1956};
1957
1958static struct domain routedomain = {
1959	.dom_family =		PF_ROUTE,
1960	.dom_name =		 "route",
1961	.dom_protosw =		routesw,
1962	.dom_protoswNPROTOSW =	&routesw[nitems(routesw)]
1963};
1964
1965VNET_DOMAIN_SET(route);
1966