Deleted Added
full compact
rtsock.c (193219) rtsock.c (193232)
1/*-
2 * Copyright (c) 1988, 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * @(#)rtsock.c 8.7 (Berkeley) 10/12/95
1/*-
2 * Copyright (c) 1988, 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * @(#)rtsock.c 8.7 (Berkeley) 10/12/95
30 * $FreeBSD: head/sys/net/rtsock.c 193219 2009-06-01 10:41:38Z rwatson $
30 * $FreeBSD: head/sys/net/rtsock.c 193232 2009-06-01 15:49:42Z bz $
31 */
32#include "opt_sctp.h"
33#include "opt_mpath.h"
34#include "opt_route.h"
35#include "opt_inet.h"
36#include "opt_inet6.h"
37
38#include <sys/param.h>
39#include <sys/domain.h>
40#include <sys/jail.h>
41#include <sys/kernel.h>
42#include <sys/lock.h>
43#include <sys/malloc.h>
44#include <sys/mbuf.h>
45#include <sys/priv.h>
46#include <sys/proc.h>
47#include <sys/protosw.h>
48#include <sys/rwlock.h>
49#include <sys/signalvar.h>
50#include <sys/socket.h>
51#include <sys/socketvar.h>
52#include <sys/sysctl.h>
53#include <sys/systm.h>
54#include <sys/vimage.h>
55
56#include <net/if.h>
57#include <net/if_dl.h>
58#include <net/if_llatbl.h>
59#include <net/netisr.h>
60#include <net/raw_cb.h>
61#include <net/route.h>
62#include <net/vnet.h>
63
64#include <netinet/in.h>
65#ifdef INET6
66#include <netinet6/scope6_var.h>
67#endif
68
69#ifdef SCTP
70extern void sctp_addr_change(struct ifaddr *ifa, int cmd);
71#endif /* SCTP */
72
73MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables");
74
75/* NB: these are not modified */
76static struct sockaddr route_src = { 2, PF_ROUTE, };
77static struct sockaddr sa_zero = { sizeof(sa_zero), AF_INET, };
78
/*
 * Counts of attached routing sockets, per protocol family and in total.
 * rts_attach() increments and rts_detach() decrements these while holding
 * RTSOCK_LOCK().
 */
static struct {
	int	ip_count;	/* sockets attached with protocol AF_INET */
	int	ip6_count;	/* sockets attached with protocol AF_INET6 */
	int	ipx_count;	/* sockets attached with protocol AF_IPX */
	int	any_count;	/* all attached sockets, any protocol */
} route_cb;
85
86struct mtx rtsock_mtx;
87MTX_SYSINIT(rtsock, &rtsock_mtx, "rtsock route_cb lock", MTX_DEF);
88
89#define RTSOCK_LOCK() mtx_lock(&rtsock_mtx)
90#define RTSOCK_UNLOCK() mtx_unlock(&rtsock_mtx)
91#define RTSOCK_LOCK_ASSERT() mtx_assert(&rtsock_mtx, MA_OWNED)
92
93SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RD, 0, "");
94
/*
 * Argument block handed to rt_msg2(), sysctl_iflist() and
 * sysctl_ifmalist() (see their prototypes below).
 * NOTE(review): the field meanings are not visible in this part of the
 * file; the comments on the fields are assumptions to be confirmed.
 */
struct walkarg {
	int	w_tmemsize;	/* presumably the size of w_tmem -- confirm */
	int	w_op, w_arg;	/* presumably the sysctl operation and its argument -- confirm */
	caddr_t	w_tmem;		/* presumably a scratch buffer -- confirm */
	struct sysctl_req *w_req;	/* sysctl request being served */
};
101
102static void rts_input(struct mbuf *m);
103static struct mbuf *rt_msg1(int type, struct rt_addrinfo *rtinfo);
104static int rt_msg2(int type, struct rt_addrinfo *rtinfo,
105 caddr_t cp, struct walkarg *w);
106static int rt_xaddrs(caddr_t cp, caddr_t cplim,
107 struct rt_addrinfo *rtinfo);
108static int sysctl_dumpentry(struct radix_node *rn, void *vw);
109static int sysctl_iflist(int af, struct walkarg *w);
110static int sysctl_ifmalist(int af, struct walkarg *w);
111static int route_output(struct mbuf *m, struct socket *so);
112static void rt_setmetrics(u_long which, const struct rt_metrics *in,
113 struct rt_metrics_lite *out);
114static void rt_getmetrics(const struct rt_metrics_lite *in,
115 struct rt_metrics *out);
116static void rt_dispatch(struct mbuf *, const struct sockaddr *);
117
118static struct netisr_handler rtsock_nh = {
119 .nh_name = "rtsock",
120 .nh_handler = rts_input,
121 .nh_proto = NETISR_ROUTE,
122 .nh_policy = NETISR_POLICY_SOURCE,
123};
124
125static int
126sysctl_route_netisr_maxqlen(SYSCTL_HANDLER_ARGS)
127{
128 int error, qlimit;
129
130 netisr_getqlimit(&rtsock_nh, &qlimit);
131 error = sysctl_handle_int(oidp, &qlimit, 0, req);
132 if (error || !req->newptr)
133 return (error);
134 if (qlimit < 1)
135 return (EINVAL);
136 return (netisr_setqlimit(&rtsock_nh, qlimit));
137}
138SYSCTL_PROC(_net_route, OID_AUTO, netisr_maxqlen, CTLTYPE_INT|CTLFLAG_RW,
139 0, 0, sysctl_route_netisr_maxqlen, "I",
140 "maximum routing socket dispatch queue length");
141
142static void
143rts_init(void)
144{
145 int tmp;
146
147 if (TUNABLE_INT_FETCH("net.route.netisr_maxqlen", &tmp))
148 rtsock_nh.nh_qlimit = tmp;
149 netisr_register(&rtsock_nh);
150}
151SYSINIT(rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rts_init, 0);
152
153static void
154rts_input(struct mbuf *m)
155{
156 struct sockproto route_proto;
157 unsigned short *family;
158 struct m_tag *tag;
159
160 route_proto.sp_family = PF_ROUTE;
161 tag = m_tag_find(m, PACKET_TAG_RTSOCKFAM, NULL);
162 if (tag != NULL) {
163 family = (unsigned short *)(tag + 1);
164 route_proto.sp_protocol = *family;
165 m_tag_delete(m, tag);
166 } else
167 route_proto.sp_protocol = 0;
168
169 raw_input(m, &route_proto, &route_src);
170}
171
172/*
173 * It really doesn't make any sense at all for this code to share much
174 * with raw_usrreq.c, since its functionality is so restricted. XXX
175 */
176static void
177rts_abort(struct socket *so)
178{
179
180 raw_usrreqs.pru_abort(so);
181}
182
183static void
184rts_close(struct socket *so)
185{
186
187 raw_usrreqs.pru_close(so);
188}
189
190/* pru_accept is EOPNOTSUPP */
191
192static int
193rts_attach(struct socket *so, int proto, struct thread *td)
194{
195 struct rawcb *rp;
196 int s, error;
197
198 KASSERT(so->so_pcb == NULL, ("rts_attach: so_pcb != NULL"));
199
200 /* XXX */
201 rp = malloc(sizeof *rp, M_PCB, M_WAITOK | M_ZERO);
202 if (rp == NULL)
203 return ENOBUFS;
204
205 /*
206 * The splnet() is necessary to block protocols from sending
207 * error notifications (like RTM_REDIRECT or RTM_LOSING) while
208 * this PCB is extant but incompletely initialized.
209 * Probably we should try to do more of this work beforehand and
210 * eliminate the spl.
211 */
212 s = splnet();
213 so->so_pcb = (caddr_t)rp;
214 so->so_fibnum = td->td_proc->p_fibnum;
215 error = raw_attach(so, proto);
216 rp = sotorawcb(so);
217 if (error) {
218 splx(s);
219 so->so_pcb = NULL;
220 free(rp, M_PCB);
221 return error;
222 }
223 RTSOCK_LOCK();
224 switch(rp->rcb_proto.sp_protocol) {
225 case AF_INET:
226 route_cb.ip_count++;
227 break;
228 case AF_INET6:
229 route_cb.ip6_count++;
230 break;
231 case AF_IPX:
232 route_cb.ipx_count++;
233 break;
234 }
235 route_cb.any_count++;
236 RTSOCK_UNLOCK();
237 soisconnected(so);
238 so->so_options |= SO_USELOOPBACK;
239 splx(s);
240 return 0;
241}
242
243static int
244rts_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
245{
246
247 return (raw_usrreqs.pru_bind(so, nam, td)); /* xxx just EINVAL */
248}
249
250static int
251rts_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
252{
253
254 return (raw_usrreqs.pru_connect(so, nam, td)); /* XXX just EINVAL */
255}
256
257/* pru_connect2 is EOPNOTSUPP */
258/* pru_control is EOPNOTSUPP */
259
260static void
261rts_detach(struct socket *so)
262{
263 struct rawcb *rp = sotorawcb(so);
264
265 KASSERT(rp != NULL, ("rts_detach: rp == NULL"));
266
267 RTSOCK_LOCK();
268 switch(rp->rcb_proto.sp_protocol) {
269 case AF_INET:
270 route_cb.ip_count--;
271 break;
272 case AF_INET6:
273 route_cb.ip6_count--;
274 break;
275 case AF_IPX:
276 route_cb.ipx_count--;
277 break;
278 }
279 route_cb.any_count--;
280 RTSOCK_UNLOCK();
281 raw_usrreqs.pru_detach(so);
282}
283
284static int
285rts_disconnect(struct socket *so)
286{
287
288 return (raw_usrreqs.pru_disconnect(so));
289}
290
291/* pru_listen is EOPNOTSUPP */
292
293static int
294rts_peeraddr(struct socket *so, struct sockaddr **nam)
295{
296
297 return (raw_usrreqs.pru_peeraddr(so, nam));
298}
299
300/* pru_rcvd is EOPNOTSUPP */
301/* pru_rcvoob is EOPNOTSUPP */
302
303static int
304rts_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
305 struct mbuf *control, struct thread *td)
306{
307
308 return (raw_usrreqs.pru_send(so, flags, m, nam, control, td));
309}
310
311/* pru_sense is null */
312
313static int
314rts_shutdown(struct socket *so)
315{
316
317 return (raw_usrreqs.pru_shutdown(so));
318}
319
320static int
321rts_sockaddr(struct socket *so, struct sockaddr **nam)
322{
323
324 return (raw_usrreqs.pru_sockaddr(so, nam));
325}
326
327static struct pr_usrreqs route_usrreqs = {
328 .pru_abort = rts_abort,
329 .pru_attach = rts_attach,
330 .pru_bind = rts_bind,
331 .pru_connect = rts_connect,
332 .pru_detach = rts_detach,
333 .pru_disconnect = rts_disconnect,
334 .pru_peeraddr = rts_peeraddr,
335 .pru_send = rts_send,
336 .pru_shutdown = rts_shutdown,
337 .pru_sockaddr = rts_sockaddr,
338 .pru_close = rts_close,
339};
340
/* Guarded so the union is defined only once if another file already did. */
#ifndef _SOCKADDR_UNION_DEFINED
#define _SOCKADDR_UNION_DEFINED
/*
 * The union of all possible address formats we handle: a generic
 * sockaddr, an IPv4 sockaddr_in and an IPv6 sockaddr_in6.  rtm_get_jailed()
 * uses it as caller-provided storage for the address it builds.
 */
union sockaddr_union {
	struct sockaddr		sa;
	struct sockaddr_in	sin;
	struct sockaddr_in6	sin6;
};
#endif /* _SOCKADDR_UNION_DEFINED */
352
353static int
354rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
355 struct rtentry *rt, union sockaddr_union *saun, struct ucred *cred)
356{
357
358 /* First, see if the returned address is part of the jail. */
359 if (prison_if(cred, rt->rt_ifa->ifa_addr) == 0) {
360 info->rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
361 return (0);
362 }
363
364 switch (info->rti_info[RTAX_DST]->sa_family) {
365#ifdef INET
366 case AF_INET:
367 {
368 struct in_addr ia;
369 struct ifaddr *ifa;
370 int found;
371
372 found = 0;
373 /*
374 * Try to find an address on the given outgoing interface
375 * that belongs to the jail.
376 */
377 IF_ADDR_LOCK(ifp);
378 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
379 struct sockaddr *sa;
380 sa = ifa->ifa_addr;
381 if (sa->sa_family != AF_INET)
382 continue;
383 ia = ((struct sockaddr_in *)sa)->sin_addr;
384 if (prison_check_ip4(cred, &ia) == 0) {
385 found = 1;
386 break;
387 }
388 }
389 IF_ADDR_UNLOCK(ifp);
390 if (!found) {
391 /*
392 * As a last resort return the 'default' jail address.
393 */
394 ia = ((struct sockaddr_in *)rt->rt_ifa->ifa_addr)->
395 sin_addr;
396 if (prison_get_ip4(cred, &ia) != 0)
397 return (ESRCH);
398 }
399 bzero(&saun->sin, sizeof(struct sockaddr_in));
400 saun->sin.sin_len = sizeof(struct sockaddr_in);
401 saun->sin.sin_family = AF_INET;
402 saun->sin.sin_addr.s_addr = ia.s_addr;
403 info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin;
404 break;
405 }
406#endif
407#ifdef INET6
408 case AF_INET6:
409 {
410 struct in6_addr ia6;
411 struct ifaddr *ifa;
412 int found;
413
414 found = 0;
415 /*
416 * Try to find an address on the given outgoing interface
417 * that belongs to the jail.
418 */
419 IF_ADDR_LOCK(ifp);
420 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
421 struct sockaddr *sa;
422 sa = ifa->ifa_addr;
423 if (sa->sa_family != AF_INET6)
424 continue;
425 bcopy(&((struct sockaddr_in6 *)sa)->sin6_addr,
426 &ia6, sizeof(struct in6_addr));
427 if (prison_check_ip6(cred, &ia6) == 0) {
428 found = 1;
429 break;
430 }
431 }
432 IF_ADDR_UNLOCK(ifp);
433 if (!found) {
434 /*
435 * As a last resort return the 'default' jail address.
436 */
437 ia6 = ((struct sockaddr_in6 *)rt->rt_ifa->ifa_addr)->
438 sin6_addr;
439 if (prison_get_ip6(cred, &ia6) != 0)
440 return (ESRCH);
441 }
442 bzero(&saun->sin6, sizeof(struct sockaddr_in6));
443 saun->sin6.sin6_len = sizeof(struct sockaddr_in6);
444 saun->sin6.sin6_family = AF_INET6;
445 bcopy(&ia6, &saun->sin6.sin6_addr, sizeof(struct in6_addr));
446 if (sa6_recoverscope(&saun->sin6) != 0)
447 return (ESRCH);
448 info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin6;
449 break;
450 }
451#endif
452 default:
453 return (ESRCH);
454 }
455 return (0);
456}
457
458/*ARGSUSED*/
459static int
460route_output(struct mbuf *m, struct socket *so)
461{
462#define sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0)
31 */
32#include "opt_sctp.h"
33#include "opt_mpath.h"
34#include "opt_route.h"
35#include "opt_inet.h"
36#include "opt_inet6.h"
37
38#include <sys/param.h>
39#include <sys/domain.h>
40#include <sys/jail.h>
41#include <sys/kernel.h>
42#include <sys/lock.h>
43#include <sys/malloc.h>
44#include <sys/mbuf.h>
45#include <sys/priv.h>
46#include <sys/proc.h>
47#include <sys/protosw.h>
48#include <sys/rwlock.h>
49#include <sys/signalvar.h>
50#include <sys/socket.h>
51#include <sys/socketvar.h>
52#include <sys/sysctl.h>
53#include <sys/systm.h>
54#include <sys/vimage.h>
55
56#include <net/if.h>
57#include <net/if_dl.h>
58#include <net/if_llatbl.h>
59#include <net/netisr.h>
60#include <net/raw_cb.h>
61#include <net/route.h>
62#include <net/vnet.h>
63
64#include <netinet/in.h>
65#ifdef INET6
66#include <netinet6/scope6_var.h>
67#endif
68
69#ifdef SCTP
70extern void sctp_addr_change(struct ifaddr *ifa, int cmd);
71#endif /* SCTP */
72
73MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables");
74
75/* NB: these are not modified */
76static struct sockaddr route_src = { 2, PF_ROUTE, };
77static struct sockaddr sa_zero = { sizeof(sa_zero), AF_INET, };
78
/*
 * Counts of attached routing sockets, per protocol family and in total.
 * rts_attach() increments and rts_detach() decrements these while holding
 * RTSOCK_LOCK().
 */
static struct {
	int	ip_count;	/* sockets attached with protocol AF_INET */
	int	ip6_count;	/* sockets attached with protocol AF_INET6 */
	int	ipx_count;	/* sockets attached with protocol AF_IPX */
	int	any_count;	/* all attached sockets, any protocol */
} route_cb;
85
86struct mtx rtsock_mtx;
87MTX_SYSINIT(rtsock, &rtsock_mtx, "rtsock route_cb lock", MTX_DEF);
88
89#define RTSOCK_LOCK() mtx_lock(&rtsock_mtx)
90#define RTSOCK_UNLOCK() mtx_unlock(&rtsock_mtx)
91#define RTSOCK_LOCK_ASSERT() mtx_assert(&rtsock_mtx, MA_OWNED)
92
93SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RD, 0, "");
94
/*
 * Argument block handed to rt_msg2(), sysctl_iflist() and
 * sysctl_ifmalist() (see their prototypes below).
 * NOTE(review): the field meanings are not visible in this part of the
 * file; the comments on the fields are assumptions to be confirmed.
 */
struct walkarg {
	int	w_tmemsize;	/* presumably the size of w_tmem -- confirm */
	int	w_op, w_arg;	/* presumably the sysctl operation and its argument -- confirm */
	caddr_t	w_tmem;		/* presumably a scratch buffer -- confirm */
	struct sysctl_req *w_req;	/* sysctl request being served */
};
101
102static void rts_input(struct mbuf *m);
103static struct mbuf *rt_msg1(int type, struct rt_addrinfo *rtinfo);
104static int rt_msg2(int type, struct rt_addrinfo *rtinfo,
105 caddr_t cp, struct walkarg *w);
106static int rt_xaddrs(caddr_t cp, caddr_t cplim,
107 struct rt_addrinfo *rtinfo);
108static int sysctl_dumpentry(struct radix_node *rn, void *vw);
109static int sysctl_iflist(int af, struct walkarg *w);
110static int sysctl_ifmalist(int af, struct walkarg *w);
111static int route_output(struct mbuf *m, struct socket *so);
112static void rt_setmetrics(u_long which, const struct rt_metrics *in,
113 struct rt_metrics_lite *out);
114static void rt_getmetrics(const struct rt_metrics_lite *in,
115 struct rt_metrics *out);
116static void rt_dispatch(struct mbuf *, const struct sockaddr *);
117
118static struct netisr_handler rtsock_nh = {
119 .nh_name = "rtsock",
120 .nh_handler = rts_input,
121 .nh_proto = NETISR_ROUTE,
122 .nh_policy = NETISR_POLICY_SOURCE,
123};
124
125static int
126sysctl_route_netisr_maxqlen(SYSCTL_HANDLER_ARGS)
127{
128 int error, qlimit;
129
130 netisr_getqlimit(&rtsock_nh, &qlimit);
131 error = sysctl_handle_int(oidp, &qlimit, 0, req);
132 if (error || !req->newptr)
133 return (error);
134 if (qlimit < 1)
135 return (EINVAL);
136 return (netisr_setqlimit(&rtsock_nh, qlimit));
137}
138SYSCTL_PROC(_net_route, OID_AUTO, netisr_maxqlen, CTLTYPE_INT|CTLFLAG_RW,
139 0, 0, sysctl_route_netisr_maxqlen, "I",
140 "maximum routing socket dispatch queue length");
141
142static void
143rts_init(void)
144{
145 int tmp;
146
147 if (TUNABLE_INT_FETCH("net.route.netisr_maxqlen", &tmp))
148 rtsock_nh.nh_qlimit = tmp;
149 netisr_register(&rtsock_nh);
150}
151SYSINIT(rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rts_init, 0);
152
153static void
154rts_input(struct mbuf *m)
155{
156 struct sockproto route_proto;
157 unsigned short *family;
158 struct m_tag *tag;
159
160 route_proto.sp_family = PF_ROUTE;
161 tag = m_tag_find(m, PACKET_TAG_RTSOCKFAM, NULL);
162 if (tag != NULL) {
163 family = (unsigned short *)(tag + 1);
164 route_proto.sp_protocol = *family;
165 m_tag_delete(m, tag);
166 } else
167 route_proto.sp_protocol = 0;
168
169 raw_input(m, &route_proto, &route_src);
170}
171
172/*
173 * It really doesn't make any sense at all for this code to share much
174 * with raw_usrreq.c, since its functionality is so restricted. XXX
175 */
176static void
177rts_abort(struct socket *so)
178{
179
180 raw_usrreqs.pru_abort(so);
181}
182
183static void
184rts_close(struct socket *so)
185{
186
187 raw_usrreqs.pru_close(so);
188}
189
190/* pru_accept is EOPNOTSUPP */
191
192static int
193rts_attach(struct socket *so, int proto, struct thread *td)
194{
195 struct rawcb *rp;
196 int s, error;
197
198 KASSERT(so->so_pcb == NULL, ("rts_attach: so_pcb != NULL"));
199
200 /* XXX */
201 rp = malloc(sizeof *rp, M_PCB, M_WAITOK | M_ZERO);
202 if (rp == NULL)
203 return ENOBUFS;
204
205 /*
206 * The splnet() is necessary to block protocols from sending
207 * error notifications (like RTM_REDIRECT or RTM_LOSING) while
208 * this PCB is extant but incompletely initialized.
209 * Probably we should try to do more of this work beforehand and
210 * eliminate the spl.
211 */
212 s = splnet();
213 so->so_pcb = (caddr_t)rp;
214 so->so_fibnum = td->td_proc->p_fibnum;
215 error = raw_attach(so, proto);
216 rp = sotorawcb(so);
217 if (error) {
218 splx(s);
219 so->so_pcb = NULL;
220 free(rp, M_PCB);
221 return error;
222 }
223 RTSOCK_LOCK();
224 switch(rp->rcb_proto.sp_protocol) {
225 case AF_INET:
226 route_cb.ip_count++;
227 break;
228 case AF_INET6:
229 route_cb.ip6_count++;
230 break;
231 case AF_IPX:
232 route_cb.ipx_count++;
233 break;
234 }
235 route_cb.any_count++;
236 RTSOCK_UNLOCK();
237 soisconnected(so);
238 so->so_options |= SO_USELOOPBACK;
239 splx(s);
240 return 0;
241}
242
243static int
244rts_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
245{
246
247 return (raw_usrreqs.pru_bind(so, nam, td)); /* xxx just EINVAL */
248}
249
250static int
251rts_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
252{
253
254 return (raw_usrreqs.pru_connect(so, nam, td)); /* XXX just EINVAL */
255}
256
257/* pru_connect2 is EOPNOTSUPP */
258/* pru_control is EOPNOTSUPP */
259
260static void
261rts_detach(struct socket *so)
262{
263 struct rawcb *rp = sotorawcb(so);
264
265 KASSERT(rp != NULL, ("rts_detach: rp == NULL"));
266
267 RTSOCK_LOCK();
268 switch(rp->rcb_proto.sp_protocol) {
269 case AF_INET:
270 route_cb.ip_count--;
271 break;
272 case AF_INET6:
273 route_cb.ip6_count--;
274 break;
275 case AF_IPX:
276 route_cb.ipx_count--;
277 break;
278 }
279 route_cb.any_count--;
280 RTSOCK_UNLOCK();
281 raw_usrreqs.pru_detach(so);
282}
283
284static int
285rts_disconnect(struct socket *so)
286{
287
288 return (raw_usrreqs.pru_disconnect(so));
289}
290
291/* pru_listen is EOPNOTSUPP */
292
293static int
294rts_peeraddr(struct socket *so, struct sockaddr **nam)
295{
296
297 return (raw_usrreqs.pru_peeraddr(so, nam));
298}
299
300/* pru_rcvd is EOPNOTSUPP */
301/* pru_rcvoob is EOPNOTSUPP */
302
303static int
304rts_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
305 struct mbuf *control, struct thread *td)
306{
307
308 return (raw_usrreqs.pru_send(so, flags, m, nam, control, td));
309}
310
311/* pru_sense is null */
312
313static int
314rts_shutdown(struct socket *so)
315{
316
317 return (raw_usrreqs.pru_shutdown(so));
318}
319
320static int
321rts_sockaddr(struct socket *so, struct sockaddr **nam)
322{
323
324 return (raw_usrreqs.pru_sockaddr(so, nam));
325}
326
327static struct pr_usrreqs route_usrreqs = {
328 .pru_abort = rts_abort,
329 .pru_attach = rts_attach,
330 .pru_bind = rts_bind,
331 .pru_connect = rts_connect,
332 .pru_detach = rts_detach,
333 .pru_disconnect = rts_disconnect,
334 .pru_peeraddr = rts_peeraddr,
335 .pru_send = rts_send,
336 .pru_shutdown = rts_shutdown,
337 .pru_sockaddr = rts_sockaddr,
338 .pru_close = rts_close,
339};
340
/* Guarded so the union is defined only once if another file already did. */
#ifndef _SOCKADDR_UNION_DEFINED
#define _SOCKADDR_UNION_DEFINED
/*
 * The union of all possible address formats we handle: a generic
 * sockaddr, an IPv4 sockaddr_in and an IPv6 sockaddr_in6.  rtm_get_jailed()
 * uses it as caller-provided storage for the address it builds.
 */
union sockaddr_union {
	struct sockaddr		sa;
	struct sockaddr_in	sin;
	struct sockaddr_in6	sin6;
};
#endif /* _SOCKADDR_UNION_DEFINED */
352
353static int
354rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
355 struct rtentry *rt, union sockaddr_union *saun, struct ucred *cred)
356{
357
358 /* First, see if the returned address is part of the jail. */
359 if (prison_if(cred, rt->rt_ifa->ifa_addr) == 0) {
360 info->rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
361 return (0);
362 }
363
364 switch (info->rti_info[RTAX_DST]->sa_family) {
365#ifdef INET
366 case AF_INET:
367 {
368 struct in_addr ia;
369 struct ifaddr *ifa;
370 int found;
371
372 found = 0;
373 /*
374 * Try to find an address on the given outgoing interface
375 * that belongs to the jail.
376 */
377 IF_ADDR_LOCK(ifp);
378 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
379 struct sockaddr *sa;
380 sa = ifa->ifa_addr;
381 if (sa->sa_family != AF_INET)
382 continue;
383 ia = ((struct sockaddr_in *)sa)->sin_addr;
384 if (prison_check_ip4(cred, &ia) == 0) {
385 found = 1;
386 break;
387 }
388 }
389 IF_ADDR_UNLOCK(ifp);
390 if (!found) {
391 /*
392 * As a last resort return the 'default' jail address.
393 */
394 ia = ((struct sockaddr_in *)rt->rt_ifa->ifa_addr)->
395 sin_addr;
396 if (prison_get_ip4(cred, &ia) != 0)
397 return (ESRCH);
398 }
399 bzero(&saun->sin, sizeof(struct sockaddr_in));
400 saun->sin.sin_len = sizeof(struct sockaddr_in);
401 saun->sin.sin_family = AF_INET;
402 saun->sin.sin_addr.s_addr = ia.s_addr;
403 info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin;
404 break;
405 }
406#endif
407#ifdef INET6
408 case AF_INET6:
409 {
410 struct in6_addr ia6;
411 struct ifaddr *ifa;
412 int found;
413
414 found = 0;
415 /*
416 * Try to find an address on the given outgoing interface
417 * that belongs to the jail.
418 */
419 IF_ADDR_LOCK(ifp);
420 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
421 struct sockaddr *sa;
422 sa = ifa->ifa_addr;
423 if (sa->sa_family != AF_INET6)
424 continue;
425 bcopy(&((struct sockaddr_in6 *)sa)->sin6_addr,
426 &ia6, sizeof(struct in6_addr));
427 if (prison_check_ip6(cred, &ia6) == 0) {
428 found = 1;
429 break;
430 }
431 }
432 IF_ADDR_UNLOCK(ifp);
433 if (!found) {
434 /*
435 * As a last resort return the 'default' jail address.
436 */
437 ia6 = ((struct sockaddr_in6 *)rt->rt_ifa->ifa_addr)->
438 sin6_addr;
439 if (prison_get_ip6(cred, &ia6) != 0)
440 return (ESRCH);
441 }
442 bzero(&saun->sin6, sizeof(struct sockaddr_in6));
443 saun->sin6.sin6_len = sizeof(struct sockaddr_in6);
444 saun->sin6.sin6_family = AF_INET6;
445 bcopy(&ia6, &saun->sin6.sin6_addr, sizeof(struct in6_addr));
446 if (sa6_recoverscope(&saun->sin6) != 0)
447 return (ESRCH);
448 info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin6;
449 break;
450 }
451#endif
452 default:
453 return (ESRCH);
454 }
455 return (0);
456}
457
458/*ARGSUSED*/
459static int
460route_output(struct mbuf *m, struct socket *so)
461{
462#define sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0)
463 INIT_VNET_NET(so->so_vnet);
464 struct rt_msghdr *rtm = NULL;
465 struct rtentry *rt = NULL;
466 struct radix_node_head *rnh;
467 struct rt_addrinfo info;
468 int len, error = 0;
469 struct ifnet *ifp = NULL;
470 union sockaddr_union saun;
471
472#define senderr(e) { error = e; goto flush;}
473 if (m == NULL || ((m->m_len < sizeof(long)) &&
474 (m = m_pullup(m, sizeof(long))) == NULL))
475 return (ENOBUFS);
476 if ((m->m_flags & M_PKTHDR) == 0)
477 panic("route_output");
478 len = m->m_pkthdr.len;
479 if (len < sizeof(*rtm) ||
480 len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
481 info.rti_info[RTAX_DST] = NULL;
482 senderr(EINVAL);
483 }
484 R_Malloc(rtm, struct rt_msghdr *, len);
485 if (rtm == NULL) {
486 info.rti_info[RTAX_DST] = NULL;
487 senderr(ENOBUFS);
488 }
489 m_copydata(m, 0, len, (caddr_t)rtm);
490 if (rtm->rtm_version != RTM_VERSION) {
491 info.rti_info[RTAX_DST] = NULL;
492 senderr(EPROTONOSUPPORT);
493 }
494 rtm->rtm_pid = curproc->p_pid;
495 bzero(&info, sizeof(info));
496 info.rti_addrs = rtm->rtm_addrs;
497 if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info)) {
498 info.rti_info[RTAX_DST] = NULL;
499 senderr(EINVAL);
500 }
501 info.rti_flags = rtm->rtm_flags;
502 if (info.rti_info[RTAX_DST] == NULL ||
503 info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
504 (info.rti_info[RTAX_GATEWAY] != NULL &&
505 info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX))
506 senderr(EINVAL);
507 /*
508 * Verify that the caller has the appropriate privilege; RTM_GET
509 * is the only operation the non-superuser is allowed.
510 */
511 if (rtm->rtm_type != RTM_GET) {
512 error = priv_check(curthread, PRIV_NET_ROUTE);
513 if (error)
514 senderr(error);
515 }
516
517 switch (rtm->rtm_type) {
518 struct rtentry *saved_nrt;
519
520 case RTM_ADD:
521 if (info.rti_info[RTAX_GATEWAY] == NULL)
522 senderr(EINVAL);
523 saved_nrt = NULL;
524
525 /* support for new ARP code */
526 if (info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK &&
527 (rtm->rtm_flags & RTF_LLDATA) != 0) {
528 error = lla_rt_output(rtm, &info);
529 break;
530 }
531 error = rtrequest1_fib(RTM_ADD, &info, &saved_nrt,
532 so->so_fibnum);
533 if (error == 0 && saved_nrt) {
534 RT_LOCK(saved_nrt);
535 rt_setmetrics(rtm->rtm_inits,
536 &rtm->rtm_rmx, &saved_nrt->rt_rmx);
537 rtm->rtm_index = saved_nrt->rt_ifp->if_index;
538 RT_REMREF(saved_nrt);
539 RT_UNLOCK(saved_nrt);
540 }
541 break;
542
543 case RTM_DELETE:
544 saved_nrt = NULL;
545 /* support for new ARP code */
546 if (info.rti_info[RTAX_GATEWAY] &&
547 (info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK) &&
548 (rtm->rtm_flags & RTF_LLDATA) != 0) {
549 error = lla_rt_output(rtm, &info);
550 break;
551 }
552 error = rtrequest1_fib(RTM_DELETE, &info, &saved_nrt,
553 so->so_fibnum);
554 if (error == 0) {
555 RT_LOCK(saved_nrt);
556 rt = saved_nrt;
557 goto report;
558 }
559 break;
560
561 case RTM_GET:
562 case RTM_CHANGE:
563 case RTM_LOCK:
463 struct rt_msghdr *rtm = NULL;
464 struct rtentry *rt = NULL;
465 struct radix_node_head *rnh;
466 struct rt_addrinfo info;
467 int len, error = 0;
468 struct ifnet *ifp = NULL;
469 union sockaddr_union saun;
470
471#define senderr(e) { error = e; goto flush;}
472 if (m == NULL || ((m->m_len < sizeof(long)) &&
473 (m = m_pullup(m, sizeof(long))) == NULL))
474 return (ENOBUFS);
475 if ((m->m_flags & M_PKTHDR) == 0)
476 panic("route_output");
477 len = m->m_pkthdr.len;
478 if (len < sizeof(*rtm) ||
479 len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
480 info.rti_info[RTAX_DST] = NULL;
481 senderr(EINVAL);
482 }
483 R_Malloc(rtm, struct rt_msghdr *, len);
484 if (rtm == NULL) {
485 info.rti_info[RTAX_DST] = NULL;
486 senderr(ENOBUFS);
487 }
488 m_copydata(m, 0, len, (caddr_t)rtm);
489 if (rtm->rtm_version != RTM_VERSION) {
490 info.rti_info[RTAX_DST] = NULL;
491 senderr(EPROTONOSUPPORT);
492 }
493 rtm->rtm_pid = curproc->p_pid;
494 bzero(&info, sizeof(info));
495 info.rti_addrs = rtm->rtm_addrs;
496 if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info)) {
497 info.rti_info[RTAX_DST] = NULL;
498 senderr(EINVAL);
499 }
500 info.rti_flags = rtm->rtm_flags;
501 if (info.rti_info[RTAX_DST] == NULL ||
502 info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
503 (info.rti_info[RTAX_GATEWAY] != NULL &&
504 info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX))
505 senderr(EINVAL);
506 /*
507 * Verify that the caller has the appropriate privilege; RTM_GET
508 * is the only operation the non-superuser is allowed.
509 */
510 if (rtm->rtm_type != RTM_GET) {
511 error = priv_check(curthread, PRIV_NET_ROUTE);
512 if (error)
513 senderr(error);
514 }
515
516 switch (rtm->rtm_type) {
517 struct rtentry *saved_nrt;
518
519 case RTM_ADD:
520 if (info.rti_info[RTAX_GATEWAY] == NULL)
521 senderr(EINVAL);
522 saved_nrt = NULL;
523
524 /* support for new ARP code */
525 if (info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK &&
526 (rtm->rtm_flags & RTF_LLDATA) != 0) {
527 error = lla_rt_output(rtm, &info);
528 break;
529 }
530 error = rtrequest1_fib(RTM_ADD, &info, &saved_nrt,
531 so->so_fibnum);
532 if (error == 0 && saved_nrt) {
533 RT_LOCK(saved_nrt);
534 rt_setmetrics(rtm->rtm_inits,
535 &rtm->rtm_rmx, &saved_nrt->rt_rmx);
536 rtm->rtm_index = saved_nrt->rt_ifp->if_index;
537 RT_REMREF(saved_nrt);
538 RT_UNLOCK(saved_nrt);
539 }
540 break;
541
542 case RTM_DELETE:
543 saved_nrt = NULL;
544 /* support for new ARP code */
545 if (info.rti_info[RTAX_GATEWAY] &&
546 (info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK) &&
547 (rtm->rtm_flags & RTF_LLDATA) != 0) {
548 error = lla_rt_output(rtm, &info);
549 break;
550 }
551 error = rtrequest1_fib(RTM_DELETE, &info, &saved_nrt,
552 so->so_fibnum);
553 if (error == 0) {
554 RT_LOCK(saved_nrt);
555 rt = saved_nrt;
556 goto report;
557 }
558 break;
559
560 case RTM_GET:
561 case RTM_CHANGE:
562 case RTM_LOCK:
564 rnh = V_rt_tables[so->so_fibnum][info.rti_info[RTAX_DST]->sa_family];
563 rnh = rt_tables_get_rnh(so->so_fibnum,
564 info.rti_info[RTAX_DST]->sa_family);
565 if (rnh == NULL)
566 senderr(EAFNOSUPPORT);
567 RADIX_NODE_HEAD_RLOCK(rnh);
568 rt = (struct rtentry *) rnh->rnh_lookup(info.rti_info[RTAX_DST],
569 info.rti_info[RTAX_NETMASK], rnh);
570 if (rt == NULL) { /* XXX looks bogus */
571 RADIX_NODE_HEAD_RUNLOCK(rnh);
572 senderr(ESRCH);
573 }
574#ifdef RADIX_MPATH
575 /*
576 * for RTM_CHANGE/LOCK, if we got multipath routes,
577 * we require users to specify a matching RTAX_GATEWAY.
578 *
579 * for RTM_GET, gate is optional even with multipath.
580 * if gate == NULL the first match is returned.
581 * (no need to call rt_mpath_matchgate if gate == NULL)
582 */
583 if (rn_mpath_capable(rnh) &&
584 (rtm->rtm_type != RTM_GET || info.rti_info[RTAX_GATEWAY])) {
585 rt = rt_mpath_matchgate(rt, info.rti_info[RTAX_GATEWAY]);
586 if (!rt) {
587 RADIX_NODE_HEAD_RUNLOCK(rnh);
588 senderr(ESRCH);
589 }
590 }
591#endif
592 RT_LOCK(rt);
593 RT_ADDREF(rt);
594 RADIX_NODE_HEAD_RUNLOCK(rnh);
595
596 /*
597 * Fix for PR: 82974
598 *
599 * RTM_CHANGE/LOCK need a perfect match, rn_lookup()
600 * returns a perfect match in case a netmask is
601 * specified. For host routes only a longest prefix
602 * match is returned so it is necessary to compare the
603 * existence of the netmask. If both have a netmask
604 * rnh_lookup() did a perfect match and if none of them
605 * have a netmask both are host routes which is also a
606 * perfect match.
607 */
608
609 if (rtm->rtm_type != RTM_GET &&
610 (!rt_mask(rt) != !info.rti_info[RTAX_NETMASK])) {
611 RT_UNLOCK(rt);
612 senderr(ESRCH);
613 }
614
615 switch(rtm->rtm_type) {
616
617 case RTM_GET:
618 report:
619 RT_LOCK_ASSERT(rt);
620 if ((rt->rt_flags & RTF_HOST) == 0
621 ? jailed(curthread->td_ucred)
622 : prison_if(curthread->td_ucred,
623 rt_key(rt)) != 0) {
624 RT_UNLOCK(rt);
625 senderr(ESRCH);
626 }
627 info.rti_info[RTAX_DST] = rt_key(rt);
628 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
629 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
630 info.rti_info[RTAX_GENMASK] = 0;
631 if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
632 ifp = rt->rt_ifp;
633 if (ifp) {
634 info.rti_info[RTAX_IFP] =
635 ifp->if_addr->ifa_addr;
636 error = rtm_get_jailed(&info, ifp, rt,
637 &saun, curthread->td_ucred);
638 if (error != 0) {
639 RT_UNLOCK(rt);
640 senderr(error);
641 }
642 if (ifp->if_flags & IFF_POINTOPOINT)
643 info.rti_info[RTAX_BRD] =
644 rt->rt_ifa->ifa_dstaddr;
645 rtm->rtm_index = ifp->if_index;
646 } else {
647 info.rti_info[RTAX_IFP] = NULL;
648 info.rti_info[RTAX_IFA] = NULL;
649 }
650 } else if ((ifp = rt->rt_ifp) != NULL) {
651 rtm->rtm_index = ifp->if_index;
652 }
653 len = rt_msg2(rtm->rtm_type, &info, NULL, NULL);
654 if (len > rtm->rtm_msglen) {
655 struct rt_msghdr *new_rtm;
656 R_Malloc(new_rtm, struct rt_msghdr *, len);
657 if (new_rtm == NULL) {
658 RT_UNLOCK(rt);
659 senderr(ENOBUFS);
660 }
661 bcopy(rtm, new_rtm, rtm->rtm_msglen);
662 Free(rtm); rtm = new_rtm;
663 }
664 (void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm, NULL);
665 rtm->rtm_flags = rt->rt_flags;
666 rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
667 rtm->rtm_addrs = info.rti_addrs;
668 break;
669
670 case RTM_CHANGE:
671 /*
672 * New gateway could require new ifaddr, ifp;
673 * flags may also be different; ifp may be specified
674 * by ll sockaddr when protocol address is ambiguous
675 */
676 if (((rt->rt_flags & RTF_GATEWAY) &&
677 info.rti_info[RTAX_GATEWAY] != NULL) ||
678 info.rti_info[RTAX_IFP] != NULL ||
679 (info.rti_info[RTAX_IFA] != NULL &&
680 !sa_equal(info.rti_info[RTAX_IFA],
681 rt->rt_ifa->ifa_addr))) {
682 RT_UNLOCK(rt);
683 RADIX_NODE_HEAD_LOCK(rnh);
684 error = rt_getifa_fib(&info, rt->rt_fibnum);
685 RADIX_NODE_HEAD_UNLOCK(rnh);
686 if (error != 0)
687 senderr(error);
688 RT_LOCK(rt);
689 }
690 if (info.rti_ifa != NULL &&
691 info.rti_ifa != rt->rt_ifa &&
692 rt->rt_ifa != NULL &&
693 rt->rt_ifa->ifa_rtrequest != NULL) {
694 rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt,
695 &info);
696 IFAFREE(rt->rt_ifa);
697 }
698 if (info.rti_info[RTAX_GATEWAY] != NULL) {
699 RT_UNLOCK(rt);
700 RADIX_NODE_HEAD_LOCK(rnh);
701 RT_LOCK(rt);
702
703 error = rt_setgate(rt, rt_key(rt),
704 info.rti_info[RTAX_GATEWAY]);
705 RADIX_NODE_HEAD_UNLOCK(rnh);
706 if (error != 0) {
707 RT_UNLOCK(rt);
708 senderr(error);
709 }
710 rt->rt_flags |= RTF_GATEWAY;
711 }
712 if (info.rti_ifa != NULL &&
713 info.rti_ifa != rt->rt_ifa) {
714 IFAREF(info.rti_ifa);
715 rt->rt_ifa = info.rti_ifa;
716 rt->rt_ifp = info.rti_ifp;
717 }
718 /* Allow some flags to be toggled on change. */
719 rt->rt_flags = (rt->rt_flags & ~RTF_FMASK) |
720 (rtm->rtm_flags & RTF_FMASK);
721 rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
722 &rt->rt_rmx);
723 rtm->rtm_index = rt->rt_ifp->if_index;
724 if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
725 rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, &info);
726 /* FALLTHROUGH */
727 case RTM_LOCK:
728 /* We don't support locks anymore */
729 break;
730 }
731 RT_UNLOCK(rt);
732 break;
733
734 default:
735 senderr(EOPNOTSUPP);
736 }
737
738flush:
739 if (rtm) {
740 if (error)
741 rtm->rtm_errno = error;
742 else
743 rtm->rtm_flags |= RTF_DONE;
744 }
745 if (rt) /* XXX can this be true? */
746 RTFREE(rt);
747 {
748 struct rawcb *rp = NULL;
749 /*
750 * Check to see if we don't want our own messages.
751 */
752 if ((so->so_options & SO_USELOOPBACK) == 0) {
753 if (route_cb.any_count <= 1) {
754 if (rtm)
755 Free(rtm);
756 m_freem(m);
757 return (error);
758 }
759 /* There is another listener, so construct message */
760 rp = sotorawcb(so);
761 }
762 if (rtm) {
763 m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm);
764 if (m->m_pkthdr.len < rtm->rtm_msglen) {
765 m_freem(m);
766 m = NULL;
767 } else if (m->m_pkthdr.len > rtm->rtm_msglen)
768 m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
769 Free(rtm);
770 }
771 if (m) {
772 if (rp) {
773 /*
774 * XXX insure we don't get a copy by
775 * invalidating our protocol
776 */
777 unsigned short family = rp->rcb_proto.sp_family;
778 rp->rcb_proto.sp_family = 0;
779 rt_dispatch(m, info.rti_info[RTAX_DST]);
780 rp->rcb_proto.sp_family = family;
781 } else
782 rt_dispatch(m, info.rti_info[RTAX_DST]);
783 }
784 }
785 return (error);
786#undef sa_equal
787}
788
789static void
790rt_setmetrics(u_long which, const struct rt_metrics *in,
791 struct rt_metrics_lite *out)
792{
793#define metric(f, e) if (which & (f)) out->e = in->e;
794 /*
795 * Only these are stored in the routing entry since introduction
796 * of tcp hostcache. The rest is ignored.
797 */
798 metric(RTV_MTU, rmx_mtu);
799 metric(RTV_WEIGHT, rmx_weight);
800 /* Userland -> kernel timebase conversion. */
801 if (which & RTV_EXPIRE)
802 out->rmx_expire = in->rmx_expire ?
803 in->rmx_expire - time_second + time_uptime : 0;
804#undef metric
805}
806
807static void
808rt_getmetrics(const struct rt_metrics_lite *in, struct rt_metrics *out)
809{
810#define metric(e) out->e = in->e;
811 bzero(out, sizeof(*out));
812 metric(rmx_mtu);
813 metric(rmx_weight);
814 /* Kernel -> userland timebase conversion. */
815 out->rmx_expire = in->rmx_expire ?
816 in->rmx_expire - time_uptime + time_second : 0;
817#undef metric
818}
819
820/*
821 * Extract the addresses of the passed sockaddrs.
822 * Do a little sanity checking so as to avoid bad memory references.
823 * This data is derived straight from userland.
824 */
825static int
826rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
827{
828 struct sockaddr *sa;
829 int i;
830
831 for (i = 0; i < RTAX_MAX && cp < cplim; i++) {
832 if ((rtinfo->rti_addrs & (1 << i)) == 0)
833 continue;
834 sa = (struct sockaddr *)cp;
835 /*
836 * It won't fit.
837 */
838 if (cp + sa->sa_len > cplim)
839 return (EINVAL);
840 /*
841 * there are no more.. quit now
842 * If there are more bits, they are in error.
843 * I've seen this. route(1) can evidently generate these.
844 * This causes kernel to core dump.
845 * for compatibility, If we see this, point to a safe address.
846 */
847 if (sa->sa_len == 0) {
848 rtinfo->rti_info[i] = &sa_zero;
849 return (0); /* should be EINVAL but for compat */
850 }
851 /* accept it */
852 rtinfo->rti_info[i] = sa;
853 cp += SA_SIZE(sa);
854 }
855 return (0);
856}
857
858static struct mbuf *
859rt_msg1(int type, struct rt_addrinfo *rtinfo)
860{
861 struct rt_msghdr *rtm;
862 struct mbuf *m;
863 int i;
864 struct sockaddr *sa;
865 int len, dlen;
866
867 switch (type) {
868
869 case RTM_DELADDR:
870 case RTM_NEWADDR:
871 len = sizeof(struct ifa_msghdr);
872 break;
873
874 case RTM_DELMADDR:
875 case RTM_NEWMADDR:
876 len = sizeof(struct ifma_msghdr);
877 break;
878
879 case RTM_IFINFO:
880 len = sizeof(struct if_msghdr);
881 break;
882
883 case RTM_IFANNOUNCE:
884 case RTM_IEEE80211:
885 len = sizeof(struct if_announcemsghdr);
886 break;
887
888 default:
889 len = sizeof(struct rt_msghdr);
890 }
891 if (len > MCLBYTES)
892 panic("rt_msg1");
893 m = m_gethdr(M_DONTWAIT, MT_DATA);
894 if (m && len > MHLEN) {
895 MCLGET(m, M_DONTWAIT);
896 if ((m->m_flags & M_EXT) == 0) {
897 m_free(m);
898 m = NULL;
899 }
900 }
901 if (m == NULL)
902 return (m);
903 m->m_pkthdr.len = m->m_len = len;
904 m->m_pkthdr.rcvif = NULL;
905 rtm = mtod(m, struct rt_msghdr *);
906 bzero((caddr_t)rtm, len);
907 for (i = 0; i < RTAX_MAX; i++) {
908 if ((sa = rtinfo->rti_info[i]) == NULL)
909 continue;
910 rtinfo->rti_addrs |= (1 << i);
911 dlen = SA_SIZE(sa);
912 m_copyback(m, len, dlen, (caddr_t)sa);
913 len += dlen;
914 }
915 if (m->m_pkthdr.len != len) {
916 m_freem(m);
917 return (NULL);
918 }
919 rtm->rtm_msglen = len;
920 rtm->rtm_version = RTM_VERSION;
921 rtm->rtm_type = type;
922 return (m);
923}
924
925static int
926rt_msg2(int type, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w)
927{
928 int i;
929 int len, dlen, second_time = 0;
930 caddr_t cp0;
931
932 rtinfo->rti_addrs = 0;
933again:
934 switch (type) {
935
936 case RTM_DELADDR:
937 case RTM_NEWADDR:
938 len = sizeof(struct ifa_msghdr);
939 break;
940
941 case RTM_IFINFO:
942 len = sizeof(struct if_msghdr);
943 break;
944
945 case RTM_NEWMADDR:
946 len = sizeof(struct ifma_msghdr);
947 break;
948
949 default:
950 len = sizeof(struct rt_msghdr);
951 }
952 cp0 = cp;
953 if (cp0)
954 cp += len;
955 for (i = 0; i < RTAX_MAX; i++) {
956 struct sockaddr *sa;
957
958 if ((sa = rtinfo->rti_info[i]) == NULL)
959 continue;
960 rtinfo->rti_addrs |= (1 << i);
961 dlen = SA_SIZE(sa);
962 if (cp) {
963 bcopy((caddr_t)sa, cp, (unsigned)dlen);
964 cp += dlen;
965 }
966 len += dlen;
967 }
968 len = ALIGN(len);
969 if (cp == NULL && w != NULL && !second_time) {
970 struct walkarg *rw = w;
971
972 if (rw->w_req) {
973 if (rw->w_tmemsize < len) {
974 if (rw->w_tmem)
975 free(rw->w_tmem, M_RTABLE);
976 rw->w_tmem = (caddr_t)
977 malloc(len, M_RTABLE, M_NOWAIT);
978 if (rw->w_tmem)
979 rw->w_tmemsize = len;
980 }
981 if (rw->w_tmem) {
982 cp = rw->w_tmem;
983 second_time = 1;
984 goto again;
985 }
986 }
987 }
988 if (cp) {
989 struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
990
991 rtm->rtm_version = RTM_VERSION;
992 rtm->rtm_type = type;
993 rtm->rtm_msglen = len;
994 }
995 return (len);
996}
997
998/*
999 * This routine is called to generate a message from the routing
1000 * socket indicating that a redirect has occured, a routing lookup
1001 * has failed, or that a protocol has detected timeouts to a particular
1002 * destination.
1003 */
1004void
1005rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
1006{
1007 struct rt_msghdr *rtm;
1008 struct mbuf *m;
1009 struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
1010
1011 if (route_cb.any_count == 0)
1012 return;
1013 m = rt_msg1(type, rtinfo);
1014 if (m == NULL)
1015 return;
1016 rtm = mtod(m, struct rt_msghdr *);
1017 rtm->rtm_flags = RTF_DONE | flags;
1018 rtm->rtm_errno = error;
1019 rtm->rtm_addrs = rtinfo->rti_addrs;
1020 rt_dispatch(m, sa);
1021}
1022
1023/*
1024 * This routine is called to generate a message from the routing
1025 * socket indicating that the status of a network interface has changed.
1026 */
1027void
1028rt_ifmsg(struct ifnet *ifp)
1029{
1030 struct if_msghdr *ifm;
1031 struct mbuf *m;
1032 struct rt_addrinfo info;
1033
1034 if (route_cb.any_count == 0)
1035 return;
1036 bzero((caddr_t)&info, sizeof(info));
1037 m = rt_msg1(RTM_IFINFO, &info);
1038 if (m == NULL)
1039 return;
1040 ifm = mtod(m, struct if_msghdr *);
1041 ifm->ifm_index = ifp->if_index;
1042 ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
1043 ifm->ifm_data = ifp->if_data;
1044 ifm->ifm_addrs = 0;
1045 rt_dispatch(m, NULL);
1046}
1047
1048/*
1049 * This is called to generate messages from the routing socket
1050 * indicating a network interface has had addresses associated with it.
1051 * if we ever reverse the logic and replace messages TO the routing
1052 * socket indicate a request to configure interfaces, then it will
1053 * be unnecessary as the routing socket will automatically generate
1054 * copies of it.
1055 */
1056void
1057rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
1058{
1059 struct rt_addrinfo info;
1060 struct sockaddr *sa = NULL;
1061 int pass;
1062 struct mbuf *m = NULL;
1063 struct ifnet *ifp = ifa->ifa_ifp;
1064
1065 KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE,
1066 ("unexpected cmd %u", cmd));
1067#ifdef SCTP
1068 /*
1069 * notify the SCTP stack
1070 * this will only get called when an address is added/deleted
1071 * XXX pass the ifaddr struct instead if ifa->ifa_addr...
1072 */
1073 sctp_addr_change(ifa, cmd);
1074#endif /* SCTP */
1075 if (route_cb.any_count == 0)
1076 return;
1077 for (pass = 1; pass < 3; pass++) {
1078 bzero((caddr_t)&info, sizeof(info));
1079 if ((cmd == RTM_ADD && pass == 1) ||
1080 (cmd == RTM_DELETE && pass == 2)) {
1081 struct ifa_msghdr *ifam;
1082 int ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;
1083
1084 info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr;
1085 info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr;
1086 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1087 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1088 if ((m = rt_msg1(ncmd, &info)) == NULL)
1089 continue;
1090 ifam = mtod(m, struct ifa_msghdr *);
1091 ifam->ifam_index = ifp->if_index;
1092 ifam->ifam_metric = ifa->ifa_metric;
1093 ifam->ifam_flags = ifa->ifa_flags;
1094 ifam->ifam_addrs = info.rti_addrs;
1095 }
1096 if ((cmd == RTM_ADD && pass == 2) ||
1097 (cmd == RTM_DELETE && pass == 1)) {
1098 struct rt_msghdr *rtm;
1099
1100 if (rt == NULL)
1101 continue;
1102 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
1103 info.rti_info[RTAX_DST] = sa = rt_key(rt);
1104 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1105 if ((m = rt_msg1(cmd, &info)) == NULL)
1106 continue;
1107 rtm = mtod(m, struct rt_msghdr *);
1108 rtm->rtm_index = ifp->if_index;
1109 rtm->rtm_flags |= rt->rt_flags;
1110 rtm->rtm_errno = error;
1111 rtm->rtm_addrs = info.rti_addrs;
1112 }
1113 rt_dispatch(m, sa);
1114 }
1115}
1116
1117/*
1118 * This is the analogue to the rt_newaddrmsg which performs the same
1119 * function but for multicast group memberhips. This is easier since
1120 * there is no route state to worry about.
1121 */
1122void
1123rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma)
1124{
1125 struct rt_addrinfo info;
1126 struct mbuf *m = NULL;
1127 struct ifnet *ifp = ifma->ifma_ifp;
1128 struct ifma_msghdr *ifmam;
1129
1130 if (route_cb.any_count == 0)
1131 return;
1132
1133 bzero((caddr_t)&info, sizeof(info));
1134 info.rti_info[RTAX_IFA] = ifma->ifma_addr;
1135 info.rti_info[RTAX_IFP] = ifp ? ifp->if_addr->ifa_addr : NULL;
1136 /*
1137 * If a link-layer address is present, present it as a ``gateway''
1138 * (similarly to how ARP entries, e.g., are presented).
1139 */
1140 info.rti_info[RTAX_GATEWAY] = ifma->ifma_lladdr;
1141 m = rt_msg1(cmd, &info);
1142 if (m == NULL)
1143 return;
1144 ifmam = mtod(m, struct ifma_msghdr *);
1145 KASSERT(ifp != NULL, ("%s: link-layer multicast address w/o ifp\n",
1146 __func__));
1147 ifmam->ifmam_index = ifp->if_index;
1148 ifmam->ifmam_addrs = info.rti_addrs;
1149 rt_dispatch(m, ifma->ifma_addr);
1150}
1151
1152static struct mbuf *
1153rt_makeifannouncemsg(struct ifnet *ifp, int type, int what,
1154 struct rt_addrinfo *info)
1155{
1156 struct if_announcemsghdr *ifan;
1157 struct mbuf *m;
1158
1159 if (route_cb.any_count == 0)
1160 return NULL;
1161 bzero((caddr_t)info, sizeof(*info));
1162 m = rt_msg1(type, info);
1163 if (m != NULL) {
1164 ifan = mtod(m, struct if_announcemsghdr *);
1165 ifan->ifan_index = ifp->if_index;
1166 strlcpy(ifan->ifan_name, ifp->if_xname,
1167 sizeof(ifan->ifan_name));
1168 ifan->ifan_what = what;
1169 }
1170 return m;
1171}
1172
1173/*
1174 * This is called to generate routing socket messages indicating
1175 * IEEE80211 wireless events.
1176 * XXX we piggyback on the RTM_IFANNOUNCE msg format in a clumsy way.
1177 */
1178void
1179rt_ieee80211msg(struct ifnet *ifp, int what, void *data, size_t data_len)
1180{
1181 struct mbuf *m;
1182 struct rt_addrinfo info;
1183
1184 m = rt_makeifannouncemsg(ifp, RTM_IEEE80211, what, &info);
1185 if (m != NULL) {
1186 /*
1187 * Append the ieee80211 data. Try to stick it in the
1188 * mbuf containing the ifannounce msg; otherwise allocate
1189 * a new mbuf and append.
1190 *
1191 * NB: we assume m is a single mbuf.
1192 */
1193 if (data_len > M_TRAILINGSPACE(m)) {
1194 struct mbuf *n = m_get(M_NOWAIT, MT_DATA);
1195 if (n == NULL) {
1196 m_freem(m);
1197 return;
1198 }
1199 bcopy(data, mtod(n, void *), data_len);
1200 n->m_len = data_len;
1201 m->m_next = n;
1202 } else if (data_len > 0) {
1203 bcopy(data, mtod(m, u_int8_t *) + m->m_len, data_len);
1204 m->m_len += data_len;
1205 }
1206 if (m->m_flags & M_PKTHDR)
1207 m->m_pkthdr.len += data_len;
1208 mtod(m, struct if_announcemsghdr *)->ifan_msglen += data_len;
1209 rt_dispatch(m, NULL);
1210 }
1211}
1212
1213/*
1214 * This is called to generate routing socket messages indicating
1215 * network interface arrival and departure.
1216 */
1217void
1218rt_ifannouncemsg(struct ifnet *ifp, int what)
1219{
1220 struct mbuf *m;
1221 struct rt_addrinfo info;
1222
1223 m = rt_makeifannouncemsg(ifp, RTM_IFANNOUNCE, what, &info);
1224 if (m != NULL)
1225 rt_dispatch(m, NULL);
1226}
1227
1228static void
1229rt_dispatch(struct mbuf *m, const struct sockaddr *sa)
1230{
1231 INIT_VNET_NET(curvnet);
1232 struct m_tag *tag;
1233
1234 /*
1235 * Preserve the family from the sockaddr, if any, in an m_tag for
1236 * use when injecting the mbuf into the routing socket buffer from
1237 * the netisr.
1238 */
1239 if (sa != NULL) {
1240 tag = m_tag_get(PACKET_TAG_RTSOCKFAM, sizeof(unsigned short),
1241 M_NOWAIT);
1242 if (tag == NULL) {
1243 m_freem(m);
1244 return;
1245 }
1246 *(unsigned short *)(tag + 1) = sa->sa_family;
1247 m_tag_prepend(m, tag);
1248 }
1249#ifdef VIMAGE
1250 if (V_loif)
1251 m->m_pkthdr.rcvif = V_loif;
1252 else {
1253 m_freem(m);
1254 return;
1255 }
1256#endif
1257 netisr_queue(NETISR_ROUTE, m); /* mbuf is free'd on failure. */
1258}
1259
1260/*
1261 * This is used in dumping the kernel table via sysctl().
1262 */
1263static int
1264sysctl_dumpentry(struct radix_node *rn, void *vw)
1265{
1266 struct walkarg *w = vw;
1267 struct rtentry *rt = (struct rtentry *)rn;
1268 int error = 0, size;
1269 struct rt_addrinfo info;
1270
1271 if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
1272 return 0;
1273 if ((rt->rt_flags & RTF_HOST) == 0
1274 ? jailed(w->w_req->td->td_ucred)
1275 : prison_if(w->w_req->td->td_ucred, rt_key(rt)) != 0)
1276 return (0);
1277 bzero((caddr_t)&info, sizeof(info));
1278 info.rti_info[RTAX_DST] = rt_key(rt);
1279 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1280 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
1281 info.rti_info[RTAX_GENMASK] = 0;
1282 if (rt->rt_ifp) {
1283 info.rti_info[RTAX_IFP] = rt->rt_ifp->if_addr->ifa_addr;
1284 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1285 if (rt->rt_ifp->if_flags & IFF_POINTOPOINT)
1286 info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
1287 }
1288 size = rt_msg2(RTM_GET, &info, NULL, w);
1289 if (w->w_req && w->w_tmem) {
1290 struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
1291
1292 rtm->rtm_flags = rt->rt_flags;
1293 /*
1294 * let's be honest about this being a retarded hack
1295 */
1296 rtm->rtm_fmask = rt->rt_rmx.rmx_pksent;
1297 rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
1298 rtm->rtm_index = rt->rt_ifp->if_index;
1299 rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0;
1300 rtm->rtm_addrs = info.rti_addrs;
1301 error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size);
1302 return (error);
1303 }
1304 return (error);
1305}
1306
1307static int
1308sysctl_iflist(int af, struct walkarg *w)
1309{
1310 INIT_VNET_NET(curvnet);
1311 struct ifnet *ifp;
1312 struct ifaddr *ifa;
1313 struct rt_addrinfo info;
1314 int len, error = 0;
1315
1316 bzero((caddr_t)&info, sizeof(info));
1317 IFNET_RLOCK();
1318 TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1319 if (w->w_arg && w->w_arg != ifp->if_index)
1320 continue;
1321 ifa = ifp->if_addr;
1322 info.rti_info[RTAX_IFP] = ifa->ifa_addr;
1323 len = rt_msg2(RTM_IFINFO, &info, NULL, w);
1324 info.rti_info[RTAX_IFP] = NULL;
1325 if (w->w_req && w->w_tmem) {
1326 struct if_msghdr *ifm;
1327
1328 ifm = (struct if_msghdr *)w->w_tmem;
1329 ifm->ifm_index = ifp->if_index;
1330 ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
1331 ifm->ifm_data = ifp->if_data;
1332 ifm->ifm_addrs = info.rti_addrs;
1333 error = SYSCTL_OUT(w->w_req,(caddr_t)ifm, len);
1334 if (error)
1335 goto done;
1336 }
1337 while ((ifa = TAILQ_NEXT(ifa, ifa_link)) != NULL) {
1338 if (af && af != ifa->ifa_addr->sa_family)
1339 continue;
1340 if (prison_if(w->w_req->td->td_ucred,
1341 ifa->ifa_addr) != 0)
1342 continue;
1343 info.rti_info[RTAX_IFA] = ifa->ifa_addr;
1344 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1345 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1346 len = rt_msg2(RTM_NEWADDR, &info, NULL, w);
1347 if (w->w_req && w->w_tmem) {
1348 struct ifa_msghdr *ifam;
1349
1350 ifam = (struct ifa_msghdr *)w->w_tmem;
1351 ifam->ifam_index = ifa->ifa_ifp->if_index;
1352 ifam->ifam_flags = ifa->ifa_flags;
1353 ifam->ifam_metric = ifa->ifa_metric;
1354 ifam->ifam_addrs = info.rti_addrs;
1355 error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
1356 if (error)
1357 goto done;
1358 }
1359 }
1360 info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
1361 info.rti_info[RTAX_BRD] = NULL;
1362 }
1363done:
1364 IFNET_RUNLOCK();
1365 return (error);
1366}
1367
1368static int
1369sysctl_ifmalist(int af, struct walkarg *w)
1370{
1371 INIT_VNET_NET(curvnet);
1372 struct ifnet *ifp;
1373 struct ifmultiaddr *ifma;
1374 struct rt_addrinfo info;
1375 int len, error = 0;
1376 struct ifaddr *ifa;
1377
1378 bzero((caddr_t)&info, sizeof(info));
1379 IFNET_RLOCK();
1380 TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1381 if (w->w_arg && w->w_arg != ifp->if_index)
1382 continue;
1383 ifa = ifp->if_addr;
1384 info.rti_info[RTAX_IFP] = ifa ? ifa->ifa_addr : NULL;
1385 IF_ADDR_LOCK(ifp);
1386 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1387 if (af && af != ifma->ifma_addr->sa_family)
1388 continue;
1389 if (prison_if(w->w_req->td->td_ucred,
1390 ifma->ifma_addr) != 0)
1391 continue;
1392 info.rti_info[RTAX_IFA] = ifma->ifma_addr;
1393 info.rti_info[RTAX_GATEWAY] =
1394 (ifma->ifma_addr->sa_family != AF_LINK) ?
1395 ifma->ifma_lladdr : NULL;
1396 len = rt_msg2(RTM_NEWMADDR, &info, NULL, w);
1397 if (w->w_req && w->w_tmem) {
1398 struct ifma_msghdr *ifmam;
1399
1400 ifmam = (struct ifma_msghdr *)w->w_tmem;
1401 ifmam->ifmam_index = ifma->ifma_ifp->if_index;
1402 ifmam->ifmam_flags = 0;
1403 ifmam->ifmam_addrs = info.rti_addrs;
1404 error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
1405 if (error) {
1406 IF_ADDR_UNLOCK(ifp);
1407 goto done;
1408 }
1409 }
1410 }
1411 IF_ADDR_UNLOCK(ifp);
1412 }
1413done:
1414 IFNET_RUNLOCK();
1415 return (error);
1416}
1417
1418static int
1419sysctl_rtsock(SYSCTL_HANDLER_ARGS)
1420{
565 if (rnh == NULL)
566 senderr(EAFNOSUPPORT);
567 RADIX_NODE_HEAD_RLOCK(rnh);
568 rt = (struct rtentry *) rnh->rnh_lookup(info.rti_info[RTAX_DST],
569 info.rti_info[RTAX_NETMASK], rnh);
570 if (rt == NULL) { /* XXX looks bogus */
571 RADIX_NODE_HEAD_RUNLOCK(rnh);
572 senderr(ESRCH);
573 }
574#ifdef RADIX_MPATH
575 /*
576 * for RTM_CHANGE/LOCK, if we got multipath routes,
577 * we require users to specify a matching RTAX_GATEWAY.
578 *
579 * for RTM_GET, gate is optional even with multipath.
580 * if gate == NULL the first match is returned.
581 * (no need to call rt_mpath_matchgate if gate == NULL)
582 */
583 if (rn_mpath_capable(rnh) &&
584 (rtm->rtm_type != RTM_GET || info.rti_info[RTAX_GATEWAY])) {
585 rt = rt_mpath_matchgate(rt, info.rti_info[RTAX_GATEWAY]);
586 if (!rt) {
587 RADIX_NODE_HEAD_RUNLOCK(rnh);
588 senderr(ESRCH);
589 }
590 }
591#endif
592 RT_LOCK(rt);
593 RT_ADDREF(rt);
594 RADIX_NODE_HEAD_RUNLOCK(rnh);
595
596 /*
597 * Fix for PR: 82974
598 *
599 * RTM_CHANGE/LOCK need a perfect match, rn_lookup()
600 * returns a perfect match in case a netmask is
601 * specified. For host routes only a longest prefix
602 * match is returned so it is necessary to compare the
603 * existence of the netmask. If both have a netmask
604 * rnh_lookup() did a perfect match and if none of them
605 * have a netmask both are host routes which is also a
606 * perfect match.
607 */
608
609 if (rtm->rtm_type != RTM_GET &&
610 (!rt_mask(rt) != !info.rti_info[RTAX_NETMASK])) {
611 RT_UNLOCK(rt);
612 senderr(ESRCH);
613 }
614
615 switch(rtm->rtm_type) {
616
617 case RTM_GET:
618 report:
619 RT_LOCK_ASSERT(rt);
620 if ((rt->rt_flags & RTF_HOST) == 0
621 ? jailed(curthread->td_ucred)
622 : prison_if(curthread->td_ucred,
623 rt_key(rt)) != 0) {
624 RT_UNLOCK(rt);
625 senderr(ESRCH);
626 }
627 info.rti_info[RTAX_DST] = rt_key(rt);
628 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
629 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
630 info.rti_info[RTAX_GENMASK] = 0;
631 if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
632 ifp = rt->rt_ifp;
633 if (ifp) {
634 info.rti_info[RTAX_IFP] =
635 ifp->if_addr->ifa_addr;
636 error = rtm_get_jailed(&info, ifp, rt,
637 &saun, curthread->td_ucred);
638 if (error != 0) {
639 RT_UNLOCK(rt);
640 senderr(error);
641 }
642 if (ifp->if_flags & IFF_POINTOPOINT)
643 info.rti_info[RTAX_BRD] =
644 rt->rt_ifa->ifa_dstaddr;
645 rtm->rtm_index = ifp->if_index;
646 } else {
647 info.rti_info[RTAX_IFP] = NULL;
648 info.rti_info[RTAX_IFA] = NULL;
649 }
650 } else if ((ifp = rt->rt_ifp) != NULL) {
651 rtm->rtm_index = ifp->if_index;
652 }
653 len = rt_msg2(rtm->rtm_type, &info, NULL, NULL);
654 if (len > rtm->rtm_msglen) {
655 struct rt_msghdr *new_rtm;
656 R_Malloc(new_rtm, struct rt_msghdr *, len);
657 if (new_rtm == NULL) {
658 RT_UNLOCK(rt);
659 senderr(ENOBUFS);
660 }
661 bcopy(rtm, new_rtm, rtm->rtm_msglen);
662 Free(rtm); rtm = new_rtm;
663 }
664 (void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm, NULL);
665 rtm->rtm_flags = rt->rt_flags;
666 rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
667 rtm->rtm_addrs = info.rti_addrs;
668 break;
669
670 case RTM_CHANGE:
671 /*
672 * New gateway could require new ifaddr, ifp;
673 * flags may also be different; ifp may be specified
674 * by ll sockaddr when protocol address is ambiguous
675 */
676 if (((rt->rt_flags & RTF_GATEWAY) &&
677 info.rti_info[RTAX_GATEWAY] != NULL) ||
678 info.rti_info[RTAX_IFP] != NULL ||
679 (info.rti_info[RTAX_IFA] != NULL &&
680 !sa_equal(info.rti_info[RTAX_IFA],
681 rt->rt_ifa->ifa_addr))) {
682 RT_UNLOCK(rt);
683 RADIX_NODE_HEAD_LOCK(rnh);
684 error = rt_getifa_fib(&info, rt->rt_fibnum);
685 RADIX_NODE_HEAD_UNLOCK(rnh);
686 if (error != 0)
687 senderr(error);
688 RT_LOCK(rt);
689 }
690 if (info.rti_ifa != NULL &&
691 info.rti_ifa != rt->rt_ifa &&
692 rt->rt_ifa != NULL &&
693 rt->rt_ifa->ifa_rtrequest != NULL) {
694 rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt,
695 &info);
696 IFAFREE(rt->rt_ifa);
697 }
698 if (info.rti_info[RTAX_GATEWAY] != NULL) {
699 RT_UNLOCK(rt);
700 RADIX_NODE_HEAD_LOCK(rnh);
701 RT_LOCK(rt);
702
703 error = rt_setgate(rt, rt_key(rt),
704 info.rti_info[RTAX_GATEWAY]);
705 RADIX_NODE_HEAD_UNLOCK(rnh);
706 if (error != 0) {
707 RT_UNLOCK(rt);
708 senderr(error);
709 }
710 rt->rt_flags |= RTF_GATEWAY;
711 }
712 if (info.rti_ifa != NULL &&
713 info.rti_ifa != rt->rt_ifa) {
714 IFAREF(info.rti_ifa);
715 rt->rt_ifa = info.rti_ifa;
716 rt->rt_ifp = info.rti_ifp;
717 }
718 /* Allow some flags to be toggled on change. */
719 rt->rt_flags = (rt->rt_flags & ~RTF_FMASK) |
720 (rtm->rtm_flags & RTF_FMASK);
721 rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
722 &rt->rt_rmx);
723 rtm->rtm_index = rt->rt_ifp->if_index;
724 if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
725 rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, &info);
726 /* FALLTHROUGH */
727 case RTM_LOCK:
728 /* We don't support locks anymore */
729 break;
730 }
731 RT_UNLOCK(rt);
732 break;
733
734 default:
735 senderr(EOPNOTSUPP);
736 }
737
738flush:
739 if (rtm) {
740 if (error)
741 rtm->rtm_errno = error;
742 else
743 rtm->rtm_flags |= RTF_DONE;
744 }
745 if (rt) /* XXX can this be true? */
746 RTFREE(rt);
747 {
748 struct rawcb *rp = NULL;
749 /*
750 * Check to see if we don't want our own messages.
751 */
752 if ((so->so_options & SO_USELOOPBACK) == 0) {
753 if (route_cb.any_count <= 1) {
754 if (rtm)
755 Free(rtm);
756 m_freem(m);
757 return (error);
758 }
759 /* There is another listener, so construct message */
760 rp = sotorawcb(so);
761 }
762 if (rtm) {
763 m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm);
764 if (m->m_pkthdr.len < rtm->rtm_msglen) {
765 m_freem(m);
766 m = NULL;
767 } else if (m->m_pkthdr.len > rtm->rtm_msglen)
768 m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
769 Free(rtm);
770 }
771 if (m) {
772 if (rp) {
773 /*
774 * XXX insure we don't get a copy by
775 * invalidating our protocol
776 */
777 unsigned short family = rp->rcb_proto.sp_family;
778 rp->rcb_proto.sp_family = 0;
779 rt_dispatch(m, info.rti_info[RTAX_DST]);
780 rp->rcb_proto.sp_family = family;
781 } else
782 rt_dispatch(m, info.rti_info[RTAX_DST]);
783 }
784 }
785 return (error);
786#undef sa_equal
787}
788
789static void
790rt_setmetrics(u_long which, const struct rt_metrics *in,
791 struct rt_metrics_lite *out)
792{
793#define metric(f, e) if (which & (f)) out->e = in->e;
794 /*
795 * Only these are stored in the routing entry since introduction
796 * of tcp hostcache. The rest is ignored.
797 */
798 metric(RTV_MTU, rmx_mtu);
799 metric(RTV_WEIGHT, rmx_weight);
800 /* Userland -> kernel timebase conversion. */
801 if (which & RTV_EXPIRE)
802 out->rmx_expire = in->rmx_expire ?
803 in->rmx_expire - time_second + time_uptime : 0;
804#undef metric
805}
806
807static void
808rt_getmetrics(const struct rt_metrics_lite *in, struct rt_metrics *out)
809{
810#define metric(e) out->e = in->e;
811 bzero(out, sizeof(*out));
812 metric(rmx_mtu);
813 metric(rmx_weight);
814 /* Kernel -> userland timebase conversion. */
815 out->rmx_expire = in->rmx_expire ?
816 in->rmx_expire - time_uptime + time_second : 0;
817#undef metric
818}
819
820/*
821 * Extract the addresses of the passed sockaddrs.
822 * Do a little sanity checking so as to avoid bad memory references.
823 * This data is derived straight from userland.
824 */
825static int
826rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
827{
828 struct sockaddr *sa;
829 int i;
830
831 for (i = 0; i < RTAX_MAX && cp < cplim; i++) {
832 if ((rtinfo->rti_addrs & (1 << i)) == 0)
833 continue;
834 sa = (struct sockaddr *)cp;
835 /*
836 * It won't fit.
837 */
838 if (cp + sa->sa_len > cplim)
839 return (EINVAL);
840 /*
841 * there are no more.. quit now
842 * If there are more bits, they are in error.
843 * I've seen this. route(1) can evidently generate these.
844 * This causes kernel to core dump.
845 * for compatibility, If we see this, point to a safe address.
846 */
847 if (sa->sa_len == 0) {
848 rtinfo->rti_info[i] = &sa_zero;
849 return (0); /* should be EINVAL but for compat */
850 }
851 /* accept it */
852 rtinfo->rti_info[i] = sa;
853 cp += SA_SIZE(sa);
854 }
855 return (0);
856}
857
858static struct mbuf *
859rt_msg1(int type, struct rt_addrinfo *rtinfo)
860{
861 struct rt_msghdr *rtm;
862 struct mbuf *m;
863 int i;
864 struct sockaddr *sa;
865 int len, dlen;
866
867 switch (type) {
868
869 case RTM_DELADDR:
870 case RTM_NEWADDR:
871 len = sizeof(struct ifa_msghdr);
872 break;
873
874 case RTM_DELMADDR:
875 case RTM_NEWMADDR:
876 len = sizeof(struct ifma_msghdr);
877 break;
878
879 case RTM_IFINFO:
880 len = sizeof(struct if_msghdr);
881 break;
882
883 case RTM_IFANNOUNCE:
884 case RTM_IEEE80211:
885 len = sizeof(struct if_announcemsghdr);
886 break;
887
888 default:
889 len = sizeof(struct rt_msghdr);
890 }
891 if (len > MCLBYTES)
892 panic("rt_msg1");
893 m = m_gethdr(M_DONTWAIT, MT_DATA);
894 if (m && len > MHLEN) {
895 MCLGET(m, M_DONTWAIT);
896 if ((m->m_flags & M_EXT) == 0) {
897 m_free(m);
898 m = NULL;
899 }
900 }
901 if (m == NULL)
902 return (m);
903 m->m_pkthdr.len = m->m_len = len;
904 m->m_pkthdr.rcvif = NULL;
905 rtm = mtod(m, struct rt_msghdr *);
906 bzero((caddr_t)rtm, len);
907 for (i = 0; i < RTAX_MAX; i++) {
908 if ((sa = rtinfo->rti_info[i]) == NULL)
909 continue;
910 rtinfo->rti_addrs |= (1 << i);
911 dlen = SA_SIZE(sa);
912 m_copyback(m, len, dlen, (caddr_t)sa);
913 len += dlen;
914 }
915 if (m->m_pkthdr.len != len) {
916 m_freem(m);
917 return (NULL);
918 }
919 rtm->rtm_msglen = len;
920 rtm->rtm_version = RTM_VERSION;
921 rtm->rtm_type = type;
922 return (m);
923}
924
925static int
926rt_msg2(int type, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w)
927{
928 int i;
929 int len, dlen, second_time = 0;
930 caddr_t cp0;
931
932 rtinfo->rti_addrs = 0;
933again:
934 switch (type) {
935
936 case RTM_DELADDR:
937 case RTM_NEWADDR:
938 len = sizeof(struct ifa_msghdr);
939 break;
940
941 case RTM_IFINFO:
942 len = sizeof(struct if_msghdr);
943 break;
944
945 case RTM_NEWMADDR:
946 len = sizeof(struct ifma_msghdr);
947 break;
948
949 default:
950 len = sizeof(struct rt_msghdr);
951 }
952 cp0 = cp;
953 if (cp0)
954 cp += len;
955 for (i = 0; i < RTAX_MAX; i++) {
956 struct sockaddr *sa;
957
958 if ((sa = rtinfo->rti_info[i]) == NULL)
959 continue;
960 rtinfo->rti_addrs |= (1 << i);
961 dlen = SA_SIZE(sa);
962 if (cp) {
963 bcopy((caddr_t)sa, cp, (unsigned)dlen);
964 cp += dlen;
965 }
966 len += dlen;
967 }
968 len = ALIGN(len);
969 if (cp == NULL && w != NULL && !second_time) {
970 struct walkarg *rw = w;
971
972 if (rw->w_req) {
973 if (rw->w_tmemsize < len) {
974 if (rw->w_tmem)
975 free(rw->w_tmem, M_RTABLE);
976 rw->w_tmem = (caddr_t)
977 malloc(len, M_RTABLE, M_NOWAIT);
978 if (rw->w_tmem)
979 rw->w_tmemsize = len;
980 }
981 if (rw->w_tmem) {
982 cp = rw->w_tmem;
983 second_time = 1;
984 goto again;
985 }
986 }
987 }
988 if (cp) {
989 struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
990
991 rtm->rtm_version = RTM_VERSION;
992 rtm->rtm_type = type;
993 rtm->rtm_msglen = len;
994 }
995 return (len);
996}
997
998/*
999 * This routine is called to generate a message from the routing
1000 * socket indicating that a redirect has occured, a routing lookup
1001 * has failed, or that a protocol has detected timeouts to a particular
1002 * destination.
1003 */
1004void
1005rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
1006{
1007 struct rt_msghdr *rtm;
1008 struct mbuf *m;
1009 struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
1010
1011 if (route_cb.any_count == 0)
1012 return;
1013 m = rt_msg1(type, rtinfo);
1014 if (m == NULL)
1015 return;
1016 rtm = mtod(m, struct rt_msghdr *);
1017 rtm->rtm_flags = RTF_DONE | flags;
1018 rtm->rtm_errno = error;
1019 rtm->rtm_addrs = rtinfo->rti_addrs;
1020 rt_dispatch(m, sa);
1021}
1022
1023/*
1024 * This routine is called to generate a message from the routing
1025 * socket indicating that the status of a network interface has changed.
1026 */
1027void
1028rt_ifmsg(struct ifnet *ifp)
1029{
1030 struct if_msghdr *ifm;
1031 struct mbuf *m;
1032 struct rt_addrinfo info;
1033
1034 if (route_cb.any_count == 0)
1035 return;
1036 bzero((caddr_t)&info, sizeof(info));
1037 m = rt_msg1(RTM_IFINFO, &info);
1038 if (m == NULL)
1039 return;
1040 ifm = mtod(m, struct if_msghdr *);
1041 ifm->ifm_index = ifp->if_index;
1042 ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
1043 ifm->ifm_data = ifp->if_data;
1044 ifm->ifm_addrs = 0;
1045 rt_dispatch(m, NULL);
1046}
1047
1048/*
1049 * This is called to generate messages from the routing socket
1050 * indicating a network interface has had addresses associated with it.
1051 * if we ever reverse the logic and replace messages TO the routing
1052 * socket indicate a request to configure interfaces, then it will
1053 * be unnecessary as the routing socket will automatically generate
1054 * copies of it.
1055 */
1056void
1057rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
1058{
1059 struct rt_addrinfo info;
1060 struct sockaddr *sa = NULL;
1061 int pass;
1062 struct mbuf *m = NULL;
1063 struct ifnet *ifp = ifa->ifa_ifp;
1064
1065 KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE,
1066 ("unexpected cmd %u", cmd));
1067#ifdef SCTP
1068 /*
1069 * notify the SCTP stack
1070 * this will only get called when an address is added/deleted
1071 * XXX pass the ifaddr struct instead if ifa->ifa_addr...
1072 */
1073 sctp_addr_change(ifa, cmd);
1074#endif /* SCTP */
1075 if (route_cb.any_count == 0)
1076 return;
1077 for (pass = 1; pass < 3; pass++) {
1078 bzero((caddr_t)&info, sizeof(info));
1079 if ((cmd == RTM_ADD && pass == 1) ||
1080 (cmd == RTM_DELETE && pass == 2)) {
1081 struct ifa_msghdr *ifam;
1082 int ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;
1083
1084 info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr;
1085 info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr;
1086 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1087 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1088 if ((m = rt_msg1(ncmd, &info)) == NULL)
1089 continue;
1090 ifam = mtod(m, struct ifa_msghdr *);
1091 ifam->ifam_index = ifp->if_index;
1092 ifam->ifam_metric = ifa->ifa_metric;
1093 ifam->ifam_flags = ifa->ifa_flags;
1094 ifam->ifam_addrs = info.rti_addrs;
1095 }
1096 if ((cmd == RTM_ADD && pass == 2) ||
1097 (cmd == RTM_DELETE && pass == 1)) {
1098 struct rt_msghdr *rtm;
1099
1100 if (rt == NULL)
1101 continue;
1102 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
1103 info.rti_info[RTAX_DST] = sa = rt_key(rt);
1104 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1105 if ((m = rt_msg1(cmd, &info)) == NULL)
1106 continue;
1107 rtm = mtod(m, struct rt_msghdr *);
1108 rtm->rtm_index = ifp->if_index;
1109 rtm->rtm_flags |= rt->rt_flags;
1110 rtm->rtm_errno = error;
1111 rtm->rtm_addrs = info.rti_addrs;
1112 }
1113 rt_dispatch(m, sa);
1114 }
1115}
1116
1117/*
1118 * This is the analogue to the rt_newaddrmsg which performs the same
1119 * function but for multicast group memberhips. This is easier since
1120 * there is no route state to worry about.
1121 */
1122void
1123rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma)
1124{
1125 struct rt_addrinfo info;
1126 struct mbuf *m = NULL;
1127 struct ifnet *ifp = ifma->ifma_ifp;
1128 struct ifma_msghdr *ifmam;
1129
1130 if (route_cb.any_count == 0)
1131 return;
1132
1133 bzero((caddr_t)&info, sizeof(info));
1134 info.rti_info[RTAX_IFA] = ifma->ifma_addr;
1135 info.rti_info[RTAX_IFP] = ifp ? ifp->if_addr->ifa_addr : NULL;
1136 /*
1137 * If a link-layer address is present, present it as a ``gateway''
1138 * (similarly to how ARP entries, e.g., are presented).
1139 */
1140 info.rti_info[RTAX_GATEWAY] = ifma->ifma_lladdr;
1141 m = rt_msg1(cmd, &info);
1142 if (m == NULL)
1143 return;
1144 ifmam = mtod(m, struct ifma_msghdr *);
1145 KASSERT(ifp != NULL, ("%s: link-layer multicast address w/o ifp\n",
1146 __func__));
1147 ifmam->ifmam_index = ifp->if_index;
1148 ifmam->ifmam_addrs = info.rti_addrs;
1149 rt_dispatch(m, ifma->ifma_addr);
1150}
1151
1152static struct mbuf *
1153rt_makeifannouncemsg(struct ifnet *ifp, int type, int what,
1154 struct rt_addrinfo *info)
1155{
1156 struct if_announcemsghdr *ifan;
1157 struct mbuf *m;
1158
1159 if (route_cb.any_count == 0)
1160 return NULL;
1161 bzero((caddr_t)info, sizeof(*info));
1162 m = rt_msg1(type, info);
1163 if (m != NULL) {
1164 ifan = mtod(m, struct if_announcemsghdr *);
1165 ifan->ifan_index = ifp->if_index;
1166 strlcpy(ifan->ifan_name, ifp->if_xname,
1167 sizeof(ifan->ifan_name));
1168 ifan->ifan_what = what;
1169 }
1170 return m;
1171}
1172
1173/*
1174 * This is called to generate routing socket messages indicating
1175 * IEEE80211 wireless events.
1176 * XXX we piggyback on the RTM_IFANNOUNCE msg format in a clumsy way.
1177 */
1178void
1179rt_ieee80211msg(struct ifnet *ifp, int what, void *data, size_t data_len)
1180{
1181 struct mbuf *m;
1182 struct rt_addrinfo info;
1183
1184 m = rt_makeifannouncemsg(ifp, RTM_IEEE80211, what, &info);
1185 if (m != NULL) {
1186 /*
1187 * Append the ieee80211 data. Try to stick it in the
1188 * mbuf containing the ifannounce msg; otherwise allocate
1189 * a new mbuf and append.
1190 *
1191 * NB: we assume m is a single mbuf.
1192 */
1193 if (data_len > M_TRAILINGSPACE(m)) {
1194 struct mbuf *n = m_get(M_NOWAIT, MT_DATA);
1195 if (n == NULL) {
1196 m_freem(m);
1197 return;
1198 }
1199 bcopy(data, mtod(n, void *), data_len);
1200 n->m_len = data_len;
1201 m->m_next = n;
1202 } else if (data_len > 0) {
1203 bcopy(data, mtod(m, u_int8_t *) + m->m_len, data_len);
1204 m->m_len += data_len;
1205 }
1206 if (m->m_flags & M_PKTHDR)
1207 m->m_pkthdr.len += data_len;
1208 mtod(m, struct if_announcemsghdr *)->ifan_msglen += data_len;
1209 rt_dispatch(m, NULL);
1210 }
1211}
1212
1213/*
1214 * This is called to generate routing socket messages indicating
1215 * network interface arrival and departure.
1216 */
1217void
1218rt_ifannouncemsg(struct ifnet *ifp, int what)
1219{
1220 struct mbuf *m;
1221 struct rt_addrinfo info;
1222
1223 m = rt_makeifannouncemsg(ifp, RTM_IFANNOUNCE, what, &info);
1224 if (m != NULL)
1225 rt_dispatch(m, NULL);
1226}
1227
1228static void
1229rt_dispatch(struct mbuf *m, const struct sockaddr *sa)
1230{
1231 INIT_VNET_NET(curvnet);
1232 struct m_tag *tag;
1233
1234 /*
1235 * Preserve the family from the sockaddr, if any, in an m_tag for
1236 * use when injecting the mbuf into the routing socket buffer from
1237 * the netisr.
1238 */
1239 if (sa != NULL) {
1240 tag = m_tag_get(PACKET_TAG_RTSOCKFAM, sizeof(unsigned short),
1241 M_NOWAIT);
1242 if (tag == NULL) {
1243 m_freem(m);
1244 return;
1245 }
1246 *(unsigned short *)(tag + 1) = sa->sa_family;
1247 m_tag_prepend(m, tag);
1248 }
1249#ifdef VIMAGE
1250 if (V_loif)
1251 m->m_pkthdr.rcvif = V_loif;
1252 else {
1253 m_freem(m);
1254 return;
1255 }
1256#endif
1257 netisr_queue(NETISR_ROUTE, m); /* mbuf is free'd on failure. */
1258}
1259
1260/*
1261 * This is used in dumping the kernel table via sysctl().
1262 */
1263static int
1264sysctl_dumpentry(struct radix_node *rn, void *vw)
1265{
1266 struct walkarg *w = vw;
1267 struct rtentry *rt = (struct rtentry *)rn;
1268 int error = 0, size;
1269 struct rt_addrinfo info;
1270
1271 if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
1272 return 0;
1273 if ((rt->rt_flags & RTF_HOST) == 0
1274 ? jailed(w->w_req->td->td_ucred)
1275 : prison_if(w->w_req->td->td_ucred, rt_key(rt)) != 0)
1276 return (0);
1277 bzero((caddr_t)&info, sizeof(info));
1278 info.rti_info[RTAX_DST] = rt_key(rt);
1279 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1280 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
1281 info.rti_info[RTAX_GENMASK] = 0;
1282 if (rt->rt_ifp) {
1283 info.rti_info[RTAX_IFP] = rt->rt_ifp->if_addr->ifa_addr;
1284 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1285 if (rt->rt_ifp->if_flags & IFF_POINTOPOINT)
1286 info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
1287 }
1288 size = rt_msg2(RTM_GET, &info, NULL, w);
1289 if (w->w_req && w->w_tmem) {
1290 struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
1291
1292 rtm->rtm_flags = rt->rt_flags;
1293 /*
1294 * let's be honest about this being a retarded hack
1295 */
1296 rtm->rtm_fmask = rt->rt_rmx.rmx_pksent;
1297 rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
1298 rtm->rtm_index = rt->rt_ifp->if_index;
1299 rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0;
1300 rtm->rtm_addrs = info.rti_addrs;
1301 error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size);
1302 return (error);
1303 }
1304 return (error);
1305}
1306
1307static int
1308sysctl_iflist(int af, struct walkarg *w)
1309{
1310 INIT_VNET_NET(curvnet);
1311 struct ifnet *ifp;
1312 struct ifaddr *ifa;
1313 struct rt_addrinfo info;
1314 int len, error = 0;
1315
1316 bzero((caddr_t)&info, sizeof(info));
1317 IFNET_RLOCK();
1318 TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1319 if (w->w_arg && w->w_arg != ifp->if_index)
1320 continue;
1321 ifa = ifp->if_addr;
1322 info.rti_info[RTAX_IFP] = ifa->ifa_addr;
1323 len = rt_msg2(RTM_IFINFO, &info, NULL, w);
1324 info.rti_info[RTAX_IFP] = NULL;
1325 if (w->w_req && w->w_tmem) {
1326 struct if_msghdr *ifm;
1327
1328 ifm = (struct if_msghdr *)w->w_tmem;
1329 ifm->ifm_index = ifp->if_index;
1330 ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
1331 ifm->ifm_data = ifp->if_data;
1332 ifm->ifm_addrs = info.rti_addrs;
1333 error = SYSCTL_OUT(w->w_req,(caddr_t)ifm, len);
1334 if (error)
1335 goto done;
1336 }
1337 while ((ifa = TAILQ_NEXT(ifa, ifa_link)) != NULL) {
1338 if (af && af != ifa->ifa_addr->sa_family)
1339 continue;
1340 if (prison_if(w->w_req->td->td_ucred,
1341 ifa->ifa_addr) != 0)
1342 continue;
1343 info.rti_info[RTAX_IFA] = ifa->ifa_addr;
1344 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1345 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1346 len = rt_msg2(RTM_NEWADDR, &info, NULL, w);
1347 if (w->w_req && w->w_tmem) {
1348 struct ifa_msghdr *ifam;
1349
1350 ifam = (struct ifa_msghdr *)w->w_tmem;
1351 ifam->ifam_index = ifa->ifa_ifp->if_index;
1352 ifam->ifam_flags = ifa->ifa_flags;
1353 ifam->ifam_metric = ifa->ifa_metric;
1354 ifam->ifam_addrs = info.rti_addrs;
1355 error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
1356 if (error)
1357 goto done;
1358 }
1359 }
1360 info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
1361 info.rti_info[RTAX_BRD] = NULL;
1362 }
1363done:
1364 IFNET_RUNLOCK();
1365 return (error);
1366}
1367
1368static int
1369sysctl_ifmalist(int af, struct walkarg *w)
1370{
1371 INIT_VNET_NET(curvnet);
1372 struct ifnet *ifp;
1373 struct ifmultiaddr *ifma;
1374 struct rt_addrinfo info;
1375 int len, error = 0;
1376 struct ifaddr *ifa;
1377
1378 bzero((caddr_t)&info, sizeof(info));
1379 IFNET_RLOCK();
1380 TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1381 if (w->w_arg && w->w_arg != ifp->if_index)
1382 continue;
1383 ifa = ifp->if_addr;
1384 info.rti_info[RTAX_IFP] = ifa ? ifa->ifa_addr : NULL;
1385 IF_ADDR_LOCK(ifp);
1386 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1387 if (af && af != ifma->ifma_addr->sa_family)
1388 continue;
1389 if (prison_if(w->w_req->td->td_ucred,
1390 ifma->ifma_addr) != 0)
1391 continue;
1392 info.rti_info[RTAX_IFA] = ifma->ifma_addr;
1393 info.rti_info[RTAX_GATEWAY] =
1394 (ifma->ifma_addr->sa_family != AF_LINK) ?
1395 ifma->ifma_lladdr : NULL;
1396 len = rt_msg2(RTM_NEWMADDR, &info, NULL, w);
1397 if (w->w_req && w->w_tmem) {
1398 struct ifma_msghdr *ifmam;
1399
1400 ifmam = (struct ifma_msghdr *)w->w_tmem;
1401 ifmam->ifmam_index = ifma->ifma_ifp->if_index;
1402 ifmam->ifmam_flags = 0;
1403 ifmam->ifmam_addrs = info.rti_addrs;
1404 error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
1405 if (error) {
1406 IF_ADDR_UNLOCK(ifp);
1407 goto done;
1408 }
1409 }
1410 }
1411 IF_ADDR_UNLOCK(ifp);
1412 }
1413done:
1414 IFNET_RUNLOCK();
1415 return (error);
1416}
1417
1418static int
1419sysctl_rtsock(SYSCTL_HANDLER_ARGS)
1420{
1421 INIT_VNET_NET(curvnet);
1422 int *name = (int *)arg1;
1423 u_int namelen = arg2;
1421 int *name = (int *)arg1;
1422 u_int namelen = arg2;
1424 struct radix_node_head *rnh;
1423 struct radix_node_head *rnh = NULL; /* silence compiler. */
1425 int i, lim, error = EINVAL;
1426 u_char af;
1427 struct walkarg w;
1428
1429 name ++;
1430 namelen--;
1431 if (req->newptr)
1432 return (EPERM);
1433 if (namelen != 3)
1434 return ((namelen < 3) ? EISDIR : ENOTDIR);
1435 af = name[0];
1436 if (af > AF_MAX)
1437 return (EINVAL);
1438 bzero(&w, sizeof(w));
1439 w.w_op = name[1];
1440 w.w_arg = name[2];
1441 w.w_req = req;
1442
1443 error = sysctl_wire_old_buffer(req, 0);
1444 if (error)
1445 return (error);
1446 switch (w.w_op) {
1447
1448 case NET_RT_DUMP:
1449 case NET_RT_FLAGS:
1450 if (af == 0) { /* dump all tables */
1451 i = 1;
1452 lim = AF_MAX;
1453 } else /* dump only one table */
1454 i = lim = af;
1455
1456 /*
1457 * take care of llinfo entries, the caller must
1458 * specify an AF
1459 */
1460 if (w.w_op == NET_RT_FLAGS &&
1461 (w.w_arg == 0 || w.w_arg & RTF_LLINFO)) {
1462 if (af != 0)
1463 error = lltable_sysctl_dumparp(af, w.w_req);
1464 else
1465 error = EINVAL;
1466 break;
1467 }
1468 /*
1469 * take care of routing entries
1470 */
1471 for (error = 0; error == 0 && i <= lim; i++)
1424 int i, lim, error = EINVAL;
1425 u_char af;
1426 struct walkarg w;
1427
1428 name ++;
1429 namelen--;
1430 if (req->newptr)
1431 return (EPERM);
1432 if (namelen != 3)
1433 return ((namelen < 3) ? EISDIR : ENOTDIR);
1434 af = name[0];
1435 if (af > AF_MAX)
1436 return (EINVAL);
1437 bzero(&w, sizeof(w));
1438 w.w_op = name[1];
1439 w.w_arg = name[2];
1440 w.w_req = req;
1441
1442 error = sysctl_wire_old_buffer(req, 0);
1443 if (error)
1444 return (error);
1445 switch (w.w_op) {
1446
1447 case NET_RT_DUMP:
1448 case NET_RT_FLAGS:
1449 if (af == 0) { /* dump all tables */
1450 i = 1;
1451 lim = AF_MAX;
1452 } else /* dump only one table */
1453 i = lim = af;
1454
1455 /*
1456 * take care of llinfo entries, the caller must
1457 * specify an AF
1458 */
1459 if (w.w_op == NET_RT_FLAGS &&
1460 (w.w_arg == 0 || w.w_arg & RTF_LLINFO)) {
1461 if (af != 0)
1462 error = lltable_sysctl_dumparp(af, w.w_req);
1463 else
1464 error = EINVAL;
1465 break;
1466 }
1467 /*
1468 * take care of routing entries
1469 */
1470 for (error = 0; error == 0 && i <= lim; i++)
1472 if ((rnh = V_rt_tables[req->td->td_proc->p_fibnum][i]) != NULL) {
1471 rnh = rt_tables_get_rnh(req->td->td_proc->p_fibnum, i);
1472 if (rnh != NULL) {
1473 RADIX_NODE_HEAD_LOCK(rnh);
1474 error = rnh->rnh_walktree(rnh,
1475 sysctl_dumpentry, &w);
1476 RADIX_NODE_HEAD_UNLOCK(rnh);
1477 } else if (af != 0)
1478 error = EAFNOSUPPORT;
1479 break;
1480
1481 case NET_RT_IFLIST:
1482 error = sysctl_iflist(af, &w);
1483 break;
1484
1485 case NET_RT_IFMALIST:
1486 error = sysctl_ifmalist(af, &w);
1487 break;
1488 }
1489 if (w.w_tmem)
1490 free(w.w_tmem, M_RTABLE);
1491 return (error);
1492}
1493
1494SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD, sysctl_rtsock, "");
1495
1496/*
1497 * Definitions of protocols supported in the ROUTE domain.
1498 */
1499
1500static struct domain routedomain; /* or at least forward */
1501
1502static struct protosw routesw[] = {
1503{
1504 .pr_type = SOCK_RAW,
1505 .pr_domain = &routedomain,
1506 .pr_flags = PR_ATOMIC|PR_ADDR,
1507 .pr_output = route_output,
1508 .pr_ctlinput = raw_ctlinput,
1509 .pr_init = raw_init,
1510 .pr_usrreqs = &route_usrreqs
1511}
1512};
1513
1514static struct domain routedomain = {
1515 .dom_family = PF_ROUTE,
1516 .dom_name = "route",
1517 .dom_protosw = routesw,
1518 .dom_protoswNPROTOSW = &routesw[sizeof(routesw)/sizeof(routesw[0])]
1519};
1520
1521DOMAIN_SET(route);
1473 RADIX_NODE_HEAD_LOCK(rnh);
1474 error = rnh->rnh_walktree(rnh,
1475 sysctl_dumpentry, &w);
1476 RADIX_NODE_HEAD_UNLOCK(rnh);
1477 } else if (af != 0)
1478 error = EAFNOSUPPORT;
1479 break;
1480
1481 case NET_RT_IFLIST:
1482 error = sysctl_iflist(af, &w);
1483 break;
1484
1485 case NET_RT_IFMALIST:
1486 error = sysctl_ifmalist(af, &w);
1487 break;
1488 }
1489 if (w.w_tmem)
1490 free(w.w_tmem, M_RTABLE);
1491 return (error);
1492}
1493
1494SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD, sysctl_rtsock, "");
1495
1496/*
1497 * Definitions of protocols supported in the ROUTE domain.
1498 */
1499
1500static struct domain routedomain; /* or at least forward */
1501
1502static struct protosw routesw[] = {
1503{
1504 .pr_type = SOCK_RAW,
1505 .pr_domain = &routedomain,
1506 .pr_flags = PR_ATOMIC|PR_ADDR,
1507 .pr_output = route_output,
1508 .pr_ctlinput = raw_ctlinput,
1509 .pr_init = raw_init,
1510 .pr_usrreqs = &route_usrreqs
1511}
1512};
1513
1514static struct domain routedomain = {
1515 .dom_family = PF_ROUTE,
1516 .dom_name = "route",
1517 .dom_protosw = routesw,
1518 .dom_protoswNPROTOSW = &routesw[sizeof(routesw)/sizeof(routesw[0])]
1519};
1520
1521DOMAIN_SET(route);