Deleted Added
full compact
route.c (132780) route.c (133513)
1/*
2 * Copyright (c) 1980, 1986, 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * @(#)route.c 8.3.1.1 (Berkeley) 2/23/95
1/*
2 * Copyright (c) 1980, 1986, 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * @(#)route.c 8.3.1.1 (Berkeley) 2/23/95
30 * $FreeBSD: head/sys/net/route.c 132780 2004-07-28 06:59:55Z kan $
30 * $FreeBSD: head/sys/net/route.c 133513 2004-08-11 17:26:56Z andre $
31 */
32
33#include "opt_inet.h"
34#include "opt_mrouting.h"
35
36#include <sys/param.h>
37#include <sys/systm.h>
38#include <sys/malloc.h>
39#include <sys/mbuf.h>
40#include <sys/socket.h>
41#include <sys/domain.h>
42#include <sys/kernel.h>
43
44#include <net/if.h>
45#include <net/route.h>
46
47#include <netinet/in.h>
48#include <netinet/ip_mroute.h>
49
31 */
32
33#include "opt_inet.h"
34#include "opt_mrouting.h"
35
36#include <sys/param.h>
37#include <sys/systm.h>
38#include <sys/malloc.h>
39#include <sys/mbuf.h>
40#include <sys/socket.h>
41#include <sys/domain.h>
42#include <sys/kernel.h>
43
44#include <net/if.h>
45#include <net/route.h>
46
47#include <netinet/in.h>
48#include <netinet/ip_mroute.h>
49
50#include <vm/uma.h>
51
50static struct rtstat rtstat;
51struct radix_node_head *rt_tables[AF_MAX+1];
52
53static int rttrash; /* routes not in table but not freed */
54
55static void rt_maskedcopy(struct sockaddr *,
56 struct sockaddr *, struct sockaddr *);
57static void rtable_init(void **);
58
59/* compare two sockaddr structures */
60#define sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0)
61
62/*
63 * Convert a 'struct radix_node *' to a 'struct rtentry *'.
64 * The operation can be done safely (in this code) because a
65 * 'struct rtentry' starts with two 'struct radix_node''s, the first
66 * one representing leaf nodes in the routing tree, which is
67 * what the code in radix.c passes us as a 'struct radix_node'.
68 *
69 * But because there are a lot of assumptions in this conversion,
70 * do not cast explicitly, but always use the macro below.
71 */
72#define RNTORT(p) ((struct rtentry *)(p))
73
74static void
75rtable_init(void **table)
76{
77 struct domain *dom;
78 for (dom = domains; dom; dom = dom->dom_next)
79 if (dom->dom_rtattach)
80 dom->dom_rtattach(&table[dom->dom_family],
81 dom->dom_rtoffset);
82}
83
52static struct rtstat rtstat;
53struct radix_node_head *rt_tables[AF_MAX+1];
54
55static int rttrash; /* routes not in table but not freed */
56
57static void rt_maskedcopy(struct sockaddr *,
58 struct sockaddr *, struct sockaddr *);
59static void rtable_init(void **);
60
61/* compare two sockaddr structures */
62#define sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0)
63
64/*
65 * Convert a 'struct radix_node *' to a 'struct rtentry *'.
66 * The operation can be done safely (in this code) because a
67 * 'struct rtentry' starts with two 'struct radix_node''s, the first
68 * one representing leaf nodes in the routing tree, which is
69 * what the code in radix.c passes us as a 'struct radix_node'.
70 *
71 * But because there are a lot of assumptions in this conversion,
72 * do not cast explicitly, but always use the macro below.
73 */
74#define RNTORT(p) ((struct rtentry *)(p))
75
76static void
77rtable_init(void **table)
78{
79 struct domain *dom;
80 for (dom = domains; dom; dom = dom->dom_next)
81 if (dom->dom_rtattach)
82 dom->dom_rtattach(&table[dom->dom_family],
83 dom->dom_rtoffset);
84}
85
86static uma_zone_t rtzone; /* Routing table UMA zone. */
87
84static void
85route_init(void)
86{
88static void
89route_init(void)
90{
91 rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), NULL, NULL,
92 NULL, NULL, UMA_ALIGN_PTR, 0);
87 rn_init(); /* initialize all zeroes, all ones, mask table */
88 rtable_init((void **)rt_tables);
89}
90
91/*
92 * Packet routing routines.
93 */
94void
95rtalloc(struct route *ro)
96{
97 rtalloc_ign(ro, 0UL);
98}
99
100void
101rtalloc_ign(struct route *ro, u_long ignore)
102{
103 struct rtentry *rt;
104
105 if ((rt = ro->ro_rt) != NULL) {
106 if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP)
107 return;
108 RTFREE(rt);
109 ro->ro_rt = NULL;
110 }
111 ro->ro_rt = rtalloc1(&ro->ro_dst, 1, ignore);
112 if (ro->ro_rt)
113 RT_UNLOCK(ro->ro_rt);
114}
115
116/*
117 * Look up the route that matches the address given
118 * Or, at least try.. Create a cloned route if needed.
119 *
120 * The returned route, if any, is locked.
 *
 * dst      - destination to look up; dst->sa_family selects the
 *            per-family table in rt_tables[].
 * report   - when non-zero, a matching RTF_CLONING route is resolved
 *            through rtrequest(RTM_RESOLVE) and routing messages
 *            (RTM_ADD, RTM_RESOLVE or RTM_MISS) are sent with
 *            rt_missmsg().
 * ignflags - bits masked out of rt_flags before RTF_CLONING is tested.
 *
 * Returns NULL when the family has no table or when the lookup finds
 * nothing but a root node; both cases bump rtstat.rts_unreach.
 * When cloning fails the original route is returned instead, with a
 * reference added, and the error is passed on to rt_missmsg().
 *
 * NOTE(review): dst->sa_family indexes rt_tables[] without a range
 * check -- presumably callers guarantee it is <= AF_MAX; confirm.
121 */
122struct rtentry *
123rtalloc1(struct sockaddr *dst, int report, u_long ignflags)
124{
125 struct radix_node_head *rnh = rt_tables[dst->sa_family];
126 struct rtentry *rt;
127 struct radix_node *rn;
128 struct rtentry *newrt;
129 struct rt_addrinfo info;
130 u_long nflags;
131 int err = 0, msgtype = RTM_MISS;
132
133 newrt = NULL;
134 bzero(&info, sizeof(info));
135 /*
136 * Look up the address in the table for that Address Family
137 */
138 if (rnh == NULL) {
139 rtstat.rts_unreach++;
 /* No table, hence no head lock to drop: skip straight to miss2. */
140 goto miss2;
141 }
142 RADIX_NODE_HEAD_LOCK(rnh);
143 if ((rn = rnh->rnh_matchaddr(dst, rnh)) &&
144 (rn->rn_flags & RNF_ROOT) == 0) {
145 /*
146 * If we find it and it's not the root node, then
147 * get a reference on the rtentry associated.
148 */
149 newrt = rt = RNTORT(rn);
150 nflags = rt->rt_flags & ~ignflags;
151 if (report && (nflags & RTF_CLONING)) {
152 /*
153 * We are apparently adding (report = 0 in delete).
154 * If it requires that it be cloned, do so.
155 * (This implies it wasn't a HOST route.)
156 */
157 err = rtrequest(RTM_RESOLVE, dst, NULL,
158 NULL, 0, &newrt);
159 if (err) {
160 /*
161 * If the cloning didn't succeed, maybe
162 * what we have will do. Return that.
163 */
164 newrt = rt; /* existing route */
165 RT_LOCK(newrt);
166 RT_ADDREF(newrt);
167 goto miss;
168 }
169 KASSERT(newrt, ("no route and no error"));
170 RT_LOCK(newrt);
171 if (newrt->rt_flags & RTF_XRESOLVE) {
172 /*
173 * If the new route specifies it be
174 * externally resolved, then go do that.
175 */
176 msgtype = RTM_RESOLVE;
177 goto miss;
178 }
179 /* Inform listeners of the new route. */
180 info.rti_info[RTAX_DST] = rt_key(newrt);
181 info.rti_info[RTAX_NETMASK] = rt_mask(newrt);
182 info.rti_info[RTAX_GATEWAY] = newrt->rt_gateway;
183 if (newrt->rt_ifp != NULL) {
 /*
  * NOTE(review): the ifaddr_byindex() result is
  * dereferenced without a NULL check -- confirm it
  * cannot be NULL for an attached interface.
  */
184 info.rti_info[RTAX_IFP] =
185 ifaddr_byindex(newrt->rt_ifp->if_index)->ifa_addr;
186 info.rti_info[RTAX_IFA] = newrt->rt_ifa->ifa_addr;
187 }
188 rt_missmsg(RTM_ADD, &info, newrt->rt_flags, 0);
189 } else {
190 KASSERT(rt == newrt, ("locking wrong route"));
191 RT_LOCK(newrt);
192 RT_ADDREF(newrt);
193 }
194 RADIX_NODE_HEAD_UNLOCK(rnh);
195 } else {
196 /*
197 * Either we hit the root or couldn't find any match,
198 * Which basically means
199 * "caint get there frm here"
200 */
201 rtstat.rts_unreach++;
 /*
  * 'miss' is entered with the head still locked: by falling
  * through here, after a failed clone, or for RTF_XRESOLVE.
  */
202 miss:
203 RADIX_NODE_HEAD_UNLOCK(rnh);
204 miss2: if (report) {
205 /*
206 * If required, report the failure to the supervising
207 * Authorities.
208 * For a delete, this is not an error. (report == 0)
209 */
210 info.rti_info[RTAX_DST] = dst;
211 rt_missmsg(msgtype, &info, 0, err);
212 }
213 }
214 if (newrt)
215 RT_LOCK_ASSERT(newrt);
216 return (newrt);
217}
218
219/*
220 * Remove a reference count from an rtentry.
221 * If the count gets low enough, take it out of the routing table
222 */
223void
224rtfree(struct rtentry *rt)
225{
226 struct radix_node_head *rnh;
227
228 /* XXX the NULL checks are probably useless */
229 if (rt == NULL)
230 panic("rtfree: NULL rt");
231 rnh = rt_tables[rt_key(rt)->sa_family];
232 if (rnh == NULL)
233 panic("rtfree: NULL rnh");
234
235 RT_LOCK_ASSERT(rt);
236
237 /*
238 * decrement the reference count by one and if it reaches 0,
239 * and there is a close function defined, call the close function
240 */
241 RT_REMREF(rt);
242 if (rt->rt_refcnt > 0)
243 goto done;
244
245 /*
246 * On last reference give the "close method" a chance
247 * to cleanup private state. This also permits (for
248 * IPv4 and IPv6) a chance to decide if the routing table
249 * entry should be purged immediately or at a later time.
250 * When an immediate purge is to happen the close routine
251 * typically calls rtexpunge which clears the RTF_UP flag
252 * on the entry so that the code below reclaims the storage.
253 */
254 if (rt->rt_refcnt == 0 && rnh->rnh_close)
255 rnh->rnh_close((struct radix_node *)rt, rnh);
256
257 /*
258 * If we are no longer "up" (and ref == 0)
259 * then we can free the resources associated
260 * with the route.
261 */
262 if ((rt->rt_flags & RTF_UP) == 0) {
263 if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT))
264 panic ("rtfree 2");
265 /*
266 * the rtentry must have been removed from the routing table
267 * so it is represented in rttrash.. remove that now.
268 */
269 rttrash--;
270#ifdef DIAGNOSTIC
271 if (rt->rt_refcnt < 0) {
272 printf("rtfree: %p not freed (neg refs)\n", rt);
273 goto done;
274 }
275#endif
276 /*
277 * release references on items we hold them on..
278 * e.g other routes and ifaddrs.
279 */
280 if (rt->rt_ifa)
281 IFAFREE(rt->rt_ifa);
282 rt->rt_parent = NULL; /* NB: no refcnt on parent */
283
284 /*
285 * The key is separatly alloc'd so free it (see rt_setgate()).
286 * This also frees the gateway, as they are always malloc'd
287 * together.
288 */
289 Free(rt_key(rt));
290
291 /*
292 * and the rtentry itself of course
293 */
294 RT_LOCK_DESTROY(rt);
93 rn_init(); /* initialize all zeroes, all ones, mask table */
94 rtable_init((void **)rt_tables);
95}
96
97/*
98 * Packet routing routines.
99 */
100void
101rtalloc(struct route *ro)
102{
103 rtalloc_ign(ro, 0UL);
104}
105
106void
107rtalloc_ign(struct route *ro, u_long ignore)
108{
109 struct rtentry *rt;
110
111 if ((rt = ro->ro_rt) != NULL) {
112 if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP)
113 return;
114 RTFREE(rt);
115 ro->ro_rt = NULL;
116 }
117 ro->ro_rt = rtalloc1(&ro->ro_dst, 1, ignore);
118 if (ro->ro_rt)
119 RT_UNLOCK(ro->ro_rt);
120}
121
122/*
123 * Look up the route that matches the address given
124 * Or, at least try.. Create a cloned route if needed.
125 *
126 * The returned route, if any, is locked.
 *
 * dst      - destination to look up; dst->sa_family selects the
 *            per-family table in rt_tables[].
 * report   - when non-zero, a matching RTF_CLONING route is resolved
 *            through rtrequest(RTM_RESOLVE) and routing messages
 *            (RTM_ADD, RTM_RESOLVE or RTM_MISS) are sent with
 *            rt_missmsg().
 * ignflags - bits masked out of rt_flags before RTF_CLONING is tested.
 *
 * Returns NULL when the family has no table or when the lookup finds
 * nothing but a root node; both cases bump rtstat.rts_unreach.
 * When cloning fails the original route is returned instead, with a
 * reference added, and the error is passed on to rt_missmsg().
 *
 * NOTE(review): dst->sa_family indexes rt_tables[] without a range
 * check -- presumably callers guarantee it is <= AF_MAX; confirm.
127 */
128struct rtentry *
129rtalloc1(struct sockaddr *dst, int report, u_long ignflags)
130{
131 struct radix_node_head *rnh = rt_tables[dst->sa_family];
132 struct rtentry *rt;
133 struct radix_node *rn;
134 struct rtentry *newrt;
135 struct rt_addrinfo info;
136 u_long nflags;
137 int err = 0, msgtype = RTM_MISS;
138
139 newrt = NULL;
140 bzero(&info, sizeof(info));
141 /*
142 * Look up the address in the table for that Address Family
143 */
144 if (rnh == NULL) {
145 rtstat.rts_unreach++;
 /* No table, hence no head lock to drop: skip straight to miss2. */
146 goto miss2;
147 }
148 RADIX_NODE_HEAD_LOCK(rnh);
149 if ((rn = rnh->rnh_matchaddr(dst, rnh)) &&
150 (rn->rn_flags & RNF_ROOT) == 0) {
151 /*
152 * If we find it and it's not the root node, then
153 * get a reference on the rtentry associated.
154 */
155 newrt = rt = RNTORT(rn);
156 nflags = rt->rt_flags & ~ignflags;
157 if (report && (nflags & RTF_CLONING)) {
158 /*
159 * We are apparently adding (report = 0 in delete).
160 * If it requires that it be cloned, do so.
161 * (This implies it wasn't a HOST route.)
162 */
163 err = rtrequest(RTM_RESOLVE, dst, NULL,
164 NULL, 0, &newrt);
165 if (err) {
166 /*
167 * If the cloning didn't succeed, maybe
168 * what we have will do. Return that.
169 */
170 newrt = rt; /* existing route */
171 RT_LOCK(newrt);
172 RT_ADDREF(newrt);
173 goto miss;
174 }
175 KASSERT(newrt, ("no route and no error"));
176 RT_LOCK(newrt);
177 if (newrt->rt_flags & RTF_XRESOLVE) {
178 /*
179 * If the new route specifies it be
180 * externally resolved, then go do that.
181 */
182 msgtype = RTM_RESOLVE;
183 goto miss;
184 }
185 /* Inform listeners of the new route. */
186 info.rti_info[RTAX_DST] = rt_key(newrt);
187 info.rti_info[RTAX_NETMASK] = rt_mask(newrt);
188 info.rti_info[RTAX_GATEWAY] = newrt->rt_gateway;
189 if (newrt->rt_ifp != NULL) {
 /*
  * NOTE(review): the ifaddr_byindex() result is
  * dereferenced without a NULL check -- confirm it
  * cannot be NULL for an attached interface.
  */
190 info.rti_info[RTAX_IFP] =
191 ifaddr_byindex(newrt->rt_ifp->if_index)->ifa_addr;
192 info.rti_info[RTAX_IFA] = newrt->rt_ifa->ifa_addr;
193 }
194 rt_missmsg(RTM_ADD, &info, newrt->rt_flags, 0);
195 } else {
196 KASSERT(rt == newrt, ("locking wrong route"));
197 RT_LOCK(newrt);
198 RT_ADDREF(newrt);
199 }
200 RADIX_NODE_HEAD_UNLOCK(rnh);
201 } else {
202 /*
203 * Either we hit the root or couldn't find any match,
204 * Which basically means
205 * "caint get there frm here"
206 */
207 rtstat.rts_unreach++;
 /*
  * 'miss' is entered with the head still locked: by falling
  * through here, after a failed clone, or for RTF_XRESOLVE.
  */
208 miss:
209 RADIX_NODE_HEAD_UNLOCK(rnh);
210 miss2: if (report) {
211 /*
212 * If required, report the failure to the supervising
213 * Authorities.
214 * For a delete, this is not an error. (report == 0)
215 */
216 info.rti_info[RTAX_DST] = dst;
217 rt_missmsg(msgtype, &info, 0, err);
218 }
219 }
220 if (newrt)
221 RT_LOCK_ASSERT(newrt);
222 return (newrt);
223}
224
225/*
226 * Remove a reference count from an rtentry.
227 * If the count gets low enough, take it out of the routing table
228 */
229void
230rtfree(struct rtentry *rt)
231{
232 struct radix_node_head *rnh;
233
234 /* XXX the NULL checks are probably useless */
235 if (rt == NULL)
236 panic("rtfree: NULL rt");
237 rnh = rt_tables[rt_key(rt)->sa_family];
238 if (rnh == NULL)
239 panic("rtfree: NULL rnh");
240
241 RT_LOCK_ASSERT(rt);
242
243 /*
244 * decrement the reference count by one and if it reaches 0,
245 * and there is a close function defined, call the close function
246 */
247 RT_REMREF(rt);
248 if (rt->rt_refcnt > 0)
249 goto done;
250
251 /*
252 * On last reference give the "close method" a chance
253 * to cleanup private state. This also permits (for
254 * IPv4 and IPv6) a chance to decide if the routing table
255 * entry should be purged immediately or at a later time.
256 * When an immediate purge is to happen the close routine
257 * typically calls rtexpunge which clears the RTF_UP flag
258 * on the entry so that the code below reclaims the storage.
259 */
260 if (rt->rt_refcnt == 0 && rnh->rnh_close)
261 rnh->rnh_close((struct radix_node *)rt, rnh);
262
263 /*
264 * If we are no longer "up" (and ref == 0)
265 * then we can free the resources associated
266 * with the route.
267 */
268 if ((rt->rt_flags & RTF_UP) == 0) {
269 if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT))
270 panic ("rtfree 2");
271 /*
272 * the rtentry must have been removed from the routing table
273 * so it is represented in rttrash.. remove that now.
274 */
275 rttrash--;
276#ifdef DIAGNOSTIC
277 if (rt->rt_refcnt < 0) {
278 printf("rtfree: %p not freed (neg refs)\n", rt);
279 goto done;
280 }
281#endif
282 /*
283 * release references on items we hold them on..
284 * e.g other routes and ifaddrs.
285 */
286 if (rt->rt_ifa)
287 IFAFREE(rt->rt_ifa);
288 rt->rt_parent = NULL; /* NB: no refcnt on parent */
289
290 /*
291 * The key is separatly alloc'd so free it (see rt_setgate()).
292 * This also frees the gateway, as they are always malloc'd
293 * together.
294 */
295 Free(rt_key(rt));
296
297 /*
298 * and the rtentry itself of course
299 */
300 RT_LOCK_DESTROY(rt);
295 Free(rt);
301 uma_zfree(rtzone, rt);
296 return;
297 }
298done:
299 RT_UNLOCK(rt);
300}
301
302
303/*
304 * Force a routing table entry to the specified
305 * destination to go through the given gateway.
306 * Normally called as a result of a routing redirect
307 * message from the network layer.
 *
 * dst, gateway, netmask - describe the redirected route.
 * flags - route flags for the redirect; unless RTF_DONE is set, an
 *         existing route must have 'src' as its gateway and use the
 *         interface address found for 'gateway', else EINVAL.
 * src   - sender of the redirect; reported as RTAX_AUTHOR.
 *
 * Nothing is returned.  The outcome is counted in rtstat
 * (rts_badredirect on error, otherwise rts_dynamic or
 * rts_newgateway) and an RTM_REDIRECT message carrying the error
 * code is always sent through rt_missmsg().
308 */
309void
310rtredirect(struct sockaddr *dst,
311 struct sockaddr *gateway,
312 struct sockaddr *netmask,
313 int flags,
314 struct sockaddr *src)
315{
316 struct rtentry *rt;
317 int error = 0;
318 short *stat = NULL;
319 struct rt_addrinfo info;
320 struct ifaddr *ifa;
321
322 /* verify the gateway is directly reachable */
323 if ((ifa = ifa_ifwithnet(gateway)) == NULL) {
324 error = ENETUNREACH;
 /* rt is still unassigned here; nothing after 'out' uses it. */
325 goto out;
326 }
327 rt = rtalloc1(dst, 0, 0UL); /* NB: rt is locked */
328 /*
329 * If the redirect isn't from our current router for this dst,
330 * it's either old or wrong. If it redirects us to ourselves,
331 * we have a routing loop, perhaps as a result of an interface
332 * going down recently.
333 */
334 if (!(flags & RTF_DONE) && rt &&
335 (!sa_equal(src, rt->rt_gateway) || rt->rt_ifa != ifa))
336 error = EINVAL;
337 else if (ifa_ifwithaddr(gateway))
338 error = EHOSTUNREACH;
339 if (error)
340 goto done;
341 /*
342 * Create a new entry if we just got back a wildcard entry
343 * or the lookup failed. This is necessary for hosts
344 * which use routing redirects generated by smart gateways
345 * to dynamically build the routing tables.
346 */
347 if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
348 goto create;
349 /*
350 * Don't listen to the redirect if it's
351 * for a route to an interface.
352 */
353 if (rt->rt_flags & RTF_GATEWAY) {
354 if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
355 /*
356 * Changing from route to net => route to host.
357 * Create new route, rather than smashing route to net.
358 */
359 create:
 /* Drop the looked-up route (if any) before adding the new one. */
360 if (rt)
361 rtfree(rt);
362 flags |= RTF_GATEWAY | RTF_DYNAMIC;
363 bzero((caddr_t)&info, sizeof(info));
364 info.rti_info[RTAX_DST] = dst;
365 info.rti_info[RTAX_GATEWAY] = gateway;
366 info.rti_info[RTAX_NETMASK] = netmask;
367 info.rti_ifa = ifa;
368 info.rti_flags = flags;
369 rt = NULL;
370 error = rtrequest1(RTM_ADD, &info, &rt);
371 if (rt != NULL) {
 /* Locked so that the rtfree() at 'done' finds it locked. */
372 RT_LOCK(rt);
373 flags = rt->rt_flags;
374 }
375 stat = &rtstat.rts_dynamic;
376 } else {
377 /*
378 * Smash the current notion of the gateway to
379 * this destination. Should check about netmask!!!
380 */
381 rt->rt_flags |= RTF_MODIFIED;
382 flags |= RTF_MODIFIED;
383 stat = &rtstat.rts_newgateway;
384 /*
385 * add the key and gateway (in one malloc'd chunk).
 *
 * NOTE(review): any status from rt_setgate() is not
 * examined here -- confirm a failure need not be reported.
386 */
387 rt_setgate(rt, rt_key(rt), gateway);
388 }
389 } else
390 error = EHOSTUNREACH;
391done:
392 if (rt)
393 rtfree(rt);
394out:
395 if (error)
396 rtstat.rts_badredirect++;
397 else if (stat != NULL)
398 (*stat)++;
399 bzero((caddr_t)&info, sizeof(info));
400 info.rti_info[RTAX_DST] = dst;
401 info.rti_info[RTAX_GATEWAY] = gateway;
402 info.rti_info[RTAX_NETMASK] = netmask;
403 info.rti_info[RTAX_AUTHOR] = src;
404 rt_missmsg(RTM_REDIRECT, &info, flags, error);
405}
406
407/*
408 * Routing table ioctl interface.
409 */
410int
411rtioctl(u_long req, caddr_t data)
412{
413#ifdef INET
414 /* Multicast goop, grrr... */
415 return mrt_ioctl ? mrt_ioctl(req, data) : EOPNOTSUPP;
416#else /* INET */
417 return ENXIO;
418#endif /* INET */
419}
420
421struct ifaddr *
422ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway)
423{
424 register struct ifaddr *ifa;
425
426 if ((flags & RTF_GATEWAY) == 0) {
427 /*
428 * If we are adding a route to an interface,
429 * and the interface is a pt to pt link
430 * we should search for the destination
431 * as our clue to the interface. Otherwise
432 * we can use the local address.
433 */
434 ifa = NULL;
435 if (flags & RTF_HOST)
436 ifa = ifa_ifwithdstaddr(dst);
437 if (ifa == NULL)
438 ifa = ifa_ifwithaddr(gateway);
439 } else {
440 /*
441 * If we are adding a route to a remote net
442 * or host, the gateway may still be on the
443 * other end of a pt to pt link.
444 */
445 ifa = ifa_ifwithdstaddr(gateway);
446 }
447 if (ifa == NULL)
448 ifa = ifa_ifwithnet(gateway);
449 if (ifa == NULL) {
450 struct rtentry *rt = rtalloc1(gateway, 0, 0UL);
451 if (rt == NULL)
452 return (NULL);
453 RT_REMREF(rt);
454 RT_UNLOCK(rt);
455 if ((ifa = rt->rt_ifa) == NULL)
456 return (NULL);
457 }
458 if (ifa->ifa_addr->sa_family != dst->sa_family) {
459 struct ifaddr *oifa = ifa;
460 ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
461 if (ifa == NULL)
462 ifa = oifa;
463 }
464 return (ifa);
465}
466
467static walktree_f_t rt_fixdelete;
468static walktree_f_t rt_fixchange;
469
470struct rtfc_arg {
471 struct rtentry *rt0;
472 struct radix_node_head *rnh;
473};
474
475/*
476 * Do appropriate manipulations of a routing tree given
477 * all the bits of info needed
478 */
479int
480rtrequest(int req,
481 struct sockaddr *dst,
482 struct sockaddr *gateway,
483 struct sockaddr *netmask,
484 int flags,
485 struct rtentry **ret_nrt)
486{
487 struct rt_addrinfo info;
488
489 bzero((caddr_t)&info, sizeof(info));
490 info.rti_flags = flags;
491 info.rti_info[RTAX_DST] = dst;
492 info.rti_info[RTAX_GATEWAY] = gateway;
493 info.rti_info[RTAX_NETMASK] = netmask;
494 return rtrequest1(req, &info, ret_nrt);
495}
496
497/*
498 * These (questionable) definitions of apparent local variables apply
499 * to the next two functions. XXXXXX!!!
500 */
501#define dst info->rti_info[RTAX_DST]
502#define gateway info->rti_info[RTAX_GATEWAY]
503#define netmask info->rti_info[RTAX_NETMASK]
504#define ifaaddr info->rti_info[RTAX_IFA]
505#define ifpaddr info->rti_info[RTAX_IFP]
506#define flags info->rti_flags
507
508int
509rt_getifa(struct rt_addrinfo *info)
510{
511 struct ifaddr *ifa;
512 int error = 0;
513
514 /*
515 * ifp may be specified by sockaddr_dl
516 * when protocol address is ambiguous.
517 */
518 if (info->rti_ifp == NULL && ifpaddr != NULL &&
519 ifpaddr->sa_family == AF_LINK &&
520 (ifa = ifa_ifwithnet(ifpaddr)) != NULL)
521 info->rti_ifp = ifa->ifa_ifp;
522 if (info->rti_ifa == NULL && ifaaddr != NULL)
523 info->rti_ifa = ifa_ifwithaddr(ifaaddr);
524 if (info->rti_ifa == NULL) {
525 struct sockaddr *sa;
526
527 sa = ifaaddr != NULL ? ifaaddr :
528 (gateway != NULL ? gateway : dst);
529 if (sa != NULL && info->rti_ifp != NULL)
530 info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp);
531 else if (dst != NULL && gateway != NULL)
532 info->rti_ifa = ifa_ifwithroute(flags, dst, gateway);
533 else if (sa != NULL)
534 info->rti_ifa = ifa_ifwithroute(flags, sa, sa);
535 }
536 if ((ifa = info->rti_ifa) != NULL) {
537 if (info->rti_ifp == NULL)
538 info->rti_ifp = ifa->ifa_ifp;
539 } else
540 error = ENETUNREACH;
541 return (error);
542}
543
544/*
545 * Expunges references to a route that's about to be reclaimed.
546 * The route must be locked.
 *
 * The entry is removed from its address family's radix tree and
 * RTF_UP is cleared.  Routes cloned from it are then visited with
 * rt_fixdelete, its rt_gwroute is released, the interface address's
 * ifa_rtrequest hook is told RTM_DELETE, and rttrash is bumped.
 * The rtentry itself is not freed here.
 *
 * Returns 0 on success, EAFNOSUPPORT when the family has no routing
 * table, or ESRCH when the entry was not found in the tree.
547 */
548int
549rtexpunge(struct rtentry *rt)
550{
551 struct radix_node *rn;
552 struct radix_node_head *rnh;
553 struct ifaddr *ifa;
554 int error = 0;
555
556 RT_LOCK_ASSERT(rt);
557#if 0
558 /*
559 * We cannot assume anything about the reference count
560 * because protocols call us in many situations; often
561 * before unwinding references to the table entry.
562 */
563 KASSERT(rt->rt_refcnt <= 1, ("bogus refcnt %ld", rt->rt_refcnt));
564#endif
565 /*
566 * Find the correct routing tree to use for this Address Family
567 */
568 rnh = rt_tables[rt_key(rt)->sa_family];
569 if (rnh == NULL)
570 return (EAFNOSUPPORT);
571
572 RADIX_NODE_HEAD_LOCK(rnh);
573
574 /*
575 * Remove the item from the tree; it should be there,
576 * but when callers invoke us blindly it may not (sigh).
577 */
578 rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), rnh);
579 if (rn == NULL) {
580 error = ESRCH;
581 goto bad;
582 }
583 KASSERT((rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) == 0,
584 ("unexpected flags 0x%x", rn->rn_flags));
585 KASSERT(rt == RNTORT(rn),
586 ("lookup mismatch, rt %p rn %p", rt, rn));
587
588 rt->rt_flags &= ~RTF_UP;
589
590 /*
591 * Now search what's left of the subtree for any cloned
592 * routes which might have been formed from this node.
593 */
594 if ((rt->rt_flags & RTF_CLONING) && rt_mask(rt))
595 rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt),
596 rt_fixdelete, rt);
597
598 /*
599 * Remove any external references we may have.
600 * This might result in another rtentry being freed if
601 * we held its last reference.
602 */
603 if (rt->rt_gwroute) {
604 RTFREE(rt->rt_gwroute);
605 rt->rt_gwroute = NULL;
606 }
607
608 /*
609 * Give the protocol a chance to keep things in sync.
610 */
611 if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest) {
612 struct rt_addrinfo info;
613
614 bzero((caddr_t)&info, sizeof(info));
615 info.rti_flags = rt->rt_flags;
616 info.rti_info[RTAX_DST] = rt_key(rt);
617 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
618 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
619 ifa->ifa_rtrequest(RTM_DELETE, rt, &info);
620 }
621
622 /*
623 * one more rtentry floating around that is not
624 * linked to the routing table.
625 */
626 rttrash++;
 /* Both the success and the ESRCH path drop the head lock here. */
627bad:
628 RADIX_NODE_HEAD_UNLOCK(rnh);
629 return (error);
630}
631
632int
633rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt)
634{
635 int error = 0;
636 register struct rtentry *rt;
637 register struct radix_node *rn;
638 register struct radix_node_head *rnh;
639 struct ifaddr *ifa;
640 struct sockaddr *ndst;
641#define senderr(x) { error = x ; goto bad; }
642
643 /*
644 * Find the correct routing tree to use for this Address Family
645 */
646 rnh = rt_tables[dst->sa_family];
647 if (rnh == NULL)
648 return (EAFNOSUPPORT);
649 RADIX_NODE_HEAD_LOCK(rnh);
650 /*
651 * If we are adding a host route then we don't want to put
652 * a netmask in the tree, nor do we want to clone it.
653 */
654 if (flags & RTF_HOST) {
655 netmask = NULL;
656 flags &= ~RTF_CLONING;
657 }
658 switch (req) {
659 case RTM_DELETE:
660 /*
661 * Remove the item from the tree and return it.
662 * Complain if it is not there and do no more processing.
663 */
664 rn = rnh->rnh_deladdr(dst, netmask, rnh);
665 if (rn == NULL)
666 senderr(ESRCH);
667 if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
668 panic ("rtrequest delete");
669 rt = RNTORT(rn);
670 RT_LOCK(rt);
671 RT_ADDREF(rt);
672 rt->rt_flags &= ~RTF_UP;
673
674 /*
675 * Now search what's left of the subtree for any cloned
676 * routes which might have been formed from this node.
677 */
678 if ((rt->rt_flags & RTF_CLONING) &&
679 rt_mask(rt)) {
680 rnh->rnh_walktree_from(rnh, dst, rt_mask(rt),
681 rt_fixdelete, rt);
682 }
683
684 /*
685 * Remove any external references we may have.
686 * This might result in another rtentry being freed if
687 * we held its last reference.
688 */
689 if (rt->rt_gwroute) {
690 RTFREE(rt->rt_gwroute);
691 rt->rt_gwroute = NULL;
692 }
693
694 /*
695 * give the protocol a chance to keep things in sync.
696 */
697 if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest)
698 ifa->ifa_rtrequest(RTM_DELETE, rt, info);
699
700 /*
701 * One more rtentry floating around that is not
702 * linked to the routing table. rttrash will be decremented
703 * when RTFREE(rt) is eventually called.
704 */
705 rttrash++;
706
707 /*
708 * If the caller wants it, then it can have it,
709 * but it's up to it to free the rtentry as we won't be
710 * doing it.
711 */
712 if (ret_nrt) {
713 *ret_nrt = rt;
714 RT_UNLOCK(rt);
715 } else
716 RTFREE_LOCKED(rt);
717 break;
718
719 case RTM_RESOLVE:
720 if (ret_nrt == NULL || (rt = *ret_nrt) == NULL)
721 senderr(EINVAL);
722 ifa = rt->rt_ifa;
723 /* XXX locking? */
724 flags = rt->rt_flags &
725 ~(RTF_CLONING | RTF_STATIC);
726 flags |= RTF_WASCLONED;
727 gateway = rt->rt_gateway;
728 if ((netmask = rt->rt_genmask) == NULL)
729 flags |= RTF_HOST;
730 goto makeroute;
731
732 case RTM_ADD:
733 if ((flags & RTF_GATEWAY) && !gateway)
734 panic("rtrequest: GATEWAY but no gateway");
735
736 if (info->rti_ifa == NULL && (error = rt_getifa(info)))
737 senderr(error);
738 ifa = info->rti_ifa;
739
740 makeroute:
302 return;
303 }
304done:
305 RT_UNLOCK(rt);
306}
307
308
309/*
310 * Force a routing table entry to the specified
311 * destination to go through the given gateway.
312 * Normally called as a result of a routing redirect
313 * message from the network layer.
 *
 * dst, gateway, netmask - describe the redirected route.
 * flags - route flags for the redirect; unless RTF_DONE is set, an
 *         existing route must have 'src' as its gateway and use the
 *         interface address found for 'gateway', else EINVAL.
 * src   - sender of the redirect; reported as RTAX_AUTHOR.
 *
 * Nothing is returned.  The outcome is counted in rtstat
 * (rts_badredirect on error, otherwise rts_dynamic or
 * rts_newgateway) and an RTM_REDIRECT message carrying the error
 * code is always sent through rt_missmsg().
314 */
315void
316rtredirect(struct sockaddr *dst,
317 struct sockaddr *gateway,
318 struct sockaddr *netmask,
319 int flags,
320 struct sockaddr *src)
321{
322 struct rtentry *rt;
323 int error = 0;
324 short *stat = NULL;
325 struct rt_addrinfo info;
326 struct ifaddr *ifa;
327
328 /* verify the gateway is directly reachable */
329 if ((ifa = ifa_ifwithnet(gateway)) == NULL) {
330 error = ENETUNREACH;
 /* rt is still unassigned here; nothing after 'out' uses it. */
331 goto out;
332 }
333 rt = rtalloc1(dst, 0, 0UL); /* NB: rt is locked */
334 /*
335 * If the redirect isn't from our current router for this dst,
336 * it's either old or wrong. If it redirects us to ourselves,
337 * we have a routing loop, perhaps as a result of an interface
338 * going down recently.
339 */
340 if (!(flags & RTF_DONE) && rt &&
341 (!sa_equal(src, rt->rt_gateway) || rt->rt_ifa != ifa))
342 error = EINVAL;
343 else if (ifa_ifwithaddr(gateway))
344 error = EHOSTUNREACH;
345 if (error)
346 goto done;
347 /*
348 * Create a new entry if we just got back a wildcard entry
349 * or the lookup failed. This is necessary for hosts
350 * which use routing redirects generated by smart gateways
351 * to dynamically build the routing tables.
352 */
353 if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
354 goto create;
355 /*
356 * Don't listen to the redirect if it's
357 * for a route to an interface.
358 */
359 if (rt->rt_flags & RTF_GATEWAY) {
360 if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
361 /*
362 * Changing from route to net => route to host.
363 * Create new route, rather than smashing route to net.
364 */
365 create:
 /* Drop the looked-up route (if any) before adding the new one. */
366 if (rt)
367 rtfree(rt);
368 flags |= RTF_GATEWAY | RTF_DYNAMIC;
369 bzero((caddr_t)&info, sizeof(info));
370 info.rti_info[RTAX_DST] = dst;
371 info.rti_info[RTAX_GATEWAY] = gateway;
372 info.rti_info[RTAX_NETMASK] = netmask;
373 info.rti_ifa = ifa;
374 info.rti_flags = flags;
375 rt = NULL;
376 error = rtrequest1(RTM_ADD, &info, &rt);
377 if (rt != NULL) {
 /* Locked so that the rtfree() at 'done' finds it locked. */
378 RT_LOCK(rt);
379 flags = rt->rt_flags;
380 }
381 stat = &rtstat.rts_dynamic;
382 } else {
383 /*
384 * Smash the current notion of the gateway to
385 * this destination. Should check about netmask!!!
386 */
387 rt->rt_flags |= RTF_MODIFIED;
388 flags |= RTF_MODIFIED;
389 stat = &rtstat.rts_newgateway;
390 /*
391 * add the key and gateway (in one malloc'd chunk).
 *
 * NOTE(review): any status from rt_setgate() is not
 * examined here -- confirm a failure need not be reported.
392 */
393 rt_setgate(rt, rt_key(rt), gateway);
394 }
395 } else
396 error = EHOSTUNREACH;
397done:
398 if (rt)
399 rtfree(rt);
400out:
401 if (error)
402 rtstat.rts_badredirect++;
403 else if (stat != NULL)
404 (*stat)++;
405 bzero((caddr_t)&info, sizeof(info));
406 info.rti_info[RTAX_DST] = dst;
407 info.rti_info[RTAX_GATEWAY] = gateway;
408 info.rti_info[RTAX_NETMASK] = netmask;
409 info.rti_info[RTAX_AUTHOR] = src;
410 rt_missmsg(RTM_REDIRECT, &info, flags, error);
411}
412
413/*
414 * Routing table ioctl interface.
415 */
416int
417rtioctl(u_long req, caddr_t data)
418{
419#ifdef INET
420 /* Multicast goop, grrr... */
421 return mrt_ioctl ? mrt_ioctl(req, data) : EOPNOTSUPP;
422#else /* INET */
423 return ENXIO;
424#endif /* INET */
425}
426
427struct ifaddr *
428ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway)
429{
430 register struct ifaddr *ifa;
431
432 if ((flags & RTF_GATEWAY) == 0) {
433 /*
434 * If we are adding a route to an interface,
435 * and the interface is a pt to pt link
436 * we should search for the destination
437 * as our clue to the interface. Otherwise
438 * we can use the local address.
439 */
440 ifa = NULL;
441 if (flags & RTF_HOST)
442 ifa = ifa_ifwithdstaddr(dst);
443 if (ifa == NULL)
444 ifa = ifa_ifwithaddr(gateway);
445 } else {
446 /*
447 * If we are adding a route to a remote net
448 * or host, the gateway may still be on the
449 * other end of a pt to pt link.
450 */
451 ifa = ifa_ifwithdstaddr(gateway);
452 }
453 if (ifa == NULL)
454 ifa = ifa_ifwithnet(gateway);
455 if (ifa == NULL) {
456 struct rtentry *rt = rtalloc1(gateway, 0, 0UL);
457 if (rt == NULL)
458 return (NULL);
459 RT_REMREF(rt);
460 RT_UNLOCK(rt);
461 if ((ifa = rt->rt_ifa) == NULL)
462 return (NULL);
463 }
464 if (ifa->ifa_addr->sa_family != dst->sa_family) {
465 struct ifaddr *oifa = ifa;
466 ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
467 if (ifa == NULL)
468 ifa = oifa;
469 }
470 return (ifa);
471}
472
473static walktree_f_t rt_fixdelete;
474static walktree_f_t rt_fixchange;
475
476struct rtfc_arg {
477 struct rtentry *rt0;
478 struct radix_node_head *rnh;
479};
480
481/*
482 * Do appropriate manipulations of a routing tree given
483 * all the bits of info needed
484 */
485int
486rtrequest(int req,
487 struct sockaddr *dst,
488 struct sockaddr *gateway,
489 struct sockaddr *netmask,
490 int flags,
491 struct rtentry **ret_nrt)
492{
493 struct rt_addrinfo info;
494
495 bzero((caddr_t)&info, sizeof(info));
496 info.rti_flags = flags;
497 info.rti_info[RTAX_DST] = dst;
498 info.rti_info[RTAX_GATEWAY] = gateway;
499 info.rti_info[RTAX_NETMASK] = netmask;
500 return rtrequest1(req, &info, ret_nrt);
501}
502
/*
 * The following names look like local variables but are really
 * shorthands for fields of the rt_addrinfo pointed to by "info".
 * They stay defined for the functions below until the matching
 * #undef lines.  XXXXXX!!!
 */
#define dst	(info->rti_info[RTAX_DST])
#define gateway	(info->rti_info[RTAX_GATEWAY])
#define netmask	(info->rti_info[RTAX_NETMASK])
#define ifaaddr	(info->rti_info[RTAX_IFA])
#define ifpaddr	(info->rti_info[RTAX_IFP])
#define flags	(info->rti_flags)
513
514int
515rt_getifa(struct rt_addrinfo *info)
516{
517 struct ifaddr *ifa;
518 int error = 0;
519
520 /*
521 * ifp may be specified by sockaddr_dl
522 * when protocol address is ambiguous.
523 */
524 if (info->rti_ifp == NULL && ifpaddr != NULL &&
525 ifpaddr->sa_family == AF_LINK &&
526 (ifa = ifa_ifwithnet(ifpaddr)) != NULL)
527 info->rti_ifp = ifa->ifa_ifp;
528 if (info->rti_ifa == NULL && ifaaddr != NULL)
529 info->rti_ifa = ifa_ifwithaddr(ifaaddr);
530 if (info->rti_ifa == NULL) {
531 struct sockaddr *sa;
532
533 sa = ifaaddr != NULL ? ifaaddr :
534 (gateway != NULL ? gateway : dst);
535 if (sa != NULL && info->rti_ifp != NULL)
536 info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp);
537 else if (dst != NULL && gateway != NULL)
538 info->rti_ifa = ifa_ifwithroute(flags, dst, gateway);
539 else if (sa != NULL)
540 info->rti_ifa = ifa_ifwithroute(flags, sa, sa);
541 }
542 if ((ifa = info->rti_ifa) != NULL) {
543 if (info->rti_ifp == NULL)
544 info->rti_ifp = ifa->ifa_ifp;
545 } else
546 error = ENETUNREACH;
547 return (error);
548}
549
550/*
551 * Expunges references to a route that's about to be reclaimed.
552 * The route must be locked.
553 */
554int
555rtexpunge(struct rtentry *rt)
556{
557 struct radix_node *rn;
558 struct radix_node_head *rnh;
559 struct ifaddr *ifa;
560 int error = 0;
561
562 RT_LOCK_ASSERT(rt);
563#if 0
564 /*
565 * We cannot assume anything about the reference count
566 * because protocols call us in many situations; often
567 * before unwinding references to the table entry.
568 */
569 KASSERT(rt->rt_refcnt <= 1, ("bogus refcnt %ld", rt->rt_refcnt));
570#endif
571 /*
572 * Find the correct routing tree to use for this Address Family
573 */
574 rnh = rt_tables[rt_key(rt)->sa_family];
575 if (rnh == NULL)
576 return (EAFNOSUPPORT);
577
578 RADIX_NODE_HEAD_LOCK(rnh);
579
580 /*
581 * Remove the item from the tree; it should be there,
582 * but when callers invoke us blindly it may not (sigh).
583 */
584 rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), rnh);
585 if (rn == NULL) {
586 error = ESRCH;
587 goto bad;
588 }
589 KASSERT((rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) == 0,
590 ("unexpected flags 0x%x", rn->rn_flags));
591 KASSERT(rt == RNTORT(rn),
592 ("lookup mismatch, rt %p rn %p", rt, rn));
593
594 rt->rt_flags &= ~RTF_UP;
595
596 /*
597 * Now search what's left of the subtree for any cloned
598 * routes which might have been formed from this node.
599 */
600 if ((rt->rt_flags & RTF_CLONING) && rt_mask(rt))
601 rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt),
602 rt_fixdelete, rt);
603
604 /*
605 * Remove any external references we may have.
606 * This might result in another rtentry being freed if
607 * we held its last reference.
608 */
609 if (rt->rt_gwroute) {
610 RTFREE(rt->rt_gwroute);
611 rt->rt_gwroute = NULL;
612 }
613
614 /*
615 * Give the protocol a chance to keep things in sync.
616 */
617 if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest) {
618 struct rt_addrinfo info;
619
620 bzero((caddr_t)&info, sizeof(info));
621 info.rti_flags = rt->rt_flags;
622 info.rti_info[RTAX_DST] = rt_key(rt);
623 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
624 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
625 ifa->ifa_rtrequest(RTM_DELETE, rt, &info);
626 }
627
628 /*
629 * one more rtentry floating around that is not
630 * linked to the routing table.
631 */
632 rttrash++;
633bad:
634 RADIX_NODE_HEAD_UNLOCK(rnh);
635 return (error);
636}
637
638int
639rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt)
640{
641 int error = 0;
642 register struct rtentry *rt;
643 register struct radix_node *rn;
644 register struct radix_node_head *rnh;
645 struct ifaddr *ifa;
646 struct sockaddr *ndst;
647#define senderr(x) { error = x ; goto bad; }
648
649 /*
650 * Find the correct routing tree to use for this Address Family
651 */
652 rnh = rt_tables[dst->sa_family];
653 if (rnh == NULL)
654 return (EAFNOSUPPORT);
655 RADIX_NODE_HEAD_LOCK(rnh);
656 /*
657 * If we are adding a host route then we don't want to put
658 * a netmask in the tree, nor do we want to clone it.
659 */
660 if (flags & RTF_HOST) {
661 netmask = NULL;
662 flags &= ~RTF_CLONING;
663 }
664 switch (req) {
665 case RTM_DELETE:
666 /*
667 * Remove the item from the tree and return it.
668 * Complain if it is not there and do no more processing.
669 */
670 rn = rnh->rnh_deladdr(dst, netmask, rnh);
671 if (rn == NULL)
672 senderr(ESRCH);
673 if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
674 panic ("rtrequest delete");
675 rt = RNTORT(rn);
676 RT_LOCK(rt);
677 RT_ADDREF(rt);
678 rt->rt_flags &= ~RTF_UP;
679
680 /*
681 * Now search what's left of the subtree for any cloned
682 * routes which might have been formed from this node.
683 */
684 if ((rt->rt_flags & RTF_CLONING) &&
685 rt_mask(rt)) {
686 rnh->rnh_walktree_from(rnh, dst, rt_mask(rt),
687 rt_fixdelete, rt);
688 }
689
690 /*
691 * Remove any external references we may have.
692 * This might result in another rtentry being freed if
693 * we held its last reference.
694 */
695 if (rt->rt_gwroute) {
696 RTFREE(rt->rt_gwroute);
697 rt->rt_gwroute = NULL;
698 }
699
700 /*
701 * give the protocol a chance to keep things in sync.
702 */
703 if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest)
704 ifa->ifa_rtrequest(RTM_DELETE, rt, info);
705
706 /*
707 * One more rtentry floating around that is not
708 * linked to the routing table. rttrash will be decremented
709 * when RTFREE(rt) is eventually called.
710 */
711 rttrash++;
712
713 /*
714 * If the caller wants it, then it can have it,
715 * but it's up to it to free the rtentry as we won't be
716 * doing it.
717 */
718 if (ret_nrt) {
719 *ret_nrt = rt;
720 RT_UNLOCK(rt);
721 } else
722 RTFREE_LOCKED(rt);
723 break;
724
725 case RTM_RESOLVE:
726 if (ret_nrt == NULL || (rt = *ret_nrt) == NULL)
727 senderr(EINVAL);
728 ifa = rt->rt_ifa;
729 /* XXX locking? */
730 flags = rt->rt_flags &
731 ~(RTF_CLONING | RTF_STATIC);
732 flags |= RTF_WASCLONED;
733 gateway = rt->rt_gateway;
734 if ((netmask = rt->rt_genmask) == NULL)
735 flags |= RTF_HOST;
736 goto makeroute;
737
738 case RTM_ADD:
739 if ((flags & RTF_GATEWAY) && !gateway)
740 panic("rtrequest: GATEWAY but no gateway");
741
742 if (info->rti_ifa == NULL && (error = rt_getifa(info)))
743 senderr(error);
744 ifa = info->rti_ifa;
745
746 makeroute:
741 R_Zalloc(rt, struct rtentry *, sizeof(*rt));
747 rt = uma_zalloc(rtzone, M_NOWAIT | M_ZERO);
742 if (rt == NULL)
743 senderr(ENOBUFS);
744 RT_LOCK_INIT(rt);
745 rt->rt_flags = RTF_UP | flags;
746 /*
747 * Add the gateway. Possibly re-malloc-ing the storage for it
748 * also add the rt_gwroute if possible.
749 */
750 RT_LOCK(rt);
751 if ((error = rt_setgate(rt, dst, gateway)) != 0) {
752 RT_LOCK_DESTROY(rt);
748 if (rt == NULL)
749 senderr(ENOBUFS);
750 RT_LOCK_INIT(rt);
751 rt->rt_flags = RTF_UP | flags;
752 /*
753 * Add the gateway. Possibly re-malloc-ing the storage for it
754 * also add the rt_gwroute if possible.
755 */
756 RT_LOCK(rt);
757 if ((error = rt_setgate(rt, dst, gateway)) != 0) {
758 RT_LOCK_DESTROY(rt);
753 Free(rt);
759 uma_zfree(rtzone, rt);
754 senderr(error);
755 }
756
757 /*
758 * point to the (possibly newly malloc'd) dest address.
759 */
760 ndst = (struct sockaddr *)rt_key(rt);
761
762 /*
763 * make sure it contains the value we want (masked if needed).
764 */
765 if (netmask) {
766 rt_maskedcopy(dst, ndst, netmask);
767 } else
768 bcopy(dst, ndst, dst->sa_len);
769
770 /*
771 * Note that we now have a reference to the ifa.
772 * This moved from below so that rnh->rnh_addaddr() can
773 * examine the ifa and ifa->ifa_ifp if it so desires.
774 */
775 IFAREF(ifa);
776 rt->rt_ifa = ifa;
777 rt->rt_ifp = ifa->ifa_ifp;
778
779 /* XXX mtu manipulation will be done in rnh_addaddr -- itojun */
780 rn = rnh->rnh_addaddr(ndst, netmask, rnh, rt->rt_nodes);
781 if (rn == NULL) {
782 struct rtentry *rt2;
783 /*
784 * Uh-oh, we already have one of these in the tree.
785 * We do a special hack: if the route that's already
786 * there was generated by the cloning mechanism
787 * then we just blow it away and retry the insertion
788 * of the new one.
789 */
790 rt2 = rtalloc1(dst, 0, 0);
791 if (rt2 && rt2->rt_parent) {
792 rtexpunge(rt2);
793 RT_UNLOCK(rt2);
794 rn = rnh->rnh_addaddr(ndst, netmask,
795 rnh, rt->rt_nodes);
796 } else if (rt2) {
797 /* undo the extra ref we got */
798 RTFREE_LOCKED(rt2);
799 }
800 }
801
802 /*
803 * If it still failed to go into the tree,
804 * then un-make it (this should be a function)
805 */
806 if (rn == NULL) {
807 if (rt->rt_gwroute)
808 RTFREE(rt->rt_gwroute);
809 if (rt->rt_ifa)
810 IFAFREE(rt->rt_ifa);
811 Free(rt_key(rt));
812 RT_LOCK_DESTROY(rt);
760 senderr(error);
761 }
762
763 /*
764 * point to the (possibly newly malloc'd) dest address.
765 */
766 ndst = (struct sockaddr *)rt_key(rt);
767
768 /*
769 * make sure it contains the value we want (masked if needed).
770 */
771 if (netmask) {
772 rt_maskedcopy(dst, ndst, netmask);
773 } else
774 bcopy(dst, ndst, dst->sa_len);
775
776 /*
777 * Note that we now have a reference to the ifa.
778 * This moved from below so that rnh->rnh_addaddr() can
779 * examine the ifa and ifa->ifa_ifp if it so desires.
780 */
781 IFAREF(ifa);
782 rt->rt_ifa = ifa;
783 rt->rt_ifp = ifa->ifa_ifp;
784
785 /* XXX mtu manipulation will be done in rnh_addaddr -- itojun */
786 rn = rnh->rnh_addaddr(ndst, netmask, rnh, rt->rt_nodes);
787 if (rn == NULL) {
788 struct rtentry *rt2;
789 /*
790 * Uh-oh, we already have one of these in the tree.
791 * We do a special hack: if the route that's already
792 * there was generated by the cloning mechanism
793 * then we just blow it away and retry the insertion
794 * of the new one.
795 */
796 rt2 = rtalloc1(dst, 0, 0);
797 if (rt2 && rt2->rt_parent) {
798 rtexpunge(rt2);
799 RT_UNLOCK(rt2);
800 rn = rnh->rnh_addaddr(ndst, netmask,
801 rnh, rt->rt_nodes);
802 } else if (rt2) {
803 /* undo the extra ref we got */
804 RTFREE_LOCKED(rt2);
805 }
806 }
807
808 /*
809 * If it still failed to go into the tree,
810 * then un-make it (this should be a function)
811 */
812 if (rn == NULL) {
813 if (rt->rt_gwroute)
814 RTFREE(rt->rt_gwroute);
815 if (rt->rt_ifa)
816 IFAFREE(rt->rt_ifa);
817 Free(rt_key(rt));
818 RT_LOCK_DESTROY(rt);
813 Free(rt);
819 uma_zfree(rtzone, rt);
814 senderr(EEXIST);
815 }
816
817 rt->rt_parent = NULL;
818
819 /*
820 * If we got here from RESOLVE, then we are cloning
821 * so clone the rest, and note that we
822 * are a clone (and increment the parent's references)
823 */
824 if (req == RTM_RESOLVE) {
825 KASSERT(ret_nrt && *ret_nrt,
826 ("no route to clone from"));
827 rt->rt_rmx = (*ret_nrt)->rt_rmx; /* copy metrics */
828 rt->rt_rmx.rmx_pksent = 0; /* reset packet counter */
829 if ((*ret_nrt)->rt_flags & RTF_CLONING) {
830 /*
831 * NB: We do not bump the refcnt on the parent
832 * entry under the assumption that it will
833 * remain so long as we do. This is
834 * important when deleting the parent route
835 * as this operation requires traversing
836 * the tree to delete all clones and futzing
837 * with refcnts requires us to double-lock
838 * parent through this back reference.
839 */
840 rt->rt_parent = *ret_nrt;
841 }
842 }
843
844 /*
845 * if this protocol has something to add to this then
846 * allow it to do that as well.
847 */
848 if (ifa->ifa_rtrequest)
849 ifa->ifa_rtrequest(req, rt, info);
850
851 /*
852 * We repeat the same procedure from rt_setgate() here because
853 * it doesn't fire when we call it there because the node
854 * hasn't been added to the tree yet.
855 */
856 if (req == RTM_ADD &&
857 !(rt->rt_flags & RTF_HOST) && rt_mask(rt) != NULL) {
858 struct rtfc_arg arg;
859 arg.rnh = rnh;
860 arg.rt0 = rt;
861 rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt),
862 rt_fixchange, &arg);
863 }
864
865 /*
866 * actually return a resultant rtentry and
867 * give the caller a single reference.
868 */
869 if (ret_nrt) {
870 *ret_nrt = rt;
871 RT_ADDREF(rt);
872 }
873 RT_UNLOCK(rt);
874 break;
875 default:
876 error = EOPNOTSUPP;
877 }
878bad:
879 RADIX_NODE_HEAD_UNLOCK(rnh);
880 return (error);
881#undef senderr
882}
883
884#undef dst
885#undef gateway
886#undef netmask
887#undef ifaaddr
888#undef ifpaddr
889#undef flags
890
891/*
892 * Called from rtrequest(RTM_DELETE, ...) to fix up the route's ``family''
893 * (i.e., the routes related to it by the operation of cloning). This
894 * routine is iterated over all potential former-child-routes by way of
895 * rnh->rnh_walktree_from() above, and those that actually are children of
896 * the late parent (passed in as VP here) are themselves deleted.
897 */
898static int
899rt_fixdelete(struct radix_node *rn, void *vp)
900{
901 struct rtentry *rt = RNTORT(rn);
902 struct rtentry *rt0 = vp;
903
904 if (rt->rt_parent == rt0 &&
905 !(rt->rt_flags & (RTF_PINNED | RTF_CLONING))) {
906 return rtrequest(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt),
907 rt->rt_flags, NULL);
908 }
909 return 0;
910}
911
912/*
913 * This routine is called from rt_setgate() to do the analogous thing for
914 * adds and changes. There is the added complication in this case of a
915 * middle insert; i.e., insertion of a new network route between an older
916 * network route and (cloned) host routes. For this reason, a simple check
917 * of rt->rt_parent is insufficient; each candidate route must be tested
918 * against the (mask, value) of the new route (passed as before in vp)
919 * to see if the new route matches it.
920 *
921 * XXX - it may be possible to do fixdelete() for changes and reserve this
922 * routine just for adds. I'm not sure why I thought it was necessary to do
923 * changes this way.
924 */
925
926static int
927rt_fixchange(struct radix_node *rn, void *vp)
928{
929 struct rtentry *rt = RNTORT(rn);
930 struct rtfc_arg *ap = vp;
931 struct rtentry *rt0 = ap->rt0;
932 struct radix_node_head *rnh = ap->rnh;
933 u_char *xk1, *xm1, *xk2, *xmp;
934 int i, len, mlen;
935
936 /* make sure we have a parent, and route is not pinned or cloning */
937 if (!rt->rt_parent ||
938 (rt->rt_flags & (RTF_PINNED | RTF_CLONING)))
939 return 0;
940
941 if (rt->rt_parent == rt0) /* parent match */
942 goto delete_rt;
943 /*
944 * There probably is a function somewhere which does this...
945 * if not, there should be.
946 */
947 len = imin(rt_key(rt0)->sa_len, rt_key(rt)->sa_len);
948
949 xk1 = (u_char *)rt_key(rt0);
950 xm1 = (u_char *)rt_mask(rt0);
951 xk2 = (u_char *)rt_key(rt);
952
953 /* avoid applying a less specific route */
954 xmp = (u_char *)rt_mask(rt->rt_parent);
955 mlen = rt_key(rt->rt_parent)->sa_len;
956 if (mlen > rt_key(rt0)->sa_len) /* less specific route */
957 return 0;
958 for (i = rnh->rnh_treetop->rn_offset; i < mlen; i++)
959 if ((xmp[i] & ~(xmp[i] ^ xm1[i])) != xmp[i])
960 return 0; /* less specific route */
961
962 for (i = rnh->rnh_treetop->rn_offset; i < len; i++)
963 if ((xk2[i] & xm1[i]) != xk1[i])
964 return 0; /* no match */
965
966 /*
967 * OK, this node is a clone, and matches the node currently being
968 * changed/added under the node's mask. So, get rid of it.
969 */
970delete_rt:
971 return rtrequest(RTM_DELETE, rt_key(rt), NULL,
972 rt_mask(rt), rt->rt_flags, NULL);
973}
974
975int
976rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate)
977{
978 /* XXX dst may be overwritten, can we move this to below */
979 struct radix_node_head *rnh = rt_tables[dst->sa_family];
980 int dlen = SA_SIZE(dst), glen = SA_SIZE(gate);
981
982 RT_LOCK_ASSERT(rt);
983
984 /*
985 * A host route with the destination equal to the gateway
986 * will interfere with keeping LLINFO in the routing
987 * table, so disallow it.
988 */
989 if (((rt->rt_flags & (RTF_HOST|RTF_GATEWAY|RTF_LLINFO)) ==
990 (RTF_HOST|RTF_GATEWAY)) &&
991 dst->sa_len == gate->sa_len &&
992 bcmp(dst, gate, dst->sa_len) == 0) {
993 /*
994 * The route might already exist if this is an RTM_CHANGE
995 * or a routing redirect, so try to delete it.
996 */
997 if (rt_key(rt))
998 rtexpunge(rt);
999 return EADDRNOTAVAIL;
1000 }
1001
1002 /*
1003 * Prepare to store the gateway in rt->rt_gateway.
1004 * Both dst and gateway are stored one after the other in the same
1005 * malloc'd chunk. If we have room, we can reuse the old buffer,
1006 * rt_gateway already points to the right place.
1007 * Otherwise, malloc a new block and update the 'dst' address.
1008 */
1009 if (rt->rt_gateway == NULL || glen > SA_SIZE(rt->rt_gateway)) {
1010 caddr_t new;
1011
1012 R_Malloc(new, caddr_t, dlen + glen);
1013 if (new == NULL)
1014 return ENOBUFS;
1015 /*
1016 * XXX note, we copy from *dst and not *rt_key(rt) because
1017 * rt_setgate() can be called to initialize a newly
1018 * allocated route entry, in which case rt_key(rt) == NULL
1019 * (and also rt->rt_gateway == NULL).
1020 * Free()/free() handle a NULL argument just fine.
1021 */
1022 bcopy(dst, new, dlen);
1023 Free(rt_key(rt)); /* free old block, if any */
1024 rt_key(rt) = (struct sockaddr *)new;
1025 rt->rt_gateway = (struct sockaddr *)(new + dlen);
1026 }
1027
1028 /*
1029 * Copy the new gateway value into the memory chunk.
1030 */
1031 bcopy(gate, rt->rt_gateway, glen);
1032
1033 /*
1034 * If there is already a gwroute, it's now almost definitly wrong
1035 * so drop it.
1036 */
1037 if (rt->rt_gwroute != NULL) {
1038 RTFREE(rt->rt_gwroute);
1039 rt->rt_gwroute = NULL;
1040 }
1041 /*
1042 * Cloning loop avoidance:
1043 * In the presence of protocol-cloning and bad configuration,
1044 * it is possible to get stuck in bottomless mutual recursion
1045 * (rtrequest rt_setgate rtalloc1). We avoid this by not allowing
1046 * protocol-cloning to operate for gateways (which is probably the
1047 * correct choice anyway), and avoid the resulting reference loops
1048 * by disallowing any route to run through itself as a gateway.
1049 * This is obviously mandatory when we get rt->rt_output().
1050 * XXX: After removal of PRCLONING this is probably not needed anymore.
1051 */
1052 if (rt->rt_flags & RTF_GATEWAY) {
1053 struct rtentry *gwrt;
1054
1055 RT_UNLOCK(rt); /* XXX workaround LOR */
1056 gwrt = rtalloc1(gate, 1, 0);
1057 RT_LOCK(rt);
1058 rt->rt_gwroute = gwrt;
1059 if (rt->rt_gwroute == rt) {
1060 RTFREE_LOCKED(rt->rt_gwroute);
1061 rt->rt_gwroute = NULL;
1062 return EDQUOT; /* failure */
1063 }
1064 if (rt->rt_gwroute != NULL)
1065 RT_UNLOCK(rt->rt_gwroute);
1066 }
1067
1068 /*
1069 * This isn't going to do anything useful for host routes, so
1070 * don't bother. Also make sure we have a reasonable mask
1071 * (we don't yet have one during adds).
1072 */
1073 if (!(rt->rt_flags & RTF_HOST) && rt_mask(rt) != 0) {
1074 struct rtfc_arg arg;
1075
1076 arg.rnh = rnh;
1077 arg.rt0 = rt;
1078 RT_UNLOCK(rt); /* XXX workaround LOR */
1079 RADIX_NODE_HEAD_LOCK(rnh);
1080 RT_LOCK(rt);
1081 rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt),
1082 rt_fixchange, &arg);
1083 RADIX_NODE_HEAD_UNLOCK(rnh);
1084 }
1085
1086 return 0;
1087}
1088
/*
 * Copy src into dst with netmask applied.  The first two bytes
 * (sa_len and sa_family) are copied verbatim; the following bytes are
 * ANDed with the mask up to the shorter of the two lengths, and any
 * remaining bytes of dst, up to src's length, are zeroed.
 */
static void
rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netmask)
{
	unsigned char *in = (unsigned char *)src;
	unsigned char *out = (unsigned char *)dst;
	unsigned char *mask = (unsigned char *)netmask;
	/* read both lengths before anything is written to dst */
	unsigned int masklen = mask[0];
	unsigned int srclen = in[0];
	unsigned int i;

	out[0] = in[0];		/* sa_len */
	out[1] = in[1];		/* sa_family */

	if (masklen > srclen)
		masklen = srclen;
	for (i = 2; i < masklen; i++)
		out[i] = in[i] & mask[i];
	if (i < srclen)
		bzero(out + i, srclen - i);
}
1107
1108/*
1109 * Set up a routing table entry, normally
1110 * for an interface.
1111 */
1112int
1113rtinit(struct ifaddr *ifa, int cmd, int flags)
1114{
1115 struct sockaddr *dst;
1116 struct sockaddr *netmask;
1117 struct mbuf *m = NULL;
1118 struct rtentry *rt = NULL;
1119 struct rt_addrinfo info;
1120 int error;
1121
1122 if (flags & RTF_HOST) {
1123 dst = ifa->ifa_dstaddr;
1124 netmask = NULL;
1125 } else {
1126 dst = ifa->ifa_addr;
1127 netmask = ifa->ifa_netmask;
1128 }
1129 /*
1130 * If it's a delete, check that if it exists, it's on the correct
1131 * interface or we might scrub a route to another ifa which would
1132 * be confusing at best and possibly worse.
1133 */
1134 if (cmd == RTM_DELETE) {
1135 struct sockaddr *deldst;
1136 struct radix_node_head *rnh;
1137 struct radix_node *rn;
1138
1139 /*
1140 * It's a delete, so it should already exist..
1141 * If it's a net, mask off the host bits
1142 * (Assuming we have a mask)
1143 */
1144 if (netmask != NULL) {
1145 m = m_get(M_DONTWAIT, MT_SONAME);
1146 if (m == NULL)
1147 return(ENOBUFS);
1148 deldst = mtod(m, struct sockaddr *);
1149 rt_maskedcopy(dst, deldst, netmask);
1150 dst = deldst;
1151 }
1152 /*
1153 * Look up an rtentry that is in the routing tree and
1154 * contains the correct info.
1155 */
1156 if ((rnh = rt_tables[dst->sa_family]) == NULL)
1157 goto bad;
1158 RADIX_NODE_HEAD_LOCK(rnh);
1159 error = ((rn = rnh->rnh_lookup(dst, netmask, rnh)) == NULL ||
1160 (rn->rn_flags & RNF_ROOT) ||
1161 RNTORT(rn)->rt_ifa != ifa ||
1162 !sa_equal((struct sockaddr *)rn->rn_key, dst));
1163 RADIX_NODE_HEAD_UNLOCK(rnh);
1164 if (error) {
1165bad:
1166 if (m)
1167 (void) m_free(m);
1168 return (flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
1169 }
1170 }
1171 /*
1172 * Do the actual request
1173 */
1174 bzero((caddr_t)&info, sizeof(info));
1175 info.rti_ifa = ifa;
1176 info.rti_flags = flags | ifa->ifa_flags;
1177 info.rti_info[RTAX_DST] = dst;
1178 info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
1179 info.rti_info[RTAX_NETMASK] = netmask;
1180 error = rtrequest1(cmd, &info, &rt);
1181 if (error == 0 && rt != NULL) {
1182 /*
1183 * notify any listening routing agents of the change
1184 */
1185 RT_LOCK(rt);
1186 rt_newaddrmsg(cmd, ifa, error, rt);
1187 if (cmd == RTM_DELETE) {
1188 /*
1189 * If we are deleting, and we found an entry, then
1190 * it's been removed from the tree.. now throw it away.
1191 */
1192 RTFREE_LOCKED(rt);
1193 } else {
1194 if (cmd == RTM_ADD) {
1195 /*
1196 * We just wanted to add it.. we don't actually
1197 * need a reference.
1198 */
1199 RT_REMREF(rt);
1200 }
1201 RT_UNLOCK(rt);
1202 }
1203 }
1204 if (m)
1205 (void) m_free(m);
1206 return (error);
1207}
1208
1209/*
1210 * rt_check() is invoked on each layer 2 output path, prior to
1211 * encapsulating outbound packets.
1212 *
1213 * The function is mostly used to find a routing entry for the gateway,
1214 * which in some protocol families could also point to the link-level
1215 * address for the gateway itself (the side effect of revalidating the
1216 * route to the destination is rather pointless at this stage, we did it
1217 * already a moment before in the pr_output() routine to locate the ifp
1218 * and gateway to use).
1219 *
1220 * When we remove the layer-3 to layer-2 mapping tables from the
1221 * routing table, this function can be removed.
1222 *
1223 * === On input ===
1224 * *dst is the address of the NEXT HOP (which coincides with the
1225 * final destination if directly reachable);
1226 * *lrt0 points to the cached route to the final destination;
1227 * *lrt is not meaningful;
1228 *
1229 * === Operation ===
1230 * If the route is marked down try to find a new route. If the route
1231 * to the gateway is gone, try to setup a new route. Otherwise,
1232 * if the route is marked for packets to be rejected, enforce that.
1233 *
1234 * === On return ===
1235 * *dst is unchanged;
1236 * *lrt0 points to the (possibly new) route to the final destination
1237 * *lrt points to the route to the next hop
1238 *
1239 * Their values are meaningful ONLY if no error is returned.
1240 */
1241int
1242rt_check(struct rtentry **lrt, struct rtentry **lrt0, struct sockaddr *dst)
1243{
1244#define senderr(x) { error = x ; goto bad; }
1245 struct rtentry *rt;
1246 struct rtentry *rt0;
1247 int error;
1248
1249 rt0 = *lrt0;
1250 rt = rt0;
1251 if (rt) {
1252 /* NB: the locking here is tortuous... */
1253 RT_LOCK(rt);
1254 if ((rt->rt_flags & RTF_UP) == 0) {
1255 RT_UNLOCK(rt);
1256 rt = rtalloc1(dst, 1, 0UL);
1257 if (rt != NULL) {
1258 RT_REMREF(rt);
1259 /* XXX what about if change? */
1260 } else
1261 senderr(EHOSTUNREACH);
1262 rt0 = rt;
1263 }
1264 /* XXX BSD/OS checks dst->sa_family != AF_NS */
1265 if (rt->rt_flags & RTF_GATEWAY) {
1266 if (rt->rt_gwroute == NULL)
1267 goto lookup;
1268 rt = rt->rt_gwroute;
1269 RT_LOCK(rt); /* NB: gwroute */
1270 if ((rt->rt_flags & RTF_UP) == 0) {
1271 rtfree(rt); /* unlock gwroute */
1272 rt = rt0;
1273 lookup:
1274 RT_UNLOCK(rt0);
1275 rt = rtalloc1(rt->rt_gateway, 1, 0UL);
1276 RT_LOCK(rt0);
1277 rt0->rt_gwroute = rt;
1278 if (rt == NULL) {
1279 RT_UNLOCK(rt0);
1280 senderr(EHOSTUNREACH);
1281 }
1282 }
1283 RT_UNLOCK(rt0);
1284 }
1285 /* XXX why are we inspecting rmx_expire? */
1286 error = (rt->rt_flags & RTF_REJECT) &&
1287 (rt->rt_rmx.rmx_expire == 0 ||
1288 time_second < rt->rt_rmx.rmx_expire);
1289 RT_UNLOCK(rt);
1290 if (error)
1291 senderr(rt == rt0 ? EHOSTDOWN : EHOSTUNREACH);
1292 }
1293 *lrt = rt; /* NB: return unlocked */
1294 *lrt0 = rt0;
1295 return (0);
1296bad:
1297 /* NB: lrt and lrt0 should not be interpreted if error is non-zero */
1298 return (error);
1299#undef senderr
1300}
1301
1302/* This must be before ip6_init2(), which is now SI_ORDER_MIDDLE */
1303SYSINIT(route, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, 0);
820 senderr(EEXIST);
821 }
822
823 rt->rt_parent = NULL;
824
825 /*
826 * If we got here from RESOLVE, then we are cloning
827 * so clone the rest, and note that we
828 * are a clone (and increment the parent's references)
829 */
830 if (req == RTM_RESOLVE) {
831 KASSERT(ret_nrt && *ret_nrt,
832 ("no route to clone from"));
833 rt->rt_rmx = (*ret_nrt)->rt_rmx; /* copy metrics */
834 rt->rt_rmx.rmx_pksent = 0; /* reset packet counter */
835 if ((*ret_nrt)->rt_flags & RTF_CLONING) {
836 /*
837 * NB: We do not bump the refcnt on the parent
838 * entry under the assumption that it will
839 * remain so long as we do. This is
840 * important when deleting the parent route
841 * as this operation requires traversing
842 * the tree to delete all clones and futzing
843 * with refcnts requires us to double-lock
844 * parent through this back reference.
845 */
846 rt->rt_parent = *ret_nrt;
847 }
848 }
849
850 /*
851 * if this protocol has something to add to this then
852 * allow it to do that as well.
853 */
854 if (ifa->ifa_rtrequest)
855 ifa->ifa_rtrequest(req, rt, info);
856
857 /*
858 * We repeat the same procedure from rt_setgate() here because
859 * it doesn't fire when we call it there because the node
860 * hasn't been added to the tree yet.
861 */
862 if (req == RTM_ADD &&
863 !(rt->rt_flags & RTF_HOST) && rt_mask(rt) != NULL) {
864 struct rtfc_arg arg;
865 arg.rnh = rnh;
866 arg.rt0 = rt;
867 rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt),
868 rt_fixchange, &arg);
869 }
870
871 /*
872 * actually return a resultant rtentry and
873 * give the caller a single reference.
874 */
875 if (ret_nrt) {
876 *ret_nrt = rt;
877 RT_ADDREF(rt);
878 }
879 RT_UNLOCK(rt);
880 break;
881 default:
882 error = EOPNOTSUPP;
883 }
884bad:
885 RADIX_NODE_HEAD_UNLOCK(rnh);
886 return (error);
887#undef senderr
888}
889
890#undef dst
891#undef gateway
892#undef netmask
893#undef ifaaddr
894#undef ifpaddr
895#undef flags
896
897/*
898 * Called from rtrequest(RTM_DELETE, ...) to fix up the route's ``family''
899 * (i.e., the routes related to it by the operation of cloning). This
900 * routine is iterated over all potential former-child-routes by way of
901 * rnh->rnh_walktree_from() above, and those that actually are children of
902 * the late parent (passed in as VP here) are themselves deleted.
903 */
904static int
905rt_fixdelete(struct radix_node *rn, void *vp)
906{
907 struct rtentry *rt = RNTORT(rn);
908 struct rtentry *rt0 = vp;
909
910 if (rt->rt_parent == rt0 &&
911 !(rt->rt_flags & (RTF_PINNED | RTF_CLONING))) {
912 return rtrequest(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt),
913 rt->rt_flags, NULL);
914 }
915 return 0;
916}
917
918/*
919 * This routine is called from rt_setgate() to do the analogous thing for
920 * adds and changes. There is the added complication in this case of a
921 * middle insert; i.e., insertion of a new network route between an older
922 * network route and (cloned) host routes. For this reason, a simple check
923 * of rt->rt_parent is insufficient; each candidate route must be tested
924 * against the (mask, value) of the new route (passed as before in vp)
925 * to see if the new route matches it.
926 *
927 * XXX - it may be possible to do fixdelete() for changes and reserve this
928 * routine just for adds. I'm not sure why I thought it was necessary to do
929 * changes this way.
930 */
931
932static int
933rt_fixchange(struct radix_node *rn, void *vp)
934{
935 struct rtentry *rt = RNTORT(rn);
936 struct rtfc_arg *ap = vp;
937 struct rtentry *rt0 = ap->rt0;
938 struct radix_node_head *rnh = ap->rnh;
939 u_char *xk1, *xm1, *xk2, *xmp;
940 int i, len, mlen;
941
942 /* make sure we have a parent, and route is not pinned or cloning */
943 if (!rt->rt_parent ||
944 (rt->rt_flags & (RTF_PINNED | RTF_CLONING)))
945 return 0;
946
947 if (rt->rt_parent == rt0) /* parent match */
948 goto delete_rt;
949 /*
950 * There probably is a function somewhere which does this...
951 * if not, there should be.
952 */
953 len = imin(rt_key(rt0)->sa_len, rt_key(rt)->sa_len);
954
955 xk1 = (u_char *)rt_key(rt0);
956 xm1 = (u_char *)rt_mask(rt0);
957 xk2 = (u_char *)rt_key(rt);
958
959 /* avoid applying a less specific route */
960 xmp = (u_char *)rt_mask(rt->rt_parent);
961 mlen = rt_key(rt->rt_parent)->sa_len;
962 if (mlen > rt_key(rt0)->sa_len) /* less specific route */
963 return 0;
964 for (i = rnh->rnh_treetop->rn_offset; i < mlen; i++)
965 if ((xmp[i] & ~(xmp[i] ^ xm1[i])) != xmp[i])
966 return 0; /* less specific route */
967
968 for (i = rnh->rnh_treetop->rn_offset; i < len; i++)
969 if ((xk2[i] & xm1[i]) != xk1[i])
970 return 0; /* no match */
971
972 /*
973 * OK, this node is a clone, and matches the node currently being
974 * changed/added under the node's mask. So, get rid of it.
975 */
976delete_rt:
977 return rtrequest(RTM_DELETE, rt_key(rt), NULL,
978 rt_mask(rt), rt->rt_flags, NULL);
979}
980
/*
 * Install GATE as the gateway address of route RT, whose key is DST.
 *
 * The key and the gateway live back to back in a single allocation; the
 * buffer is reused when the new gateway fits, otherwise a new one is
 * allocated, the key is copied in from DST and the old block is freed.
 * Any cached rt_gwroute is released.  For RTF_GATEWAY routes a route to
 * the gateway is then looked up with rtalloc1() and cached in rt_gwroute.
 * Finally, for non-host routes that have a mask, the tree is walked with
 * rt_fixchange() starting at this route's key/mask.
 *
 * The caller must hold the lock on RT (RT_LOCK_ASSERT).  The lock is
 * dropped and re-taken around rtalloc1() and around acquiring the radix
 * head lock.
 *
 * Returns 0 on success, EADDRNOTAVAIL when a host+gateway (non-LLINFO)
 * route has DST equal to GATE, ENOBUFS when the allocation fails, and
 * EDQUOT when the route to the gateway turns out to be RT itself.
 */
981int
982rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate)
983{
984 /* XXX dst may be overwritten, can we move this to below */
985 struct radix_node_head *rnh = rt_tables[dst->sa_family];
986 int dlen = SA_SIZE(dst), glen = SA_SIZE(gate);
987
988 RT_LOCK_ASSERT(rt);
989
990 /*
991 * A host route with the destination equal to the gateway
992 * will interfere with keeping LLINFO in the routing
993 * table, so disallow it.
994 */
995 if (((rt->rt_flags & (RTF_HOST|RTF_GATEWAY|RTF_LLINFO)) ==
996 (RTF_HOST|RTF_GATEWAY)) &&
997 dst->sa_len == gate->sa_len &&
998 bcmp(dst, gate, dst->sa_len) == 0) {
999 /*
1000 * The route might already exist if this is an RTM_CHANGE
1001 * or a routing redirect, so try to delete it.
1002 */
1003 if (rt_key(rt))
1004 rtexpunge(rt);
1005 return EADDRNOTAVAIL;
1006 }
1007
1008 /*
1009 * Prepare to store the gateway in rt->rt_gateway.
1010 * Both dst and gateway are stored one after the other in the same
1011 * malloc'd chunk. If we have room, we can reuse the old buffer,
1012 * rt_gateway already points to the right place.
1013 * Otherwise, malloc a new block and update the 'dst' address.
1014 */
1015 if (rt->rt_gateway == NULL || glen > SA_SIZE(rt->rt_gateway)) {
1016 caddr_t new;
1017
1018 R_Malloc(new, caddr_t, dlen + glen);
1019 if (new == NULL)
1020 return ENOBUFS;
1021 /*
1022 * XXX note, we copy from *dst and not *rt_key(rt) because
1023 * rt_setgate() can be called to initialize a newly
1024 * allocated route entry, in which case rt_key(rt) == NULL
1025 * (and also rt->rt_gateway == NULL).
1026 * Free()/free() handle a NULL argument just fine.
1027 */
1028 bcopy(dst, new, dlen);
1029 Free(rt_key(rt)); /* free old block, if any */
/* Key occupies the first dlen bytes, the gateway follows it. */
1030 rt_key(rt) = (struct sockaddr *)new;
1031 rt->rt_gateway = (struct sockaddr *)(new + dlen);
1032 }
1033
1034 /*
1035 * Copy the new gateway value into the memory chunk.
1036 */
1037 bcopy(gate, rt->rt_gateway, glen);
1038
1039 /*
1040 * If there is already a gwroute, it's now almost definitely wrong
1041 * so drop it.
1042 */
1043 if (rt->rt_gwroute != NULL) {
1044 RTFREE(rt->rt_gwroute);
1045 rt->rt_gwroute = NULL;
1046 }
1047 /*
1048 * Cloning loop avoidance:
1049 * In the presence of protocol-cloning and bad configuration,
1050 * it is possible to get stuck in bottomless mutual recursion
1051 * (rtrequest rt_setgate rtalloc1). We avoid this by not allowing
1052 * protocol-cloning to operate for gateways (which is probably the
1053 * correct choice anyway), and avoid the resulting reference loops
1054 * by disallowing any route to run through itself as a gateway.
1055 * This is obviously mandatory when we get rt->rt_output().
1056 * XXX: After removal of PRCLONING this is probably not needed anymore.
1057 */
1058 if (rt->rt_flags & RTF_GATEWAY) {
1059 struct rtentry *gwrt;
1060
/* The lookup runs with rt unlocked; rt is re-locked before the store. */
1061 RT_UNLOCK(rt); /* XXX workaround LOR */
1062 gwrt = rtalloc1(gate, 1, 0);
1063 RT_LOCK(rt);
1064 rt->rt_gwroute = gwrt;
/* The gateway resolved to this very route: undo and report failure. */
1065 if (rt->rt_gwroute == rt) {
1066 RTFREE_LOCKED(rt->rt_gwroute);
1067 rt->rt_gwroute = NULL;
1068 return EDQUOT; /* failure */
1069 }
/* NOTE(review): presumably rtalloc1() returns its entry locked - confirm. */
1070 if (rt->rt_gwroute != NULL)
1071 RT_UNLOCK(rt->rt_gwroute);
1072 }
1073
1074 /*
1075 * This isn't going to do anything useful for host routes, so
1076 * don't bother. Also make sure we have a reasonable mask
1077 * (we don't yet have one during adds).
1078 */
1079 if (!(rt->rt_flags & RTF_HOST) && rt_mask(rt) != 0) {
1080 struct rtfc_arg arg;
1081
1082 arg.rnh = rnh;
1083 arg.rt0 = rt;
/*
 * rt is unlocked first so that the radix head lock is taken before
 * the route lock; rt stays locked when this block is left.  The
 * return value of the walk is not examined.
 */
1084 RT_UNLOCK(rt); /* XXX workaround LOR */
1085 RADIX_NODE_HEAD_LOCK(rnh);
1086 RT_LOCK(rt);
1087 rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt),
1088 rt_fixchange, &arg);
1089 RADIX_NODE_HEAD_UNLOCK(rnh);
1090 }
1091
1092 return 0;
1093}
1094
/*
 * Copy the sockaddr SRC into DST while ANDing its address bytes with
 * NETMASK.
 *
 * Byte 0 of each sockaddr is taken as its length.  The two header bytes
 * (sa_len and sa_family) are copied from SRC unmasked.  The remaining bytes
 * are masked up to the shorter of the source and mask lengths; whatever is
 * left of DST up to the source length is zero-filled.
 */
static void
rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netmask)
{
	unsigned char *from = (unsigned char *)src;
	unsigned char *to = (unsigned char *)dst;
	unsigned char *mask = (unsigned char *)netmask;
	/* Both limits are taken before DST is written. */
	unsigned char *mask_end = to + mask[0];
	unsigned char *copy_end = to + from[0];

	/* copies sa_len & sa_family */
	to[0] = from[0];
	to[1] = from[1];
	from += 2;
	to += 2;
	mask += 2;

	/* Never mask past the end of the source address. */
	if (mask_end > copy_end)
		mask_end = copy_end;

	for (; to < mask_end; to++, from++, mask++)
		*to = *from & *mask;

	/* Bytes not covered by the mask are cleared. */
	if (to < copy_end)
		bzero(to, (unsigned)(copy_end - to));
}
1113
1114/*
1115 * Set up a routing table entry, normally
1116 * for an interface.
1117 */
1118int
1119rtinit(struct ifaddr *ifa, int cmd, int flags)
1120{
1121 struct sockaddr *dst;
1122 struct sockaddr *netmask;
1123 struct mbuf *m = NULL;
1124 struct rtentry *rt = NULL;
1125 struct rt_addrinfo info;
1126 int error;
1127
1128 if (flags & RTF_HOST) {
1129 dst = ifa->ifa_dstaddr;
1130 netmask = NULL;
1131 } else {
1132 dst = ifa->ifa_addr;
1133 netmask = ifa->ifa_netmask;
1134 }
1135 /*
1136 * If it's a delete, check that if it exists, it's on the correct
1137 * interface or we might scrub a route to another ifa which would
1138 * be confusing at best and possibly worse.
1139 */
1140 if (cmd == RTM_DELETE) {
1141 struct sockaddr *deldst;
1142 struct radix_node_head *rnh;
1143 struct radix_node *rn;
1144
1145 /*
1146 * It's a delete, so it should already exist..
1147 * If it's a net, mask off the host bits
1148 * (Assuming we have a mask)
1149 */
1150 if (netmask != NULL) {
1151 m = m_get(M_DONTWAIT, MT_SONAME);
1152 if (m == NULL)
1153 return(ENOBUFS);
1154 deldst = mtod(m, struct sockaddr *);
1155 rt_maskedcopy(dst, deldst, netmask);
1156 dst = deldst;
1157 }
1158 /*
1159 * Look up an rtentry that is in the routing tree and
1160 * contains the correct info.
1161 */
1162 if ((rnh = rt_tables[dst->sa_family]) == NULL)
1163 goto bad;
1164 RADIX_NODE_HEAD_LOCK(rnh);
1165 error = ((rn = rnh->rnh_lookup(dst, netmask, rnh)) == NULL ||
1166 (rn->rn_flags & RNF_ROOT) ||
1167 RNTORT(rn)->rt_ifa != ifa ||
1168 !sa_equal((struct sockaddr *)rn->rn_key, dst));
1169 RADIX_NODE_HEAD_UNLOCK(rnh);
1170 if (error) {
1171bad:
1172 if (m)
1173 (void) m_free(m);
1174 return (flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
1175 }
1176 }
1177 /*
1178 * Do the actual request
1179 */
1180 bzero((caddr_t)&info, sizeof(info));
1181 info.rti_ifa = ifa;
1182 info.rti_flags = flags | ifa->ifa_flags;
1183 info.rti_info[RTAX_DST] = dst;
1184 info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
1185 info.rti_info[RTAX_NETMASK] = netmask;
1186 error = rtrequest1(cmd, &info, &rt);
1187 if (error == 0 && rt != NULL) {
1188 /*
1189 * notify any listening routing agents of the change
1190 */
1191 RT_LOCK(rt);
1192 rt_newaddrmsg(cmd, ifa, error, rt);
1193 if (cmd == RTM_DELETE) {
1194 /*
1195 * If we are deleting, and we found an entry, then
1196 * it's been removed from the tree.. now throw it away.
1197 */
1198 RTFREE_LOCKED(rt);
1199 } else {
1200 if (cmd == RTM_ADD) {
1201 /*
1202 * We just wanted to add it.. we don't actually
1203 * need a reference.
1204 */
1205 RT_REMREF(rt);
1206 }
1207 RT_UNLOCK(rt);
1208 }
1209 }
1210 if (m)
1211 (void) m_free(m);
1212 return (error);
1213}
1214
1215/*
1216 * rt_check() is invoked on each layer 2 output path, prior to
1217 * encapsulating outbound packets.
1218 *
1219 * The function is mostly used to find a routing entry for the gateway,
1220 * which in some protocol families could also point to the link-level
1221 * address for the gateway itself (the side effect of revalidating the
1222 * route to the destination is rather pointless at this stage, we did it
1223 * already a moment before in the pr_output() routine to locate the ifp
1224 * and gateway to use).
1225 *
1226 * When we remove the layer-3 to layer-2 mapping tables from the
1227 * routing table, this function can be removed.
1228 *
1229 * === On input ===
1230 * *dst is the address of the NEXT HOP (which coincides with the
1231 * final destination if directly reachable);
1232 * *lrt0 points to the cached route to the final destination;
1233 * *lrt is not meaningful;
1234 *
1235 * === Operation ===
1236 * If the route is marked down try to find a new route. If the route
1237 * to the gateway is gone, try to setup a new route. Otherwise,
1238 * if the route is marked for packets to be rejected, enforce that.
1239 *
1240 * === On return ===
1241 * *dst is unchanged;
1242 * *lrt0 points to the (possibly new) route to the final destination
1243 * *lrt points to the route to the next hop
1244 *
1245 * Their values are meaningful ONLY if no error is returned.
1246 */
/*
 * Return values, as implemented below:
 *   0             success; also returned, with both outputs set to NULL
 *                 and no checks made, when *lrt0 is NULL on entry;
 *   EHOSTUNREACH  no route to dst or to the gateway could be allocated,
 *                 or the gateway route is RTF_REJECT and not yet expired;
 *   EHOSTDOWN     the destination route itself is RTF_REJECT and not yet
 *                 expired.
 * A reject route counts as unexpired when rmx_expire is 0 or still lies
 * in the future relative to time_second.
 */
1247int
1248rt_check(struct rtentry **lrt, struct rtentry **lrt0, struct sockaddr *dst)
1249{
1250#define senderr(x) { error = x ; goto bad; }
1251 struct rtentry *rt;
1252 struct rtentry *rt0;
1253 int error;
1254
1255 rt0 = *lrt0;
1256 rt = rt0;
1257 if (rt) {
1258 /* NB: the locking here is tortuous... */
1259 RT_LOCK(rt);
/* Cached destination route is down: replace it with a fresh lookup. */
1260 if ((rt->rt_flags & RTF_UP) == 0) {
1261 RT_UNLOCK(rt);
1262 rt = rtalloc1(dst, 1, 0UL);
1263 if (rt != NULL) {
1264 RT_REMREF(rt);
1265 /* XXX what about if change? */
1266 } else
1267 senderr(EHOSTUNREACH);
1268 rt0 = rt;
1269 }
1270 /* XXX BSD/OS checks dst->sa_family != AF_NS */
1271 if (rt->rt_flags & RTF_GATEWAY) {
/* No cached gateway route yet: go straight to the lookup. */
1272 if (rt->rt_gwroute == NULL)
1273 goto lookup;
1274 rt = rt->rt_gwroute;
1275 RT_LOCK(rt); /* NB: gwroute */
1276 if ((rt->rt_flags & RTF_UP) == 0) {
1277 rtfree(rt); /* unlock gwroute */
1278 rt = rt0;
/*
 * rt == rt0 on both paths into this label.
 * NOTE(review): rt->rt_gateway is read after rt0 has been unlocked -
 * confirm that this is safe.
 */
1279 lookup:
1280 RT_UNLOCK(rt0);
1281 rt = rtalloc1(rt->rt_gateway, 1, 0UL);
1282 RT_LOCK(rt0);
1283 rt0->rt_gwroute = rt;
1284 if (rt == NULL) {
1285 RT_UNLOCK(rt0);
1286 senderr(EHOSTUNREACH);
1287 }
1288 }
/* From here on rt is the gateway route; rt0 is no longer needed locked. */
1289 RT_UNLOCK(rt0);
1290 }
1291 /* XXX why are we inspecting rmx_expire? */
1292 error = (rt->rt_flags & RTF_REJECT) &&
1293 (rt->rt_rmx.rmx_expire == 0 ||
1294 time_second < rt->rt_rmx.rmx_expire);
1295 RT_UNLOCK(rt);
/* rt == rt0 means the destination route itself is the rejecting one. */
1296 if (error)
1297 senderr(rt == rt0 ? EHOSTDOWN : EHOSTUNREACH);
1298 }
1299 *lrt = rt; /* NB: return unlocked */
1300 *lrt0 = rt0;
1301 return (0);
1302bad:
1303 /* NB: lrt and lrt0 should not be interpreted if error is non-zero */
1304 return (error);
1305#undef senderr
1306}
1307
1308/* This must be before ip6_init2(), which is now SI_ORDER_MIDDLE */
/* Hooks route_init() into SYSINIT at SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD. */
1309SYSINIT(route, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, 0);