1/* $NetBSD: route.c,v 1.237 2023/06/05 03:51:45 ozaki-r Exp $ */ 2 3/*- 4 * Copyright (c) 1998, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Kevin M. Lahey of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33/* 34 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 35 * All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. 
Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the project nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 60 */ 61 62/* 63 * Copyright (c) 1980, 1986, 1991, 1993 64 * The Regents of the University of California. All rights reserved. 65 * 66 * Redistribution and use in source and binary forms, with or without 67 * modification, are permitted provided that the following conditions 68 * are met: 69 * 1. Redistributions of source code must retain the above copyright 70 * notice, this list of conditions and the following disclaimer. 71 * 2. Redistributions in binary form must reproduce the above copyright 72 * notice, this list of conditions and the following disclaimer in the 73 * documentation and/or other materials provided with the distribution. 
74 * 3. Neither the name of the University nor the names of its contributors 75 * may be used to endorse or promote products derived from this software 76 * without specific prior written permission. 77 * 78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 81 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 88 * SUCH DAMAGE. 89 * 90 * @(#)route.c 8.3 (Berkeley) 1/9/95 91 */ 92 93#ifdef _KERNEL_OPT 94#include "opt_inet.h" 95#include "opt_route.h" 96#include "opt_net_mpsafe.h" 97#endif 98 99#include <sys/cdefs.h> 100__KERNEL_RCSID(0, "$NetBSD: route.c,v 1.237 2023/06/05 03:51:45 ozaki-r Exp $"); 101 102#include <sys/param.h> 103#ifdef RTFLUSH_DEBUG 104#include <sys/sysctl.h> 105#endif 106#include <sys/systm.h> 107#include <sys/callout.h> 108#include <sys/proc.h> 109#include <sys/mbuf.h> 110#include <sys/socket.h> 111#include <sys/socketvar.h> 112#include <sys/domain.h> 113#include <sys/kernel.h> 114#include <sys/ioctl.h> 115#include <sys/pool.h> 116#include <sys/kauth.h> 117#include <sys/workqueue.h> 118#include <sys/syslog.h> 119#include <sys/rwlock.h> 120#include <sys/mutex.h> 121#include <sys/cpu.h> 122#include <sys/kmem.h> 123 124#include <net/if.h> 125#include <net/if_dl.h> 126#include <net/route.h> 127#if defined(INET) || defined(INET6) 128#include <net/if_llatbl.h> 129#endif 130 131#include 
<netinet/in.h> 132#include <netinet/in_var.h> 133 134#define PRESERVED_RTF (RTF_UP | RTF_GATEWAY | RTF_HOST | RTF_DONE | RTF_MASK) 135 136#ifdef RTFLUSH_DEBUG 137#define rtcache_debug() __predict_false(_rtcache_debug) 138#else /* RTFLUSH_DEBUG */ 139#define rtcache_debug() 0 140#endif /* RTFLUSH_DEBUG */ 141 142#ifdef RT_DEBUG 143#define RT_REFCNT_TRACE(rt) printf("%s:%d: rt=%p refcnt=%d\n", \ 144 __func__, __LINE__, (rt), (rt)->rt_refcnt) 145#else 146#define RT_REFCNT_TRACE(rt) do {} while (0) 147#endif 148 149#ifdef RT_DEBUG 150#define dlog(level, fmt, args...) log(level, fmt, ##args) 151#else 152#define dlog(level, fmt, args...) do {} while (0) 153#endif 154 155struct rtstat rtstat; 156 157static int rttrash; /* routes not in table but not freed */ 158 159static struct pool rtentry_pool; 160static struct pool rttimer_pool; 161 162static struct callout rt_timer_ch; /* callout for rt_timer_timer() */ 163static struct workqueue *rt_timer_wq; 164static struct work rt_timer_wk; 165 166static void rt_timer_init(void); 167static void rt_timer_queue_remove_all(struct rttimer_queue *); 168static void rt_timer_remove_all(struct rtentry *); 169static void rt_timer_timer(void *); 170 171/* 172 * Locking notes: 173 * - The routing table is protected by a global rwlock 174 * - API: RT_RLOCK and friends 175 * - rtcaches are NOT protected by the framework 176 * - Callers must guarantee a rtcache isn't accessed simultaneously 177 * - How the constraint is guaranteed in the wild 178 * - Protect a rtcache by a mutex (e.g., inp_route) 179 * - Make rtcache per-CPU and allow only accesses from softint 180 * (e.g., ipforward_rt_percpu) 181 * - References to a rtentry is managed by reference counting and psref 182 * - Reference counting is used for temporal reference when a rtentry 183 * is fetched from the routing table 184 * - psref is used for temporal reference when a rtentry is fetched 185 * from a rtcache 186 * - struct route (rtcache) has struct psref, so we cannot obtain 187 * 
 *       a reference twice on the same struct route
 *   - Before destroying or updating a rtentry, we have to wait until
 *     all references to it have been released (see below for details)
 *   - APIs
 *     - A rtentry obtained via rtalloc1 or rtrequest* must be
 *       unreferenced by rt_unref
 *     - A rtentry obtained via rtcache_* must be unreferenced by
 *       rtcache_unref
 *   - TODO: once we get a lockless routing table, we should use only
 *           psref for rtentries
 * - rtentry destruction
 *   - A rtentry is destroyed (freed) only when we call rtrequest(RTM_DELETE)
 *   - If a caller of rtrequest grabs a reference to a rtentry, the caller
 *     is responsible for destroying the rtentry itself by calling
 *     rt_free
 *     - If not, rtrequest itself does that
 *   - If rt_free is called in softint, the actual destruction routine is
 *     deferred to a workqueue
 * - rtentry update
 *   - When updating a rtentry, the RTF_UPDATING flag is set
 *   - If RTF_UPDATING is set on a rtentry, fetching the rtentry from
 *     the routing table or a rtcache results in one of the following
 *     cases:
 *     - if the caller runs in softint, the caller fails to fetch
 *     - otherwise, the caller waits for the update to complete and retries
 *       the fetch (which will probably succeed the second time)
 * - rtcache invalidation
 *   - There is a global generation counter that is incremented whenever
 *     any route is added or deleted
 *   - When a rtcache caches a rtentry, it also stores
 *     a snapshot of the generation counter
 *   - If the snapshot equals the global counter, the cache is valid;
 *     otherwise the cache is invalidated
 */

/*
 * Global lock for the routing table.
224 */ 225static krwlock_t rt_lock __cacheline_aligned; 226#ifdef NET_MPSAFE 227#define RT_RLOCK() rw_enter(&rt_lock, RW_READER) 228#define RT_WLOCK() rw_enter(&rt_lock, RW_WRITER) 229#define RT_UNLOCK() rw_exit(&rt_lock) 230#define RT_WLOCKED() rw_write_held(&rt_lock) 231#define RT_ASSERT_WLOCK() KASSERT(rw_write_held(&rt_lock)) 232#define RT_WQ_FLAGS WQ_MPSAFE 233#else 234#define RT_RLOCK() do {} while (0) 235#define RT_WLOCK() do {} while (0) 236#define RT_UNLOCK() do {} while (0) 237#define RT_WLOCKED() true 238#define RT_ASSERT_WLOCK() do {} while (0) 239#define RT_WQ_FLAGS 0 240#endif 241 242static uint64_t rtcache_generation; 243 244/* 245 * mutex and cv that are used to wait for references to a rtentry left 246 * before updating the rtentry. 247 */ 248static struct { 249 kmutex_t lock; 250 kcondvar_t cv; 251 bool ongoing; 252 const struct lwp *lwp; 253} rt_update_global __cacheline_aligned; 254 255/* 256 * A workqueue and stuff that are used to defer the destruction routine 257 * of rtentries. 
258 */ 259static struct { 260 struct workqueue *wq; 261 struct work wk; 262 kmutex_t lock; 263 SLIST_HEAD(, rtentry) queue; 264 bool enqueued; 265} rt_free_global __cacheline_aligned; 266 267/* psref for rtentry */ 268static struct psref_class *rt_psref_class __read_mostly; 269 270#ifdef RTFLUSH_DEBUG 271static int _rtcache_debug = 0; 272#endif /* RTFLUSH_DEBUG */ 273 274static kauth_listener_t route_listener; 275 276static int rtdeletemsg(struct rtentry *); 277 278static void rt_maskedcopy(const struct sockaddr *, 279 struct sockaddr *, const struct sockaddr *); 280 281static void rtcache_invalidate(void); 282 283static void rt_ref(struct rtentry *); 284 285static struct rtentry * 286 rtalloc1_locked(const struct sockaddr *, int, bool, bool); 287 288static struct ifaddr *rt_getifa(struct rt_addrinfo *, struct psref *); 289static struct ifnet *rt_getifp(struct rt_addrinfo *, struct psref *); 290static struct ifaddr *ifa_ifwithroute_psref(int, const struct sockaddr *, 291 const struct sockaddr *, struct psref *); 292 293static void rtcache_ref(struct rtentry *, struct route *); 294 295#ifdef NET_MPSAFE 296static void rt_update_wait(void); 297#endif 298 299static bool rt_wait_ok(void); 300static void rt_wait_refcnt(const char *, struct rtentry *, int); 301static void rt_wait_psref(struct rtentry *); 302 303#ifdef DDB 304static void db_print_sa(const struct sockaddr *); 305static void db_print_ifa(struct ifaddr *); 306static int db_show_rtentry(struct rtentry *, void *); 307#endif 308 309#ifdef RTFLUSH_DEBUG 310static void sysctl_net_rtcache_setup(struct sysctllog **); 311static void 312sysctl_net_rtcache_setup(struct sysctllog **clog) 313{ 314 const struct sysctlnode *rnode; 315 316 if (sysctl_createv(clog, 0, NULL, &rnode, CTLFLAG_PERMANENT, 317 CTLTYPE_NODE, 318 "rtcache", SYSCTL_DESCR("Route cache related settings"), 319 NULL, 0, NULL, 0, CTL_NET, CTL_CREATE, CTL_EOL) != 0) 320 return; 321 if (sysctl_createv(clog, 0, &rnode, &rnode, 322 
CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, 323 "debug", SYSCTL_DESCR("Debug route caches"), 324 NULL, 0, &_rtcache_debug, 0, CTL_CREATE, CTL_EOL) != 0) 325 return; 326} 327#endif /* RTFLUSH_DEBUG */ 328 329static inline void 330rt_destroy(struct rtentry *rt) 331{ 332 if (rt->_rt_key != NULL) 333 sockaddr_free(rt->_rt_key); 334 if (rt->rt_gateway != NULL) 335 sockaddr_free(rt->rt_gateway); 336 if (rt_gettag(rt) != NULL) 337 sockaddr_free(rt_gettag(rt)); 338 rt->_rt_key = rt->rt_gateway = rt->rt_tag = NULL; 339} 340 341static inline const struct sockaddr * 342rt_setkey(struct rtentry *rt, const struct sockaddr *key, int flags) 343{ 344 if (rt->_rt_key == key) 345 goto out; 346 347 if (rt->_rt_key != NULL) 348 sockaddr_free(rt->_rt_key); 349 rt->_rt_key = sockaddr_dup(key, flags); 350out: 351 rt->rt_nodes->rn_key = (const char *)rt->_rt_key; 352 return rt->_rt_key; 353} 354 355struct ifaddr * 356rt_get_ifa(struct rtentry *rt) 357{ 358 struct ifaddr *ifa; 359 360 ifa = rt->rt_ifa; 361 if (ifa->ifa_getifa == NULL) 362 return ifa; 363#if 0 364 else if (ifa->ifa_seqno != NULL && *ifa->ifa_seqno == rt->rt_ifa_seqno) 365 return ifa; 366#endif 367 else { 368 ifa = (*ifa->ifa_getifa)(ifa, rt_getkey(rt)); 369 if (ifa == NULL) 370 return NULL; 371 rt_replace_ifa(rt, ifa); 372 return ifa; 373 } 374} 375 376static void 377rt_set_ifa1(struct rtentry *rt, struct ifaddr *ifa) 378{ 379 rt->rt_ifa = ifa; 380 if (ifa->ifa_seqno != NULL) 381 rt->rt_ifa_seqno = *ifa->ifa_seqno; 382} 383 384/* 385 * Is this route the connected route for the ifa? 386 */ 387static int 388rt_ifa_connected(const struct rtentry *rt, const struct ifaddr *ifa) 389{ 390 const struct sockaddr *key, *dst, *odst; 391 struct sockaddr_storage maskeddst; 392 393 key = rt_getkey(rt); 394 dst = rt->rt_flags & RTF_HOST ? 
ifa->ifa_dstaddr : ifa->ifa_addr; 395 if (dst == NULL || 396 dst->sa_family != key->sa_family || 397 dst->sa_len != key->sa_len) 398 return 0; 399 if ((rt->rt_flags & RTF_HOST) == 0 && ifa->ifa_netmask) { 400 odst = dst; 401 dst = (struct sockaddr *)&maskeddst; 402 rt_maskedcopy(odst, (struct sockaddr *)&maskeddst, 403 ifa->ifa_netmask); 404 } 405 return (memcmp(dst, key, dst->sa_len) == 0); 406} 407 408void 409rt_replace_ifa(struct rtentry *rt, struct ifaddr *ifa) 410{ 411 struct ifaddr *old; 412 413 if (rt->rt_ifa == ifa) 414 return; 415 416 if (rt->rt_ifa != ifa && 417 rt->rt_ifa->ifa_flags & IFA_ROUTE && 418 rt_ifa_connected(rt, rt->rt_ifa)) 419 { 420 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, " 421 "replace deleted IFA_ROUTE\n", 422 (void *)rt->_rt_key, (void *)rt->rt_ifa); 423 rt->rt_ifa->ifa_flags &= ~IFA_ROUTE; 424 if (rt_ifa_connected(rt, ifa)) { 425 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, " 426 "replace added IFA_ROUTE\n", 427 (void *)rt->_rt_key, (void *)ifa); 428 ifa->ifa_flags |= IFA_ROUTE; 429 } 430 } 431 432 ifaref(ifa); 433 old = rt->rt_ifa; 434 rt_set_ifa1(rt, ifa); 435 ifafree(old); 436} 437 438static void 439rt_set_ifa(struct rtentry *rt, struct ifaddr *ifa) 440{ 441 ifaref(ifa); 442 rt_set_ifa1(rt, ifa); 443} 444 445static int 446route_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie, 447 void *arg0, void *arg1, void *arg2, void *arg3) 448{ 449 struct rt_msghdr *rtm; 450 int result; 451 452 result = KAUTH_RESULT_DEFER; 453 rtm = arg1; 454 455 if (action != KAUTH_NETWORK_ROUTE) 456 return result; 457 458 if (rtm->rtm_type == RTM_GET) 459 result = KAUTH_RESULT_ALLOW; 460 461 return result; 462} 463 464static void rt_free_work(struct work *, void *); 465 466void 467rt_init(void) 468{ 469 int error; 470 471#ifdef RTFLUSH_DEBUG 472 sysctl_net_rtcache_setup(NULL); 473#endif 474 475 mutex_init(&rt_free_global.lock, MUTEX_DEFAULT, IPL_SOFTNET); 476 SLIST_INIT(&rt_free_global.queue); 477 rt_free_global.enqueued = false; 478 479 
rt_psref_class = psref_class_create("rtentry", IPL_SOFTNET); 480 481 error = workqueue_create(&rt_free_global.wq, "rt_free", 482 rt_free_work, NULL, PRI_SOFTNET, IPL_SOFTNET, RT_WQ_FLAGS); 483 if (error) 484 panic("%s: workqueue_create failed (%d)\n", __func__, error); 485 486 mutex_init(&rt_update_global.lock, MUTEX_DEFAULT, IPL_SOFTNET); 487 cv_init(&rt_update_global.cv, "rt_update"); 488 489 pool_init(&rtentry_pool, sizeof(struct rtentry), 0, 0, 0, "rtentpl", 490 NULL, IPL_SOFTNET); 491 pool_init(&rttimer_pool, sizeof(struct rttimer), 0, 0, 0, "rttmrpl", 492 NULL, IPL_SOFTNET); 493 494 rn_init(); /* initialize all zeroes, all ones, mask table */ 495 rtbl_init(); 496 497 route_listener = kauth_listen_scope(KAUTH_SCOPE_NETWORK, 498 route_listener_cb, NULL); 499} 500 501static void 502rtcache_invalidate(void) 503{ 504 505 RT_ASSERT_WLOCK(); 506 507 if (rtcache_debug()) 508 printf("%s: enter\n", __func__); 509 510 rtcache_generation++; 511} 512 513#ifdef RT_DEBUG 514static void 515dump_rt(const struct rtentry *rt) 516{ 517 char buf[512]; 518 519 log(LOG_DEBUG, "rt: "); 520 log(LOG_DEBUG, "p=%p ", rt); 521 if (rt->_rt_key == NULL) { 522 log(LOG_DEBUG, "dst=(NULL) "); 523 } else { 524 sockaddr_format(rt->_rt_key, buf, sizeof(buf)); 525 log(LOG_DEBUG, "dst=%s ", buf); 526 } 527 if (rt->rt_gateway == NULL) { 528 log(LOG_DEBUG, "gw=(NULL) "); 529 } else { 530 sockaddr_format(rt->_rt_key, buf, sizeof(buf)); 531 log(LOG_DEBUG, "gw=%s ", buf); 532 } 533 log(LOG_DEBUG, "flags=%x ", rt->rt_flags); 534 if (rt->rt_ifp == NULL) { 535 log(LOG_DEBUG, "if=(NULL) "); 536 } else { 537 log(LOG_DEBUG, "if=%s ", rt->rt_ifp->if_xname); 538 } 539 log(LOG_DEBUG, "\n"); 540} 541#endif /* RT_DEBUG */ 542 543/* 544 * Packet routing routines. If success, refcnt of a returned rtentry 545 * will be incremented. The caller has to rtfree it by itself. 
546 */ 547struct rtentry * 548rtalloc1_locked(const struct sockaddr *dst, int report, bool wait_ok, 549 bool wlock) 550{ 551 rtbl_t *rtbl; 552 struct rtentry *rt; 553 int s; 554 555#ifdef NET_MPSAFE 556retry: 557#endif 558 s = splsoftnet(); 559 rtbl = rt_gettable(dst->sa_family); 560 if (rtbl == NULL) 561 goto miss; 562 563 rt = rt_matchaddr(rtbl, dst); 564 if (rt == NULL) 565 goto miss; 566 567 if (!ISSET(rt->rt_flags, RTF_UP)) 568 goto miss; 569 570#ifdef NET_MPSAFE 571 if (ISSET(rt->rt_flags, RTF_UPDATING) && 572 /* XXX updater should be always able to acquire */ 573 curlwp != rt_update_global.lwp) { 574 if (!wait_ok || !rt_wait_ok()) 575 goto miss; 576 RT_UNLOCK(); 577 splx(s); 578 579 /* We can wait until the update is complete */ 580 rt_update_wait(); 581 582 if (wlock) 583 RT_WLOCK(); 584 else 585 RT_RLOCK(); 586 goto retry; 587 } 588#endif /* NET_MPSAFE */ 589 590 rt_ref(rt); 591 RT_REFCNT_TRACE(rt); 592 593 splx(s); 594 return rt; 595miss: 596 rtstat.rts_unreach++; 597 if (report) { 598 struct rt_addrinfo info; 599 600 memset(&info, 0, sizeof(info)); 601 info.rti_info[RTAX_DST] = dst; 602 rt_missmsg(RTM_MISS, &info, 0, 0); 603 } 604 splx(s); 605 return NULL; 606} 607 608struct rtentry * 609rtalloc1(const struct sockaddr *dst, int report) 610{ 611 struct rtentry *rt; 612 613 RT_RLOCK(); 614 rt = rtalloc1_locked(dst, report, true, false); 615 RT_UNLOCK(); 616 617 return rt; 618} 619 620static void 621rt_ref(struct rtentry *rt) 622{ 623 624 KASSERTMSG(rt->rt_refcnt >= 0, "rt_refcnt=%d", rt->rt_refcnt); 625 atomic_inc_uint(&rt->rt_refcnt); 626} 627 628void 629rt_unref(struct rtentry *rt) 630{ 631 632 KASSERT(rt != NULL); 633 KASSERTMSG(rt->rt_refcnt > 0, "refcnt=%d", rt->rt_refcnt); 634 635 atomic_dec_uint(&rt->rt_refcnt); 636 if (!ISSET(rt->rt_flags, RTF_UP) || ISSET(rt->rt_flags, RTF_UPDATING)) { 637 mutex_enter(&rt_free_global.lock); 638 cv_broadcast(&rt->rt_cv); 639 mutex_exit(&rt_free_global.lock); 640 } 641} 642 643static bool 644rt_wait_ok(void) 645{ 
646 647 /* 648 * This originally returned !cpu_softintr_p(), but that doesn't 649 * work: the caller may hold a lock (probably softnet lock) 650 * that a softint is waiting for, in which case waiting here 651 * would cause a deadlock. See https://gnats.netbsd.org/56844 652 * for details. For now, until the locking paths are sorted 653 * out, we just disable the waiting option altogether and 654 * always defer to workqueue. 655 */ 656 KASSERT(!cpu_intr_p()); 657 return false; 658} 659 660void 661rt_wait_refcnt(const char *title, struct rtentry *rt, int cnt) 662{ 663 mutex_enter(&rt_free_global.lock); 664 while (rt->rt_refcnt > cnt) { 665 dlog(LOG_DEBUG, "%s: %s waiting (refcnt=%d)\n", 666 __func__, title, rt->rt_refcnt); 667 cv_wait(&rt->rt_cv, &rt_free_global.lock); 668 dlog(LOG_DEBUG, "%s: %s waited (refcnt=%d)\n", 669 __func__, title, rt->rt_refcnt); 670 } 671 mutex_exit(&rt_free_global.lock); 672} 673 674void 675rt_wait_psref(struct rtentry *rt) 676{ 677 678 psref_target_destroy(&rt->rt_psref, rt_psref_class); 679 psref_target_init(&rt->rt_psref, rt_psref_class); 680} 681 682static void 683_rt_free(struct rtentry *rt) 684{ 685 struct ifaddr *ifa; 686 687 /* 688 * Need to avoid a deadlock on rt_wait_refcnt of update 689 * and a conflict on psref_target_destroy of update. 
690 */ 691#ifdef NET_MPSAFE 692 rt_update_wait(); 693#endif 694 695 RT_REFCNT_TRACE(rt); 696 KASSERTMSG(rt->rt_refcnt >= 0, "refcnt=%d", rt->rt_refcnt); 697 rt_wait_refcnt("free", rt, 0); 698#ifdef NET_MPSAFE 699 psref_target_destroy(&rt->rt_psref, rt_psref_class); 700#endif 701 702 rt_assert_inactive(rt); 703 rttrash--; 704 ifa = rt->rt_ifa; 705 rt->rt_ifa = NULL; 706 ifafree(ifa); 707 rt->rt_ifp = NULL; 708 cv_destroy(&rt->rt_cv); 709 rt_destroy(rt); 710 pool_put(&rtentry_pool, rt); 711} 712 713static void 714rt_free_work(struct work *wk, void *arg) 715{ 716 717 for (;;) { 718 struct rtentry *rt; 719 720 mutex_enter(&rt_free_global.lock); 721 if ((rt = SLIST_FIRST(&rt_free_global.queue)) == NULL) { 722 rt_free_global.enqueued = false; 723 mutex_exit(&rt_free_global.lock); 724 return; 725 } 726 SLIST_REMOVE_HEAD(&rt_free_global.queue, rt_free); 727 mutex_exit(&rt_free_global.lock); 728 atomic_dec_uint(&rt->rt_refcnt); 729 _rt_free(rt); 730 } 731} 732 733void 734rt_free(struct rtentry *rt) 735{ 736 737 KASSERTMSG(rt->rt_refcnt > 0, "rt_refcnt=%d", rt->rt_refcnt); 738 if (rt_wait_ok()) { 739 atomic_dec_uint(&rt->rt_refcnt); 740 _rt_free(rt); 741 return; 742 } 743 744 mutex_enter(&rt_free_global.lock); 745 /* No need to add a reference here. 
*/ 746 SLIST_INSERT_HEAD(&rt_free_global.queue, rt, rt_free); 747 if (!rt_free_global.enqueued) { 748 workqueue_enqueue(rt_free_global.wq, &rt_free_global.wk, NULL); 749 rt_free_global.enqueued = true; 750 } 751 mutex_exit(&rt_free_global.lock); 752} 753 754#ifdef NET_MPSAFE 755static void 756rt_update_wait(void) 757{ 758 759 mutex_enter(&rt_update_global.lock); 760 while (rt_update_global.ongoing) { 761 dlog(LOG_DEBUG, "%s: waiting lwp=%p\n", __func__, curlwp); 762 cv_wait(&rt_update_global.cv, &rt_update_global.lock); 763 dlog(LOG_DEBUG, "%s: waited lwp=%p\n", __func__, curlwp); 764 } 765 mutex_exit(&rt_update_global.lock); 766} 767#endif 768 769int 770rt_update_prepare(struct rtentry *rt) 771{ 772 773 dlog(LOG_DEBUG, "%s: updating rt=%p lwp=%p\n", __func__, rt, curlwp); 774 775 RT_WLOCK(); 776 /* If the entry is being destroyed, don't proceed the update. */ 777 if (!ISSET(rt->rt_flags, RTF_UP)) { 778 RT_UNLOCK(); 779 return ESRCH; 780 } 781 rt->rt_flags |= RTF_UPDATING; 782 RT_UNLOCK(); 783 784 mutex_enter(&rt_update_global.lock); 785 while (rt_update_global.ongoing) { 786 dlog(LOG_DEBUG, "%s: waiting ongoing updating rt=%p lwp=%p\n", 787 __func__, rt, curlwp); 788 cv_wait(&rt_update_global.cv, &rt_update_global.lock); 789 dlog(LOG_DEBUG, "%s: waited ongoing updating rt=%p lwp=%p\n", 790 __func__, rt, curlwp); 791 } 792 rt_update_global.ongoing = true; 793 /* XXX need it to avoid rt_update_wait by updater itself. 
*/ 794 rt_update_global.lwp = curlwp; 795 mutex_exit(&rt_update_global.lock); 796 797 rt_wait_refcnt("update", rt, 1); 798 rt_wait_psref(rt); 799 800 return 0; 801} 802 803void 804rt_update_finish(struct rtentry *rt) 805{ 806 807 RT_WLOCK(); 808 rt->rt_flags &= ~RTF_UPDATING; 809 RT_UNLOCK(); 810 811 mutex_enter(&rt_update_global.lock); 812 rt_update_global.ongoing = false; 813 rt_update_global.lwp = NULL; 814 cv_broadcast(&rt_update_global.cv); 815 mutex_exit(&rt_update_global.lock); 816 817 dlog(LOG_DEBUG, "%s: updated rt=%p lwp=%p\n", __func__, rt, curlwp); 818} 819 820/* 821 * Force a routing table entry to the specified 822 * destination to go through the given gateway. 823 * Normally called as a result of a routing redirect 824 * message from the network layer. 825 * 826 * N.B.: must be called at splsoftnet 827 */ 828void 829rtredirect(const struct sockaddr *dst, const struct sockaddr *gateway, 830 const struct sockaddr *netmask, int flags, const struct sockaddr *src, 831 struct rtentry **rtp) 832{ 833 struct rtentry *rt; 834 int error = 0; 835 uint64_t *stat = NULL; 836 struct rt_addrinfo info; 837 struct ifaddr *ifa; 838 struct psref psref; 839 840 /* verify the gateway is directly reachable */ 841 if ((ifa = ifa_ifwithnet_psref(gateway, &psref)) == NULL) { 842 error = ENETUNREACH; 843 goto out; 844 } 845 rt = rtalloc1(dst, 0); 846 /* 847 * If the redirect isn't from our current router for this dst, 848 * it's either old or wrong. If it redirects us to ourselves, 849 * we have a routing loop, perhaps as a result of an interface 850 * going down recently. 
851 */ 852 if (!(flags & RTF_DONE) && rt && 853 (sockaddr_cmp(src, rt->rt_gateway) != 0 || rt->rt_ifa != ifa)) 854 error = EINVAL; 855 else { 856 int s = pserialize_read_enter(); 857 struct ifaddr *_ifa; 858 859 _ifa = ifa_ifwithaddr(gateway); 860 if (_ifa != NULL) 861 error = EHOSTUNREACH; 862 pserialize_read_exit(s); 863 } 864 if (error) 865 goto done; 866 /* 867 * Create a new entry if we just got back a wildcard entry 868 * or the lookup failed. This is necessary for hosts 869 * which use routing redirects generated by smart gateways 870 * to dynamically build the routing tables. 871 */ 872 if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2)) 873 goto create; 874 /* 875 * Don't listen to the redirect if it's 876 * for a route to an interface. 877 */ 878 if (rt->rt_flags & RTF_GATEWAY) { 879 if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) { 880 /* 881 * Changing from route to net => route to host. 882 * Create new route, rather than smashing route to net. 883 */ 884 create: 885 if (rt != NULL) 886 rt_unref(rt); 887 flags |= RTF_GATEWAY | RTF_DYNAMIC; 888 memset(&info, 0, sizeof(info)); 889 info.rti_info[RTAX_DST] = dst; 890 info.rti_info[RTAX_GATEWAY] = gateway; 891 info.rti_info[RTAX_NETMASK] = netmask; 892 info.rti_ifa = ifa; 893 info.rti_flags = flags; 894 rt = NULL; 895 error = rtrequest1(RTM_ADD, &info, &rt); 896 if (rt != NULL) 897 flags = rt->rt_flags; 898 if (error == 0) 899 rt_newmsg_dynamic(RTM_ADD, rt); 900 stat = &rtstat.rts_dynamic; 901 } else { 902 /* 903 * Smash the current notion of the gateway to 904 * this destination. Should check about netmask!!! 
905 */ 906#ifdef NET_MPSAFE 907 KASSERT(!cpu_softintr_p()); 908 909 error = rt_update_prepare(rt); 910 if (error == 0) { 911#endif 912 RT_WLOCK(); 913 error = rt_setgate(rt, gateway); 914 if (error == 0) { 915 rt->rt_flags |= RTF_MODIFIED; 916 flags |= RTF_MODIFIED; 917 } 918 RT_UNLOCK(); 919#ifdef NET_MPSAFE 920 rt_update_finish(rt); 921 } else { 922 /* 923 * If error != 0, the rtentry is being 924 * destroyed, so doing nothing doesn't 925 * matter. 926 */ 927 } 928#endif 929 stat = &rtstat.rts_newgateway; 930 } 931 } else 932 error = EHOSTUNREACH; 933done: 934 if (rt) { 935 if (rtp != NULL && !error) 936 *rtp = rt; 937 else 938 rt_unref(rt); 939 } 940out: 941 if (error) 942 rtstat.rts_badredirect++; 943 else if (stat != NULL) 944 (*stat)++; 945 memset(&info, 0, sizeof(info)); 946 info.rti_info[RTAX_DST] = dst; 947 info.rti_info[RTAX_GATEWAY] = gateway; 948 info.rti_info[RTAX_NETMASK] = netmask; 949 info.rti_info[RTAX_AUTHOR] = src; 950 rt_missmsg(RTM_REDIRECT, &info, flags, error); 951 ifa_release(ifa, &psref); 952} 953 954/* 955 * Delete a route and generate a message. 956 * It doesn't free a passed rt. 957 */ 958static int 959rtdeletemsg(struct rtentry *rt) 960{ 961 int error; 962 struct rt_addrinfo info; 963 struct rtentry *retrt; 964 965 /* 966 * Request the new route so that the entry is not actually 967 * deleted. That will allow the information being reported to 968 * be accurate (and consistent with route_output()). 
969 */ 970 memset(&info, 0, sizeof(info)); 971 info.rti_info[RTAX_DST] = rt_getkey(rt); 972 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 973 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 974 info.rti_flags = rt->rt_flags; 975 error = rtrequest1(RTM_DELETE, &info, &retrt); 976 977 rt_missmsg(RTM_DELETE, &info, info.rti_flags, error); 978 979 return error; 980} 981 982static struct ifaddr * 983ifa_ifwithroute_psref(int flags, const struct sockaddr *dst, 984 const struct sockaddr *gateway, struct psref *psref) 985{ 986 struct ifaddr *ifa = NULL; 987 988 if ((flags & RTF_GATEWAY) == 0) { 989 /* 990 * If we are adding a route to an interface, 991 * and the interface is a pt to pt link 992 * we should search for the destination 993 * as our clue to the interface. Otherwise 994 * we can use the local address. 995 */ 996 if ((flags & RTF_HOST) && gateway->sa_family != AF_LINK) 997 ifa = ifa_ifwithdstaddr_psref(dst, psref); 998 if (ifa == NULL) 999 ifa = ifa_ifwithaddr_psref(gateway, psref); 1000 } else { 1001 /* 1002 * If we are adding a route to a remote net 1003 * or host, the gateway may still be on the 1004 * other end of a pt to pt link. 1005 */ 1006 ifa = ifa_ifwithdstaddr_psref(gateway, psref); 1007 } 1008 if (ifa == NULL) 1009 ifa = ifa_ifwithnet_psref(gateway, psref); 1010 if (ifa == NULL) { 1011 int s; 1012 struct rtentry *rt; 1013 1014 rt = rtalloc1_locked(gateway, 0, true, true); 1015 if (rt == NULL) 1016 return NULL; 1017 if (rt->rt_flags & RTF_GATEWAY) { 1018 rt_unref(rt); 1019 return NULL; 1020 } 1021 /* 1022 * Just in case. May not need to do this workaround. 1023 * Revisit when working on rtentry MP-ification. 
1024 */ 1025 s = pserialize_read_enter(); 1026 IFADDR_READER_FOREACH(ifa, rt->rt_ifp) { 1027 if (ifa == rt->rt_ifa) 1028 break; 1029 } 1030 if (ifa != NULL) 1031 ifa_acquire(ifa, psref); 1032 pserialize_read_exit(s); 1033 rt_unref(rt); 1034 if (ifa == NULL) 1035 return NULL; 1036 } 1037 if (ifa->ifa_addr->sa_family != dst->sa_family) { 1038 struct ifaddr *nifa; 1039 int s; 1040 1041 s = pserialize_read_enter(); 1042 nifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp); 1043 if (nifa != NULL) { 1044 ifa_release(ifa, psref); 1045 ifa_acquire(nifa, psref); 1046 ifa = nifa; 1047 } 1048 pserialize_read_exit(s); 1049 } 1050 return ifa; 1051} 1052 1053/* 1054 * If it suceeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented. 1055 * The caller has to rtfree it by itself. 1056 */ 1057int 1058rtrequest(int req, const struct sockaddr *dst, const struct sockaddr *gateway, 1059 const struct sockaddr *netmask, int flags, struct rtentry **ret_nrt) 1060{ 1061 struct rt_addrinfo info; 1062 1063 memset(&info, 0, sizeof(info)); 1064 info.rti_flags = flags; 1065 info.rti_info[RTAX_DST] = dst; 1066 info.rti_info[RTAX_GATEWAY] = gateway; 1067 info.rti_info[RTAX_NETMASK] = netmask; 1068 return rtrequest1(req, &info, ret_nrt); 1069} 1070 1071static struct ifnet * 1072rt_getifp(struct rt_addrinfo *info, struct psref *psref) 1073{ 1074 const struct sockaddr *ifpaddr = info->rti_info[RTAX_IFP]; 1075 1076 if (info->rti_ifp != NULL) 1077 return NULL; 1078 /* 1079 * ifp may be specified by sockaddr_dl when protocol address 1080 * is ambiguous 1081 */ 1082 if (ifpaddr != NULL && ifpaddr->sa_family == AF_LINK) { 1083 struct ifaddr *ifa; 1084 int s = pserialize_read_enter(); 1085 1086 ifa = ifa_ifwithnet(ifpaddr); 1087 if (ifa != NULL) 1088 info->rti_ifp = if_get_byindex(ifa->ifa_ifp->if_index, 1089 psref); 1090 pserialize_read_exit(s); 1091 } 1092 1093 return info->rti_ifp; 1094} 1095 1096static struct ifaddr * 1097rt_getifa(struct rt_addrinfo *info, struct psref *psref) 1098{ 1099 struct ifaddr 
*ifa = NULL; 1100 const struct sockaddr *dst = info->rti_info[RTAX_DST]; 1101 const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY]; 1102 const struct sockaddr *ifaaddr = info->rti_info[RTAX_IFA]; 1103 int flags = info->rti_flags; 1104 const struct sockaddr *sa; 1105 1106 if (info->rti_ifa == NULL && ifaaddr != NULL) { 1107 ifa = ifa_ifwithaddr_psref(ifaaddr, psref); 1108 if (ifa != NULL) 1109 goto got; 1110 } 1111 1112 sa = ifaaddr != NULL ? ifaaddr : 1113 (gateway != NULL ? gateway : dst); 1114 if (sa != NULL && info->rti_ifp != NULL) 1115 ifa = ifaof_ifpforaddr_psref(sa, info->rti_ifp, psref); 1116 else if (dst != NULL && gateway != NULL) 1117 ifa = ifa_ifwithroute_psref(flags, dst, gateway, psref); 1118 else if (sa != NULL) 1119 ifa = ifa_ifwithroute_psref(flags, sa, sa, psref); 1120 if (ifa == NULL) 1121 return NULL; 1122got: 1123 if (ifa->ifa_getifa != NULL) { 1124 /* FIXME ifa_getifa is NOMPSAFE */ 1125 ifa = (*ifa->ifa_getifa)(ifa, dst); 1126 if (ifa == NULL) 1127 return NULL; 1128 ifa_acquire(ifa, psref); 1129 } 1130 info->rti_ifa = ifa; 1131 if (info->rti_ifp == NULL) 1132 info->rti_ifp = ifa->ifa_ifp; 1133 return ifa; 1134} 1135 1136/* 1137 * If it suceeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented. 1138 * The caller has to rtfree it by itself. 
1139 */ 1140int 1141rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt) 1142{ 1143 int s = splsoftnet(), ss; 1144 int error = 0, rc; 1145 struct rtentry *rt; 1146 rtbl_t *rtbl; 1147 struct ifaddr *ifa = NULL; 1148 struct sockaddr_storage maskeddst; 1149 const struct sockaddr *dst = info->rti_info[RTAX_DST]; 1150 const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY]; 1151 const struct sockaddr *netmask = info->rti_info[RTAX_NETMASK]; 1152 int flags = info->rti_flags; 1153 struct psref psref_ifp, psref_ifa; 1154 int bound = 0; 1155 struct ifnet *ifp = NULL; 1156 bool need_to_release_ifa = true; 1157 bool need_unlock = true; 1158#define senderr(x) { error = x ; goto bad; } 1159 1160 RT_WLOCK(); 1161 1162 bound = curlwp_bind(); 1163 if ((rtbl = rt_gettable(dst->sa_family)) == NULL) 1164 senderr(ESRCH); 1165 if (flags & RTF_HOST) 1166 netmask = NULL; 1167 switch (req) { 1168 case RTM_DELETE: 1169 if (netmask) { 1170 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst, 1171 netmask); 1172 dst = (struct sockaddr *)&maskeddst; 1173 } 1174 if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL) 1175 senderr(ESRCH); 1176 if ((rt = rt_deladdr(rtbl, dst, netmask)) == NULL) 1177 senderr(ESRCH); 1178 rt->rt_flags &= ~RTF_UP; 1179 ifa = rt->rt_ifa; 1180 if (ifa->ifa_flags & IFA_ROUTE && 1181 rt_ifa_connected(rt, ifa)) { 1182 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, " 1183 "deleted IFA_ROUTE\n", 1184 (void *)rt->_rt_key, (void *)ifa); 1185 ifa->ifa_flags &= ~IFA_ROUTE; 1186 } 1187 if (ifa->ifa_rtrequest) 1188 ifa->ifa_rtrequest(RTM_DELETE, rt, info); 1189 ifa = NULL; 1190 rttrash++; 1191 if (ret_nrt) { 1192 *ret_nrt = rt; 1193 rt_ref(rt); 1194 RT_REFCNT_TRACE(rt); 1195 } 1196 rtcache_invalidate(); 1197 RT_UNLOCK(); 1198 need_unlock = false; 1199 rt_timer_remove_all(rt); 1200#if defined(INET) || defined(INET6) 1201 if (netmask != NULL) 1202 lltable_prefix_free(dst->sa_family, dst, netmask, 0); 1203#endif 1204 if (ret_nrt == NULL) { 1205 /* Adjust the refcount */ 
1206 rt_ref(rt); 1207 RT_REFCNT_TRACE(rt); 1208 rt_free(rt); 1209 } 1210 break; 1211 1212 case RTM_ADD: 1213 if (info->rti_ifa == NULL) { 1214 ifp = rt_getifp(info, &psref_ifp); 1215 ifa = rt_getifa(info, &psref_ifa); 1216 if (ifa == NULL) 1217 senderr(ENETUNREACH); 1218 } else { 1219 /* Caller should have a reference of ifa */ 1220 ifa = info->rti_ifa; 1221 need_to_release_ifa = false; 1222 } 1223 rt = pool_get(&rtentry_pool, PR_NOWAIT); 1224 if (rt == NULL) 1225 senderr(ENOBUFS); 1226 memset(rt, 0, sizeof(*rt)); 1227 rt->rt_flags = RTF_UP | (flags & ~RTF_DONTCHANGEIFA); 1228 LIST_INIT(&rt->rt_timer); 1229 1230 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1231 if (netmask) { 1232 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst, 1233 netmask); 1234 rt_setkey(rt, (struct sockaddr *)&maskeddst, M_NOWAIT); 1235 } else { 1236 rt_setkey(rt, dst, M_NOWAIT); 1237 } 1238 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1239 if (rt_getkey(rt) == NULL || 1240 rt_setgate(rt, gateway) != 0) { 1241 pool_put(&rtentry_pool, rt); 1242 senderr(ENOBUFS); 1243 } 1244 1245 rt_set_ifa(rt, ifa); 1246 if (info->rti_info[RTAX_TAG] != NULL) { 1247 const struct sockaddr *tag; 1248 tag = rt_settag(rt, info->rti_info[RTAX_TAG]); 1249 if (tag == NULL) 1250 senderr(ENOBUFS); 1251 } 1252 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1253 1254 ss = pserialize_read_enter(); 1255 if (info->rti_info[RTAX_IFP] != NULL) { 1256 struct ifaddr *ifa2; 1257 ifa2 = ifa_ifwithnet(info->rti_info[RTAX_IFP]); 1258 if (ifa2 != NULL) 1259 rt->rt_ifp = ifa2->ifa_ifp; 1260 else 1261 rt->rt_ifp = ifa->ifa_ifp; 1262 } else 1263 rt->rt_ifp = ifa->ifa_ifp; 1264 pserialize_read_exit(ss); 1265 cv_init(&rt->rt_cv, "rtentry"); 1266 psref_target_init(&rt->rt_psref, rt_psref_class); 1267 1268 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1269 rc = rt_addaddr(rtbl, rt, netmask); 1270 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1271 if (rc != 0) { 1272 ifafree(ifa); /* for 
rt_set_ifa above */ 1273 cv_destroy(&rt->rt_cv); 1274 rt_destroy(rt); 1275 pool_put(&rtentry_pool, rt); 1276 senderr(rc); 1277 } 1278 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1279 if (ifa->ifa_rtrequest) 1280 ifa->ifa_rtrequest(req, rt, info); 1281 if (need_to_release_ifa) 1282 ifa_release(ifa, &psref_ifa); 1283 ifa = NULL; 1284 if_put(ifp, &psref_ifp); 1285 ifp = NULL; 1286 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1287 if (ret_nrt) { 1288 *ret_nrt = rt; 1289 rt_ref(rt); 1290 RT_REFCNT_TRACE(rt); 1291 } 1292 rtcache_invalidate(); 1293 RT_UNLOCK(); 1294 need_unlock = false; 1295 break; 1296 case RTM_GET: 1297 if (netmask != NULL) { 1298 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst, 1299 netmask); 1300 dst = (struct sockaddr *)&maskeddst; 1301 } 1302 if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL) 1303 senderr(ESRCH); 1304 if (ret_nrt != NULL) { 1305 *ret_nrt = rt; 1306 rt_ref(rt); 1307 RT_REFCNT_TRACE(rt); 1308 } 1309 break; 1310 } 1311bad: 1312 if (need_to_release_ifa) 1313 ifa_release(ifa, &psref_ifa); 1314 if_put(ifp, &psref_ifp); 1315 curlwp_bindx(bound); 1316 if (need_unlock) 1317 RT_UNLOCK(); 1318 splx(s); 1319 return error; 1320} 1321 1322int 1323rt_setgate(struct rtentry *rt, const struct sockaddr *gate) 1324{ 1325 struct sockaddr *new, *old; 1326 1327 KASSERT(RT_WLOCKED()); 1328 KASSERT(rt->_rt_key != NULL); 1329 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1330 1331 new = sockaddr_dup(gate, M_ZERO | M_NOWAIT); 1332 if (new == NULL) 1333 return ENOMEM; 1334 1335 old = rt->rt_gateway; 1336 rt->rt_gateway = new; 1337 if (old != NULL) 1338 sockaddr_free(old); 1339 1340 KASSERT(rt->_rt_key != NULL); 1341 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1342 1343 if (rt->rt_flags & RTF_GATEWAY) { 1344 struct rtentry *gwrt; 1345 1346 gwrt = rtalloc1_locked(gate, 1, false, true); 1347 /* 1348 * If we switched gateways, grab the MTU from the new 1349 * gateway route if the current MTU, if the current MTU is 1350 * 
greater than the MTU of gateway. 1351 * Note that, if the MTU of gateway is 0, we will reset the 1352 * MTU of the route to run PMTUD again from scratch. XXX 1353 */ 1354 if (gwrt != NULL) { 1355 KASSERT(gwrt->_rt_key != NULL); 1356 RT_DPRINTF("gwrt->_rt_key = %p\n", gwrt->_rt_key); 1357 if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0 && 1358 rt->rt_rmx.rmx_mtu && 1359 rt->rt_rmx.rmx_mtu > gwrt->rt_rmx.rmx_mtu) { 1360 rt->rt_rmx.rmx_mtu = gwrt->rt_rmx.rmx_mtu; 1361 } 1362 rt_unref(gwrt); 1363 } 1364 } 1365 KASSERT(rt->_rt_key != NULL); 1366 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key); 1367 return 0; 1368} 1369 1370static struct ifaddr * 1371rt_update_get_ifa(const struct rt_addrinfo *info, const struct rtentry *rt, 1372 struct ifnet **ifp, struct psref *psref_ifp, struct psref *psref) 1373{ 1374 struct ifaddr *ifa = NULL; 1375 1376 *ifp = NULL; 1377 if (info->rti_info[RTAX_IFP] != NULL) { 1378 ifa = ifa_ifwithnet_psref(info->rti_info[RTAX_IFP], psref); 1379 if (ifa == NULL) 1380 goto next; 1381 if (ifa->ifa_ifp->if_flags & IFF_UNNUMBERED) { 1382 ifa_release(ifa, psref); 1383 ifa = NULL; 1384 goto next; 1385 } 1386 *ifp = ifa->ifa_ifp; 1387 if_acquire(*ifp, psref_ifp); 1388 if (info->rti_info[RTAX_IFA] == NULL && 1389 info->rti_info[RTAX_GATEWAY] == NULL) 1390 goto out; 1391 ifa_release(ifa, psref); 1392 if (info->rti_info[RTAX_IFA] == NULL) { 1393 /* route change <dst> <gw> -ifp <if> */ 1394 ifa = ifaof_ifpforaddr_psref( 1395 info->rti_info[RTAX_GATEWAY], *ifp, psref); 1396 } else { 1397 /* route change <dst> -ifp <if> -ifa <addr> */ 1398 ifa = ifa_ifwithaddr_psref(info->rti_info[RTAX_IFA], 1399 psref); 1400 if (ifa != NULL) 1401 goto out; 1402 ifa = ifaof_ifpforaddr_psref(info->rti_info[RTAX_IFA], 1403 *ifp, psref); 1404 } 1405 goto out; 1406 } 1407next: 1408 if (info->rti_info[RTAX_IFA] != NULL) { 1409 /* route change <dst> <gw> -ifa <addr> */ 1410 ifa = ifa_ifwithaddr_psref(info->rti_info[RTAX_IFA], psref); 1411 if (ifa != NULL) 1412 goto out; 1413 } 1414 if 
(info->rti_info[RTAX_GATEWAY] != NULL) { 1415 /* route change <dst> <gw> */ 1416 ifa = ifa_ifwithroute_psref(rt->rt_flags, rt_getkey(rt), 1417 info->rti_info[RTAX_GATEWAY], psref); 1418 } 1419out: 1420 if (ifa != NULL && *ifp == NULL) { 1421 *ifp = ifa->ifa_ifp; 1422 if_acquire(*ifp, psref_ifp); 1423 } 1424 if (ifa == NULL && *ifp != NULL) { 1425 if_put(*ifp, psref_ifp); 1426 *ifp = NULL; 1427 } 1428 return ifa; 1429} 1430 1431int 1432rt_update(struct rtentry *rt, struct rt_addrinfo *info, void *rtm) 1433{ 1434 int error = 0; 1435 struct ifnet *ifp = NULL, *new_ifp = NULL; 1436 struct ifaddr *ifa = NULL, *new_ifa; 1437 struct psref psref_ifa, psref_new_ifa, psref_ifp, psref_new_ifp; 1438 bool newgw, ifp_changed = false; 1439 1440 RT_WLOCK(); 1441 /* 1442 * New gateway could require new ifaddr, ifp; 1443 * flags may also be different; ifp may be specified 1444 * by ll sockaddr when protocol address is ambiguous 1445 */ 1446 newgw = info->rti_info[RTAX_GATEWAY] != NULL && 1447 sockaddr_cmp(info->rti_info[RTAX_GATEWAY], rt->rt_gateway) != 0; 1448 1449 if (newgw || info->rti_info[RTAX_IFP] != NULL || 1450 info->rti_info[RTAX_IFA] != NULL) { 1451 ifp = rt_getifp(info, &psref_ifp); 1452 /* info refers ifp so we need to keep a reference */ 1453 ifa = rt_getifa(info, &psref_ifa); 1454 if (ifa == NULL) { 1455 error = ENETUNREACH; 1456 goto out; 1457 } 1458 } 1459 if (newgw) { 1460 error = rt_setgate(rt, info->rti_info[RTAX_GATEWAY]); 1461 if (error != 0) 1462 goto out; 1463 } 1464 if (info->rti_info[RTAX_TAG]) { 1465 const struct sockaddr *tag; 1466 tag = rt_settag(rt, info->rti_info[RTAX_TAG]); 1467 if (tag == NULL) { 1468 error = ENOBUFS; 1469 goto out; 1470 } 1471 } 1472 /* 1473 * New gateway could require new ifaddr, ifp; 1474 * flags may also be different; ifp may be specified 1475 * by ll sockaddr when protocol address is ambiguous 1476 */ 1477 new_ifa = rt_update_get_ifa(info, rt, &new_ifp, &psref_new_ifp, 1478 &psref_new_ifa); 1479 if (new_ifa != NULL) { 1480 
ifa_release(ifa, &psref_ifa); 1481 ifa = new_ifa; 1482 } 1483 if (ifa) { 1484 struct ifaddr *oifa = rt->rt_ifa; 1485 if (oifa != ifa && !ifa_is_destroying(ifa) && 1486 new_ifp != NULL && !if_is_deactivated(new_ifp)) { 1487 if (oifa && oifa->ifa_rtrequest) 1488 oifa->ifa_rtrequest(RTM_DELETE, rt, info); 1489 rt_replace_ifa(rt, ifa); 1490 rt->rt_ifp = new_ifp; 1491 ifp_changed = true; 1492 } 1493 if (new_ifa == NULL) 1494 ifa_release(ifa, &psref_ifa); 1495 /* To avoid ifa_release below */ 1496 ifa = NULL; 1497 } 1498 ifa_release(new_ifa, &psref_new_ifa); 1499 if (new_ifp && rt->rt_ifp != new_ifp && !if_is_deactivated(new_ifp)) { 1500 rt->rt_ifp = new_ifp; 1501 ifp_changed = true; 1502 } 1503 rt_setmetrics(rtm, rt); 1504 if (rt->rt_flags != info->rti_flags) { 1505 rt->rt_flags = (info->rti_flags & ~PRESERVED_RTF) | 1506 (rt->rt_flags & PRESERVED_RTF); 1507 } 1508 if (rt->rt_ifa->ifa_rtrequest) 1509 rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, info); 1510#if defined(INET) || defined(INET6) 1511 if (ifp_changed && rt_mask(rt) != NULL) 1512 lltable_prefix_free(rt_getkey(rt)->sa_family, rt_getkey(rt), 1513 rt_mask(rt), 0); 1514#else 1515 (void)ifp_changed; /* XXX gcc */ 1516#endif 1517out: 1518 ifa_release(ifa, &psref_ifa); 1519 if_put(new_ifp, &psref_new_ifp); 1520 if_put(ifp, &psref_ifp); 1521 1522 RT_UNLOCK(); 1523 1524 return error; 1525} 1526 1527static void 1528rt_maskedcopy(const struct sockaddr *src, struct sockaddr *dst, 1529 const struct sockaddr *netmask) 1530{ 1531 const char *netmaskp = &netmask->sa_data[0], 1532 *srcp = &src->sa_data[0]; 1533 char *dstp = &dst->sa_data[0]; 1534 const char *maskend = (char *)dst + MIN(netmask->sa_len, src->sa_len); 1535 const char *srcend = (char *)dst + src->sa_len; 1536 1537 dst->sa_len = src->sa_len; 1538 dst->sa_family = src->sa_family; 1539 1540 while (dstp < maskend) 1541 *dstp++ = *srcp++ & *netmaskp++; 1542 if (dstp < srcend) 1543 memset(dstp, 0, (size_t)(srcend - dstp)); 1544} 1545 1546/* 1547 * Inform the routing socket 
of a route change. 1548 */ 1549void 1550rt_newmsg(const int cmd, const struct rtentry *rt) 1551{ 1552 struct rt_addrinfo info; 1553 1554 memset((void *)&info, 0, sizeof(info)); 1555 info.rti_info[RTAX_DST] = rt_getkey(rt); 1556 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1557 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 1558 if (rt->rt_ifp) { 1559 info.rti_info[RTAX_IFP] = rt->rt_ifp->if_dl->ifa_addr; 1560 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 1561 } 1562 1563 rt_missmsg(cmd, &info, rt->rt_flags, 0); 1564} 1565 1566/* 1567 * Inform the routing socket of a route change for RTF_DYNAMIC. 1568 */ 1569void 1570rt_newmsg_dynamic(const int cmd, const struct rtentry *rt) 1571{ 1572 struct rt_addrinfo info; 1573 struct sockaddr *gateway = rt->rt_gateway; 1574 1575 if (gateway == NULL) 1576 return; 1577 1578 switch(gateway->sa_family) { 1579#ifdef INET 1580 case AF_INET: { 1581 extern bool icmp_dynamic_rt_msg; 1582 if (!icmp_dynamic_rt_msg) 1583 return; 1584 break; 1585 } 1586#endif 1587#ifdef INET6 1588 case AF_INET6: { 1589 extern bool icmp6_dynamic_rt_msg; 1590 if (!icmp6_dynamic_rt_msg) 1591 return; 1592 break; 1593 } 1594#endif 1595 default: 1596 return; 1597 } 1598 1599 memset((void *)&info, 0, sizeof(info)); 1600 info.rti_info[RTAX_DST] = rt_getkey(rt); 1601 info.rti_info[RTAX_GATEWAY] = gateway; 1602 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 1603 if (rt->rt_ifp) { 1604 info.rti_info[RTAX_IFP] = rt->rt_ifp->if_dl->ifa_addr; 1605 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 1606 } 1607 1608 rt_missmsg(cmd, &info, rt->rt_flags, 0); 1609} 1610 1611/* 1612 * Set up or tear down a routing table entry, normally 1613 * for an interface. 1614 */ 1615int 1616rtinit(struct ifaddr *ifa, int cmd, int flags) 1617{ 1618 struct rtentry *rt; 1619 struct sockaddr *dst, *odst; 1620 struct sockaddr_storage maskeddst; 1621 struct rtentry *nrt = NULL; 1622 int error; 1623 struct rt_addrinfo info; 1624 1625 dst = flags & RTF_HOST ? 
ifa->ifa_dstaddr : ifa->ifa_addr; 1626 if (cmd == RTM_DELETE) { 1627 if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) { 1628 /* Delete subnet route for this interface */ 1629 odst = dst; 1630 dst = (struct sockaddr *)&maskeddst; 1631 rt_maskedcopy(odst, dst, ifa->ifa_netmask); 1632 } 1633 if ((rt = rtalloc1(dst, 0)) != NULL) { 1634 if (rt->rt_ifa != ifa) { 1635 rt_unref(rt); 1636 return (flags & RTF_HOST) ? EHOSTUNREACH 1637 : ENETUNREACH; 1638 } 1639 rt_unref(rt); 1640 } 1641 } 1642 memset(&info, 0, sizeof(info)); 1643 info.rti_ifa = ifa; 1644 info.rti_flags = flags | ifa->ifa_flags | RTF_DONTCHANGEIFA; 1645 info.rti_info[RTAX_DST] = dst; 1646 info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; 1647 1648 /* 1649 * XXX here, it seems that we are assuming that ifa_netmask is NULL 1650 * for RTF_HOST. bsdi4 passes NULL explicitly (via intermediate 1651 * variable) when RTF_HOST is 1. still not sure if i can safely 1652 * change it to meet bsdi4 behavior. 1653 */ 1654 if (cmd != RTM_LLINFO_UPD) 1655 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 1656 error = rtrequest1((cmd == RTM_LLINFO_UPD) ? RTM_GET : cmd, &info, 1657 &nrt); 1658 if (error != 0) 1659 return error; 1660 1661 rt = nrt; 1662 RT_REFCNT_TRACE(rt); 1663 switch (cmd) { 1664 case RTM_DELETE: 1665 rt_newmsg(cmd, rt); 1666 rt_free(rt); 1667 break; 1668 case RTM_LLINFO_UPD: 1669 if (cmd == RTM_LLINFO_UPD && ifa->ifa_rtrequest != NULL) 1670 ifa->ifa_rtrequest(RTM_LLINFO_UPD, rt, &info); 1671 rt_newmsg(RTM_CHANGE, rt); 1672 rt_unref(rt); 1673 break; 1674 case RTM_ADD: 1675 KASSERT(rt->rt_ifa == ifa); 1676 rt_newmsg(cmd, rt); 1677 rt_unref(rt); 1678 RT_REFCNT_TRACE(rt); 1679 break; 1680 } 1681 return error; 1682} 1683 1684/* 1685 * Create a local route entry for the address. 1686 * Announce the addition of the address and the route to the routing socket. 1687 */ 1688int 1689rt_ifa_addlocal(struct ifaddr *ifa) 1690{ 1691 struct rtentry *rt; 1692 int e; 1693 1694 /* If there is no loopback entry, allocate one. 
*/ 1695 rt = rtalloc1(ifa->ifa_addr, 0); 1696#ifdef RT_DEBUG 1697 if (rt != NULL) 1698 dump_rt(rt); 1699#endif 1700 if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0 || 1701 (rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0) 1702 { 1703 struct rt_addrinfo info; 1704 struct rtentry *nrt; 1705 1706 memset(&info, 0, sizeof(info)); 1707 info.rti_flags = RTF_HOST | RTF_LOCAL | RTF_DONTCHANGEIFA; 1708 info.rti_info[RTAX_DST] = ifa->ifa_addr; 1709 info.rti_info[RTAX_GATEWAY] = 1710 (const struct sockaddr *)ifa->ifa_ifp->if_sadl; 1711 info.rti_ifa = ifa; 1712 nrt = NULL; 1713 e = rtrequest1(RTM_ADD, &info, &nrt); 1714 rt_addrmsg_rt(RTM_ADD, ifa, e, nrt); 1715 if (nrt != NULL) { 1716 KASSERT(nrt->rt_ifa == ifa); 1717#ifdef RT_DEBUG 1718 dump_rt(nrt); 1719#endif 1720 rt_unref(nrt); 1721 RT_REFCNT_TRACE(nrt); 1722 } 1723 } else { 1724 e = 0; 1725 rt_addrmsg(RTM_NEWADDR, ifa); 1726 } 1727 if (rt != NULL) 1728 rt_unref(rt); 1729 return e; 1730} 1731 1732/* 1733 * Remove the local route entry for the address. 1734 * Announce the removal of the address and the route to the routing socket. 1735 */ 1736int 1737rt_ifa_remlocal(struct ifaddr *ifa, struct ifaddr *alt_ifa) 1738{ 1739 struct rtentry *rt; 1740 int e = 0; 1741 1742 rt = rtalloc1(ifa->ifa_addr, 0); 1743 1744 /* 1745 * Before deleting, check if a corresponding loopbacked 1746 * host route surely exists. With this check, we can avoid 1747 * deleting an interface direct route whose destination is 1748 * the same as the address being removed. This can happen 1749 * when removing a subnet-router anycast address on an 1750 * interface attached to a shared medium. 1751 */ 1752 if (rt != NULL && 1753 (rt->rt_flags & RTF_HOST) && 1754 (rt->rt_ifp->if_flags & IFF_LOOPBACK)) 1755 { 1756 /* If we cannot replace the route's ifaddr with the equivalent 1757 * ifaddr of another interface, I believe it is safest to 1758 * delete the route. 
1759 */ 1760 if (alt_ifa == NULL) { 1761 e = rtdeletemsg(rt); 1762 if (e == 0) { 1763 rt_unref(rt); 1764 rt_free(rt); 1765 rt = NULL; 1766 } 1767 rt_addrmsg(RTM_DELADDR, ifa); 1768 } else { 1769#ifdef NET_MPSAFE 1770 int error = rt_update_prepare(rt); 1771 if (error == 0) { 1772 rt_replace_ifa(rt, alt_ifa); 1773 rt_update_finish(rt); 1774 } else { 1775 /* 1776 * If error != 0, the rtentry is being 1777 * destroyed, so doing nothing doesn't 1778 * matter. 1779 */ 1780 } 1781#else 1782 rt_replace_ifa(rt, alt_ifa); 1783#endif 1784 rt_newmsg(RTM_CHANGE, rt); 1785 } 1786 } else 1787 rt_addrmsg(RTM_DELADDR, ifa); 1788 if (rt != NULL) 1789 rt_unref(rt); 1790 return e; 1791} 1792 1793/* 1794 * Route timer routines. These routes allow functions to be called 1795 * for various routes at any time. This is useful in supporting 1796 * path MTU discovery and redirect route deletion. 1797 * 1798 * This is similar to some BSDI internal functions, but it provides 1799 * for multiple queues for efficiency's sake... 1800 */ 1801 1802LIST_HEAD(, rttimer_queue) rttimer_queue_head; 1803static int rt_init_done = 0; 1804 1805/* 1806 * Some subtle order problems with domain initialization mean that 1807 * we cannot count on this being run from rt_init before various 1808 * protocol initializations are done. Therefore, we make sure 1809 * that this is run when the first queue is added... 
1810 */ 1811 1812static void rt_timer_work(struct work *, void *); 1813 1814static void 1815rt_timer_init(void) 1816{ 1817 int error; 1818 1819 assert(rt_init_done == 0); 1820 1821 /* XXX should be in rt_init */ 1822 rw_init(&rt_lock); 1823 1824 LIST_INIT(&rttimer_queue_head); 1825 callout_init(&rt_timer_ch, CALLOUT_MPSAFE); 1826 error = workqueue_create(&rt_timer_wq, "rt_timer", 1827 rt_timer_work, NULL, PRI_SOFTNET, IPL_SOFTNET, RT_WQ_FLAGS); 1828 if (error) 1829 panic("%s: workqueue_create failed (%d)\n", __func__, error); 1830 callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL); 1831 rt_init_done = 1; 1832} 1833 1834struct rttimer_queue * 1835rt_timer_queue_create(u_int timeout) 1836{ 1837 struct rttimer_queue *rtq; 1838 1839 if (rt_init_done == 0) 1840 rt_timer_init(); 1841 1842 R_Malloc(rtq, struct rttimer_queue *, sizeof *rtq); 1843 if (rtq == NULL) 1844 return NULL; 1845 memset(rtq, 0, sizeof(*rtq)); 1846 1847 rtq->rtq_timeout = timeout; 1848 TAILQ_INIT(&rtq->rtq_head); 1849 RT_WLOCK(); 1850 LIST_INSERT_HEAD(&rttimer_queue_head, rtq, rtq_link); 1851 RT_UNLOCK(); 1852 1853 return rtq; 1854} 1855 1856void 1857rt_timer_queue_change(struct rttimer_queue *rtq, long timeout) 1858{ 1859 1860 rtq->rtq_timeout = timeout; 1861} 1862 1863static void 1864rt_timer_queue_remove_all(struct rttimer_queue *rtq) 1865{ 1866 struct rttimer *r; 1867 1868 RT_ASSERT_WLOCK(); 1869 1870 while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) { 1871 LIST_REMOVE(r, rtt_link); 1872 TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next); 1873 rt_ref(r->rtt_rt); /* XXX */ 1874 RT_REFCNT_TRACE(r->rtt_rt); 1875 RT_UNLOCK(); 1876 (*r->rtt_func)(r->rtt_rt, r); 1877 pool_put(&rttimer_pool, r); 1878 RT_WLOCK(); 1879 if (rtq->rtq_count > 0) 1880 rtq->rtq_count--; 1881 else 1882 printf("rt_timer_queue_remove_all: " 1883 "rtq_count reached 0\n"); 1884 } 1885} 1886 1887void 1888rt_timer_queue_destroy(struct rttimer_queue *rtq) 1889{ 1890 1891 RT_WLOCK(); 1892 rt_timer_queue_remove_all(rtq); 1893 LIST_REMOVE(rtq, 
rtq_link); 1894 RT_UNLOCK(); 1895 1896 /* 1897 * Caller is responsible for freeing the rttimer_queue structure. 1898 */ 1899} 1900 1901unsigned long 1902rt_timer_count(struct rttimer_queue *rtq) 1903{ 1904 return rtq->rtq_count; 1905} 1906 1907static void 1908rt_timer_remove_all(struct rtentry *rt) 1909{ 1910 struct rttimer *r; 1911 1912 RT_WLOCK(); 1913 while ((r = LIST_FIRST(&rt->rt_timer)) != NULL) { 1914 LIST_REMOVE(r, rtt_link); 1915 TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next); 1916 if (r->rtt_queue->rtq_count > 0) 1917 r->rtt_queue->rtq_count--; 1918 else 1919 printf("rt_timer_remove_all: rtq_count reached 0\n"); 1920 pool_put(&rttimer_pool, r); 1921 } 1922 RT_UNLOCK(); 1923} 1924 1925int 1926rt_timer_add(struct rtentry *rt, 1927 void (*func)(struct rtentry *, struct rttimer *), 1928 struct rttimer_queue *queue) 1929{ 1930 struct rttimer *r; 1931 1932 KASSERT(func != NULL); 1933 RT_WLOCK(); 1934 /* 1935 * If there's already a timer with this action, destroy it before 1936 * we add a new one. 
1937 */ 1938 LIST_FOREACH(r, &rt->rt_timer, rtt_link) { 1939 if (r->rtt_func == func) 1940 break; 1941 } 1942 if (r != NULL) { 1943 LIST_REMOVE(r, rtt_link); 1944 TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next); 1945 if (r->rtt_queue->rtq_count > 0) 1946 r->rtt_queue->rtq_count--; 1947 else 1948 printf("rt_timer_add: rtq_count reached 0\n"); 1949 } else { 1950 r = pool_get(&rttimer_pool, PR_NOWAIT); 1951 if (r == NULL) { 1952 RT_UNLOCK(); 1953 return ENOBUFS; 1954 } 1955 } 1956 1957 memset(r, 0, sizeof(*r)); 1958 1959 r->rtt_rt = rt; 1960 r->rtt_time = time_uptime; 1961 r->rtt_func = func; 1962 r->rtt_queue = queue; 1963 LIST_INSERT_HEAD(&rt->rt_timer, r, rtt_link); 1964 TAILQ_INSERT_TAIL(&queue->rtq_head, r, rtt_next); 1965 r->rtt_queue->rtq_count++; 1966 1967 RT_UNLOCK(); 1968 1969 return 0; 1970} 1971 1972static void 1973rt_timer_work(struct work *wk, void *arg) 1974{ 1975 struct rttimer_queue *rtq; 1976 struct rttimer *r; 1977 1978 RT_WLOCK(); 1979 LIST_FOREACH(rtq, &rttimer_queue_head, rtq_link) { 1980 while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL && 1981 (r->rtt_time + rtq->rtq_timeout) < time_uptime) { 1982 LIST_REMOVE(r, rtt_link); 1983 TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next); 1984 /* 1985 * Take a reference to avoid the rtentry is freed 1986 * accidentally after RT_UNLOCK. The callback 1987 * (rtt_func) must rt_unref it by itself. 
1988 */ 1989 rt_ref(r->rtt_rt); 1990 RT_REFCNT_TRACE(r->rtt_rt); 1991 RT_UNLOCK(); 1992 (*r->rtt_func)(r->rtt_rt, r); 1993 pool_put(&rttimer_pool, r); 1994 RT_WLOCK(); 1995 if (rtq->rtq_count > 0) 1996 rtq->rtq_count--; 1997 else 1998 printf("rt_timer_timer: rtq_count reached 0\n"); 1999 } 2000 } 2001 RT_UNLOCK(); 2002 2003 callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL); 2004} 2005 2006static void 2007rt_timer_timer(void *arg) 2008{ 2009 2010 workqueue_enqueue(rt_timer_wq, &rt_timer_wk, NULL); 2011} 2012 2013static struct rtentry * 2014_rtcache_init(struct route *ro, int flag) 2015{ 2016 struct rtentry *rt; 2017 2018 rtcache_invariants(ro); 2019 KASSERT(ro->_ro_rt == NULL); 2020 2021 if (rtcache_getdst(ro) == NULL) 2022 return NULL; 2023 rt = rtalloc1(rtcache_getdst(ro), flag); 2024 if (rt != NULL) { 2025 RT_RLOCK(); 2026 if (ISSET(rt->rt_flags, RTF_UP)) { 2027 ro->_ro_rt = rt; 2028 ro->ro_rtcache_generation = rtcache_generation; 2029 rtcache_ref(rt, ro); 2030 } 2031 RT_UNLOCK(); 2032 rt_unref(rt); 2033 } 2034 2035 rtcache_invariants(ro); 2036 return ro->_ro_rt; 2037} 2038 2039struct rtentry * 2040rtcache_init(struct route *ro) 2041{ 2042 2043 return _rtcache_init(ro, 1); 2044} 2045 2046struct rtentry * 2047rtcache_init_noclone(struct route *ro) 2048{ 2049 2050 return _rtcache_init(ro, 0); 2051} 2052 2053struct rtentry * 2054rtcache_update(struct route *ro, int clone) 2055{ 2056 2057 ro->_ro_rt = NULL; 2058 return _rtcache_init(ro, clone); 2059} 2060 2061void 2062rtcache_copy(struct route *new_ro, struct route *old_ro) 2063{ 2064 struct rtentry *rt; 2065 int ret; 2066 2067 KASSERT(new_ro != old_ro); 2068 rtcache_invariants(new_ro); 2069 rtcache_invariants(old_ro); 2070 2071 rt = rtcache_validate(old_ro); 2072 2073 if (rtcache_getdst(old_ro) == NULL) 2074 goto out; 2075 ret = rtcache_setdst(new_ro, rtcache_getdst(old_ro)); 2076 if (ret != 0) 2077 goto out; 2078 2079 RT_RLOCK(); 2080 new_ro->_ro_rt = rt; 2081 new_ro->ro_rtcache_generation = rtcache_generation; 
2082 RT_UNLOCK(); 2083 rtcache_invariants(new_ro); 2084out: 2085 rtcache_unref(rt, old_ro); 2086 return; 2087} 2088 2089#if defined(RT_DEBUG) && defined(NET_MPSAFE) 2090static void 2091rtcache_trace(const char *func, struct rtentry *rt, struct route *ro) 2092{ 2093 char dst[64]; 2094 2095 sockaddr_format(ro->ro_sa, dst, 64); 2096 printf("trace: %s:\tdst=%s cpu=%d lwp=%p psref=%p target=%p\n", func, dst, 2097 cpu_index(curcpu()), curlwp, &ro->ro_psref, &rt->rt_psref); 2098} 2099#define RTCACHE_PSREF_TRACE(rt, ro) rtcache_trace(__func__, (rt), (ro)) 2100#else 2101#define RTCACHE_PSREF_TRACE(rt, ro) do {} while (0) 2102#endif 2103 2104static void 2105rtcache_ref(struct rtentry *rt, struct route *ro) 2106{ 2107 2108 KASSERT(rt != NULL); 2109 2110#ifdef NET_MPSAFE 2111 RTCACHE_PSREF_TRACE(rt, ro); 2112 ro->ro_bound = curlwp_bind(); 2113 /* XXX Use a real caller's address */ 2114 PSREF_DEBUG_FILL_RETURN_ADDRESS(&ro->ro_psref); 2115 psref_acquire(&ro->ro_psref, &rt->rt_psref, rt_psref_class); 2116#endif 2117} 2118 2119void 2120rtcache_unref(struct rtentry *rt, struct route *ro) 2121{ 2122 2123 if (rt == NULL) 2124 return; 2125 2126#ifdef NET_MPSAFE 2127 psref_release(&ro->ro_psref, &rt->rt_psref, rt_psref_class); 2128 curlwp_bindx(ro->ro_bound); 2129 RTCACHE_PSREF_TRACE(rt, ro); 2130#endif 2131} 2132 2133struct rtentry * 2134rtcache_validate(struct route *ro) 2135{ 2136 struct rtentry *rt = NULL; 2137 2138#ifdef NET_MPSAFE 2139retry: 2140#endif 2141 rtcache_invariants(ro); 2142 RT_RLOCK(); 2143 if (ro->ro_rtcache_generation != rtcache_generation) { 2144 /* The cache is invalidated */ 2145 rt = NULL; 2146 goto out; 2147 } 2148 2149 rt = ro->_ro_rt; 2150 if (rt == NULL) 2151 goto out; 2152 2153 if ((rt->rt_flags & RTF_UP) == 0) { 2154 rt = NULL; 2155 goto out; 2156 } 2157#ifdef NET_MPSAFE 2158 if (ISSET(rt->rt_flags, RTF_UPDATING)) { 2159 if (rt_wait_ok()) { 2160 RT_UNLOCK(); 2161 2162 /* We can wait until the update is complete */ 2163 rt_update_wait(); 2164 goto retry; 
2165 } else { 2166 rt = NULL; 2167 } 2168 } else 2169#endif 2170 rtcache_ref(rt, ro); 2171out: 2172 RT_UNLOCK(); 2173 return rt; 2174} 2175 2176struct rtentry * 2177rtcache_lookup2(struct route *ro, const struct sockaddr *dst, 2178 int clone, int *hitp) 2179{ 2180 const struct sockaddr *odst; 2181 struct rtentry *rt = NULL; 2182 2183 odst = rtcache_getdst(ro); 2184 if (odst == NULL) 2185 goto miss; 2186 2187 if (sockaddr_cmp(odst, dst) != 0) { 2188 rtcache_free(ro); 2189 goto miss; 2190 } 2191 2192 rt = rtcache_validate(ro); 2193 if (rt == NULL) { 2194 ro->_ro_rt = NULL; 2195 goto miss; 2196 } 2197 2198 rtcache_invariants(ro); 2199 2200 if (hitp != NULL) 2201 *hitp = 1; 2202 return rt; 2203miss: 2204 if (hitp != NULL) 2205 *hitp = 0; 2206 if (rtcache_setdst(ro, dst) == 0) 2207 rt = _rtcache_init(ro, clone); 2208 2209 rtcache_invariants(ro); 2210 2211 return rt; 2212} 2213 2214void 2215rtcache_free(struct route *ro) 2216{ 2217 2218 ro->_ro_rt = NULL; 2219 if (ro->ro_sa != NULL) { 2220 sockaddr_free(ro->ro_sa); 2221 ro->ro_sa = NULL; 2222 } 2223 rtcache_invariants(ro); 2224} 2225 2226int 2227rtcache_setdst(struct route *ro, const struct sockaddr *sa) 2228{ 2229 KASSERT(sa != NULL); 2230 2231 rtcache_invariants(ro); 2232 if (ro->ro_sa != NULL) { 2233 if (ro->ro_sa->sa_family == sa->sa_family) { 2234 ro->_ro_rt = NULL; 2235 sockaddr_copy(ro->ro_sa, ro->ro_sa->sa_len, sa); 2236 rtcache_invariants(ro); 2237 return 0; 2238 } 2239 /* free ro_sa, wrong family */ 2240 rtcache_free(ro); 2241 } 2242 2243 KASSERT(ro->_ro_rt == NULL); 2244 2245 if ((ro->ro_sa = sockaddr_dup(sa, M_ZERO | M_NOWAIT)) == NULL) { 2246 rtcache_invariants(ro); 2247 return ENOMEM; 2248 } 2249 rtcache_invariants(ro); 2250 return 0; 2251} 2252 2253static void 2254rtcache_percpu_init_cpu(void *p, void *arg __unused, struct cpu_info *ci __unused) 2255{ 2256 struct route **rop = p; 2257 2258 /* 2259 * We can't have struct route as percpu data because it can be destroyed 2260 * over a memory enlargement 
processing of percpu. 2261 */ 2262 *rop = kmem_zalloc(sizeof(**rop), KM_SLEEP); 2263} 2264 2265percpu_t * 2266rtcache_percpu_alloc(void) 2267{ 2268 2269 return percpu_create(sizeof(struct route *), 2270 rtcache_percpu_init_cpu, NULL, NULL); 2271} 2272 2273const struct sockaddr * 2274rt_settag(struct rtentry *rt, const struct sockaddr *tag) 2275{ 2276 if (rt->rt_tag != tag) { 2277 if (rt->rt_tag != NULL) 2278 sockaddr_free(rt->rt_tag); 2279 rt->rt_tag = sockaddr_dup(tag, M_ZERO | M_NOWAIT); 2280 } 2281 return rt->rt_tag; 2282} 2283 2284struct sockaddr * 2285rt_gettag(const struct rtentry *rt) 2286{ 2287 return rt->rt_tag; 2288} 2289 2290int 2291rt_check_reject_route(const struct rtentry *rt, const struct ifnet *ifp) 2292{ 2293 2294 if ((rt->rt_flags & RTF_REJECT) != 0) { 2295 /* Mimic looutput */ 2296 if (ifp->if_flags & IFF_LOOPBACK) 2297 return (rt->rt_flags & RTF_HOST) ? 2298 EHOSTUNREACH : ENETUNREACH; 2299 else if (rt->rt_rmx.rmx_expire == 0 || 2300 time_uptime < rt->rt_rmx.rmx_expire) 2301 return (rt->rt_flags & RTF_GATEWAY) ? 
2302 EHOSTUNREACH : EHOSTDOWN; 2303 } 2304 2305 return 0; 2306} 2307 2308void 2309rt_delete_matched_entries(sa_family_t family, int (*f)(struct rtentry *, void *), 2310 void *v, bool notify) 2311{ 2312 2313 for (;;) { 2314 int s; 2315 int error; 2316 struct rtentry *rt, *retrt = NULL; 2317 2318 RT_RLOCK(); 2319 s = splsoftnet(); 2320 rt = rtbl_search_matched_entry(family, f, v); 2321 if (rt == NULL) { 2322 splx(s); 2323 RT_UNLOCK(); 2324 return; 2325 } 2326 rt_ref(rt); 2327 RT_REFCNT_TRACE(rt); 2328 splx(s); 2329 RT_UNLOCK(); 2330 2331 error = rtrequest(RTM_DELETE, rt_getkey(rt), rt->rt_gateway, 2332 rt_mask(rt), rt->rt_flags, &retrt); 2333 if (error == 0) { 2334 KASSERT(retrt == rt); 2335 KASSERT((retrt->rt_flags & RTF_UP) == 0); 2336 if (notify) 2337 rt_newmsg(RTM_DELETE, retrt); 2338 retrt->rt_ifp = NULL; 2339 rt_unref(rt); 2340 RT_REFCNT_TRACE(rt); 2341 rt_free(retrt); 2342 } else if (error == ESRCH) { 2343 /* Someone deleted the entry already. */ 2344 rt_unref(rt); 2345 RT_REFCNT_TRACE(rt); 2346 } else { 2347 log(LOG_ERR, "%s: unable to delete rtentry @ %p, " 2348 "error = %d\n", rt->rt_ifp->if_xname, rt, error); 2349 /* XXX how to treat this case? 
*/ 2350 } 2351 } 2352} 2353 2354static int 2355rt_walktree_locked(sa_family_t family, int (*f)(struct rtentry *, void *), 2356 void *v) 2357{ 2358 2359 return rtbl_walktree(family, f, v); 2360} 2361 2362void 2363rt_replace_ifa_matched_entries(sa_family_t family, 2364 int (*f)(struct rtentry *, void *), void *v, struct ifaddr *ifa) 2365{ 2366 2367 for (;;) { 2368 int s; 2369#ifdef NET_MPSAFE 2370 int error; 2371#endif 2372 struct rtentry *rt; 2373 2374 RT_RLOCK(); 2375 s = splsoftnet(); 2376 rt = rtbl_search_matched_entry(family, f, v); 2377 if (rt == NULL) { 2378 splx(s); 2379 RT_UNLOCK(); 2380 return; 2381 } 2382 rt_ref(rt); 2383 RT_REFCNT_TRACE(rt); 2384 splx(s); 2385 RT_UNLOCK(); 2386 2387#ifdef NET_MPSAFE 2388 error = rt_update_prepare(rt); 2389 if (error == 0) { 2390 rt_replace_ifa(rt, ifa); 2391 rt_update_finish(rt); 2392 rt_newmsg(RTM_CHANGE, rt); 2393 } else { 2394 /* 2395 * If error != 0, the rtentry is being 2396 * destroyed, so doing nothing doesn't 2397 * matter. 2398 */ 2399 } 2400#else 2401 rt_replace_ifa(rt, ifa); 2402 rt_newmsg(RTM_CHANGE, rt); 2403#endif 2404 rt_unref(rt); 2405 RT_REFCNT_TRACE(rt); 2406 } 2407} 2408 2409int 2410rt_walktree(sa_family_t family, int (*f)(struct rtentry *, void *), void *v) 2411{ 2412 int error; 2413 2414 RT_RLOCK(); 2415 error = rt_walktree_locked(family, f, v); 2416 RT_UNLOCK(); 2417 2418 return error; 2419} 2420 2421#ifdef DDB 2422 2423#include <machine/db_machdep.h> 2424#include <ddb/db_interface.h> 2425#include <ddb/db_output.h> 2426 2427#define rt_expire rt_rmx.rmx_expire 2428 2429static void 2430db_print_sa(const struct sockaddr *sa) 2431{ 2432 int len; 2433 const u_char *p; 2434 2435 if (sa == NULL) { 2436 db_printf("[NULL]"); 2437 return; 2438 } 2439 2440 p = (const u_char *)sa; 2441 len = sa->sa_len; 2442 db_printf("["); 2443 while (len > 0) { 2444 db_printf("%d", *p); 2445 p++; len--; 2446 if (len) db_printf(","); 2447 } 2448 db_printf("]\n"); 2449} 2450 2451static void 2452db_print_ifa(struct ifaddr *ifa) 
2453{ 2454 if (ifa == NULL) 2455 return; 2456 db_printf(" ifa_addr="); 2457 db_print_sa(ifa->ifa_addr); 2458 db_printf(" ifa_dsta="); 2459 db_print_sa(ifa->ifa_dstaddr); 2460 db_printf(" ifa_mask="); 2461 db_print_sa(ifa->ifa_netmask); 2462 db_printf(" flags=0x%x,refcnt=%d,metric=%d\n", 2463 ifa->ifa_flags, 2464 ifa->ifa_refcnt, 2465 ifa->ifa_metric); 2466} 2467 2468/* 2469 * Function to pass to rt_walktree(). 2470 * Return non-zero error to abort walk. 2471 */ 2472static int 2473db_show_rtentry(struct rtentry *rt, void *w) 2474{ 2475 db_printf("rtentry=%p", rt); 2476 2477 db_printf(" flags=0x%x refcnt=%d use=%"PRId64" expire=%"PRId64"\n", 2478 rt->rt_flags, rt->rt_refcnt, 2479 rt->rt_use, (uint64_t)rt->rt_expire); 2480 2481 db_printf(" key="); db_print_sa(rt_getkey(rt)); 2482 db_printf(" mask="); db_print_sa(rt_mask(rt)); 2483 db_printf(" gw="); db_print_sa(rt->rt_gateway); 2484 2485 db_printf(" ifp=%p ", rt->rt_ifp); 2486 if (rt->rt_ifp) 2487 db_printf("(%s)", rt->rt_ifp->if_xname); 2488 else 2489 db_printf("(NULL)"); 2490 2491 db_printf(" ifa=%p\n", rt->rt_ifa); 2492 db_print_ifa(rt->rt_ifa); 2493 2494 db_printf(" gwroute=%p llinfo=%p\n", 2495 rt->rt_gwroute, rt->rt_llinfo); 2496 2497 return 0; 2498} 2499 2500/* 2501 * Function to print all the route trees. 2502 * Use this from ddb: "show routes" 2503 */ 2504void 2505db_show_routes(db_expr_t addr, bool have_addr, 2506 db_expr_t count, const char *modif) 2507{ 2508 2509 /* Taking RT_LOCK will fail if LOCKDEBUG is enabled. */ 2510 rt_walktree_locked(AF_INET, db_show_rtentry, NULL); 2511} 2512#endif 2513