if_tun.c revision 267985
1296177Sjhibbits/* $NetBSD: if_tun.c,v 1.14 1994/06/29 06:36:25 cgd Exp $ */ 2296177Sjhibbits 3296177Sjhibbits/*- 4296177Sjhibbits * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk> 5296177Sjhibbits * Nottingham University 1987. 6296177Sjhibbits * 7296177Sjhibbits * This source may be freely distributed, however I would be interested 8296177Sjhibbits * in any changes that are made. 9296177Sjhibbits * 10296177Sjhibbits * This driver takes packets off the IP i/f and hands them up to a 11296177Sjhibbits * user process to have its wicked way with. This driver has it's 12296177Sjhibbits * roots in a similar driver written by Phil Cockcroft (formerly) at 13296177Sjhibbits * UCL. This driver is based much more on read/write/poll mode of 14296177Sjhibbits * operation though. 15296177Sjhibbits * 16296177Sjhibbits * $FreeBSD: head/sys/net/if_tun.c 267985 2014-06-27 22:05:21Z gjb $ 17296177Sjhibbits */ 18296177Sjhibbits 19296177Sjhibbits#include "opt_inet.h" 20296177Sjhibbits#include "opt_inet6.h" 21296177Sjhibbits 22296177Sjhibbits#include <sys/param.h> 23296177Sjhibbits#include <sys/priv.h> 24296177Sjhibbits#include <sys/proc.h> 25296177Sjhibbits#include <sys/systm.h> 26296177Sjhibbits#include <sys/jail.h> 27296177Sjhibbits#include <sys/mbuf.h> 28296177Sjhibbits#include <sys/module.h> 29296177Sjhibbits#include <sys/socket.h> 30296177Sjhibbits#include <sys/fcntl.h> 31296177Sjhibbits#include <sys/filio.h> 32296177Sjhibbits#include <sys/sockio.h> 33296177Sjhibbits#include <sys/ttycom.h> 34296177Sjhibbits#include <sys/poll.h> 35296177Sjhibbits#include <sys/selinfo.h> 36296177Sjhibbits#include <sys/signalvar.h> 37296177Sjhibbits#include <sys/filedesc.h> 38296177Sjhibbits#include <sys/kernel.h> 39296177Sjhibbits#include <sys/sysctl.h> 40296177Sjhibbits#include <sys/conf.h> 41296177Sjhibbits#include <sys/uio.h> 42296177Sjhibbits#include <sys/malloc.h> 43296177Sjhibbits#include <sys/random.h> 44296177Sjhibbits 45296177Sjhibbits#include <net/if.h> 46296177Sjhibbits#include <net/if_var.h> 47296177Sjhibbits#include <net/if_clone.h> 48296177Sjhibbits#include <net/if_types.h> 49296177Sjhibbits#include <net/netisr.h> 50296177Sjhibbits#include <net/route.h> 51296177Sjhibbits#include <net/vnet.h> 52296177Sjhibbits#ifdef INET 53296177Sjhibbits#include <netinet/in.h> 54296177Sjhibbits#endif 55296177Sjhibbits#include <net/bpf.h> 56296177Sjhibbits#include <net/if_tun.h> 57296177Sjhibbits 58296177Sjhibbits#include <sys/queue.h> 59296177Sjhibbits#include <sys/condvar.h> 60296177Sjhibbits 61296177Sjhibbits#include <security/mac/mac_framework.h> 62296177Sjhibbits 63296177Sjhibbits/* 64296177Sjhibbits * tun_list is protected by global tunmtx. Other mutable fields are 65296177Sjhibbits * protected by tun->tun_mtx, or by their owning subsystem. tun_dev is 66296177Sjhibbits * static for the duration of a tunnel interface. 67296177Sjhibbits */ 68296177Sjhibbitsstruct tun_softc { 69296177Sjhibbits TAILQ_ENTRY(tun_softc) tun_list; 70296177Sjhibbits struct cdev *tun_dev; 71296177Sjhibbits u_short tun_flags; /* misc flags */ 72296177Sjhibbits#define TUN_OPEN 0x0001 73296177Sjhibbits#define TUN_INITED 0x0002 74296177Sjhibbits#define TUN_RCOLL 0x0004 75296177Sjhibbits#define TUN_IASET 0x0008 76296177Sjhibbits#define TUN_DSTADDR 0x0010 77296177Sjhibbits#define TUN_LMODE 0x0020 78296177Sjhibbits#define TUN_RWAIT 0x0040 79296177Sjhibbits#define TUN_ASYNC 0x0080 80296177Sjhibbits#define TUN_IFHEAD 0x0100 81296177Sjhibbits 82296177Sjhibbits#define TUN_READY (TUN_OPEN | TUN_INITED) 83296177Sjhibbits 84296177Sjhibbits /* 85296177Sjhibbits * XXXRW: tun_pid is used to exclusively lock /dev/tun. Is this 86296177Sjhibbits * actually needed? Can we just return EBUSY if already open? 87296177Sjhibbits * Problem is that this involved inherent races when a tun device 88296177Sjhibbits * is handed off from one process to another, as opposed to just 89296177Sjhibbits * being slightly stale informationally. 90296177Sjhibbits */ 91296177Sjhibbits pid_t tun_pid; /* owning pid */ 92296177Sjhibbits struct ifnet *tun_ifp; /* the interface */ 93296177Sjhibbits struct sigio *tun_sigio; /* information for async I/O */ 94296177Sjhibbits struct selinfo tun_rsel; /* read select */ 95296177Sjhibbits struct mtx tun_mtx; /* protect mutable softc fields */ 96296177Sjhibbits struct cv tun_cv; /* protect against ref'd dev destroy */ 97296177Sjhibbits}; 98296177Sjhibbits#define TUN2IFP(sc) ((sc)->tun_ifp) 99296177Sjhibbits 100296177Sjhibbits#define TUNDEBUG if (tundebug) if_printf 101296177Sjhibbits 102296177Sjhibbits/* 103296177Sjhibbits * All mutable global variables in if_tun are locked using tunmtx, with 104296177Sjhibbits * the exception of tundebug, which is used unlocked, and tunclones, 105296177Sjhibbits * which is static after setup. 106296177Sjhibbits */ 107296177Sjhibbitsstatic struct mtx tunmtx; 108296177Sjhibbitsstatic const char tunname[] = "tun"; 109296177Sjhibbitsstatic MALLOC_DEFINE(M_TUN, tunname, "Tunnel Interface"); 110296177Sjhibbitsstatic int tundebug = 0; 111296177Sjhibbitsstatic int tundclone = 1; 112296177Sjhibbitsstatic struct clonedevs *tunclones; 113296177Sjhibbitsstatic TAILQ_HEAD(,tun_softc) tunhead = TAILQ_HEAD_INITIALIZER(tunhead); 114296177SjhibbitsSYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, ""); 115296177Sjhibbits 116296177SjhibbitsSYSCTL_DECL(_net_link); 117296177Sjhibbitsstatic SYSCTL_NODE(_net_link, OID_AUTO, tun, CTLFLAG_RW, 0, 118296177Sjhibbits "IP tunnel software network interface."); 119296177SjhibbitsSYSCTL_INT(_net_link_tun, OID_AUTO, devfs_cloning, CTLFLAG_RW, &tundclone, 0, 120296177Sjhibbits "Enable legacy devfs interface creation."); 121296177Sjhibbits 122296177SjhibbitsTUNABLE_INT("net.link.tun.devfs_cloning", &tundclone); 123296177Sjhibbits 124296177Sjhibbitsstatic void tunclone(void *arg, struct ucred *cred, char *name, 125296177Sjhibbits int namelen, struct cdev **dev); 126296177Sjhibbitsstatic void tuncreate(const char *name, struct cdev *dev); 127296177Sjhibbitsstatic int tunifioctl(struct ifnet *, u_long, caddr_t); 128296177Sjhibbitsstatic void tuninit(struct ifnet *); 129296177Sjhibbitsstatic int tunmodevent(module_t, int, void *); 130296177Sjhibbitsstatic int tunoutput(struct ifnet *, struct mbuf *, 131296177Sjhibbits const struct sockaddr *, struct route *ro); 132296177Sjhibbitsstatic void tunstart(struct ifnet *); 133296177Sjhibbits 134296177Sjhibbitsstatic int tun_clone_create(struct if_clone *, int, caddr_t); 135296177Sjhibbitsstatic void tun_clone_destroy(struct ifnet *); 136296177Sjhibbitsstatic struct if_clone *tun_cloner; 137296177Sjhibbits 138296177Sjhibbitsstatic d_open_t tunopen; 139296177Sjhibbitsstatic d_close_t tunclose; 140296177Sjhibbitsstatic d_read_t tunread; 141296177Sjhibbitsstatic d_write_t tunwrite; 142296177Sjhibbitsstatic d_ioctl_t tunioctl; 143296177Sjhibbitsstatic d_poll_t tunpoll; 144296177Sjhibbitsstatic d_kqfilter_t tunkqfilter; 145296177Sjhibbits 146296177Sjhibbitsstatic int tunkqread(struct knote *, long); 147296177Sjhibbitsstatic int tunkqwrite(struct knote *, long); 148296177Sjhibbitsstatic void tunkqdetach(struct knote *); 149296177Sjhibbits 150296177Sjhibbitsstatic struct filterops tun_read_filterops = { 151296177Sjhibbits .f_isfd = 1, 152296177Sjhibbits .f_attach = NULL, 153296177Sjhibbits .f_detach = tunkqdetach, 154296177Sjhibbits .f_event = tunkqread, 155296177Sjhibbits}; 156296177Sjhibbits 157296177Sjhibbitsstatic struct filterops tun_write_filterops = { 158296177Sjhibbits .f_isfd = 1, 159296177Sjhibbits .f_attach = NULL, 160296177Sjhibbits .f_detach = tunkqdetach, 161296177Sjhibbits .f_event = tunkqwrite, 162296177Sjhibbits}; 163296177Sjhibbits 164296177Sjhibbitsstatic struct cdevsw tun_cdevsw = { 165296177Sjhibbits .d_version = D_VERSION, 166296177Sjhibbits .d_flags = D_NEEDMINOR, 167296177Sjhibbits .d_open = tunopen, 168296177Sjhibbits .d_close = tunclose, 169296177Sjhibbits .d_read = tunread, 170296177Sjhibbits .d_write = tunwrite, 171296177Sjhibbits .d_ioctl = tunioctl, 172296177Sjhibbits .d_poll = tunpoll, 173296177Sjhibbits .d_kqfilter = tunkqfilter, 174296177Sjhibbits .d_name = tunname, 175296177Sjhibbits}; 176296177Sjhibbits 177296177Sjhibbitsstatic int 178296177Sjhibbitstun_clone_create(struct if_clone *ifc, int unit, caddr_t params) 179296177Sjhibbits{ 180296177Sjhibbits struct cdev *dev; 181296177Sjhibbits int i; 182296177Sjhibbits 183296177Sjhibbits /* find any existing device, or allocate new unit number */ 184296177Sjhibbits i = clone_create(&tunclones, &tun_cdevsw, &unit, &dev, 0); 185296177Sjhibbits if (i) { 186296177Sjhibbits /* No preexisting struct cdev *, create one */ 187296177Sjhibbits dev = make_dev(&tun_cdevsw, unit, 188296177Sjhibbits UID_UUCP, GID_DIALER, 0600, "%s%d", tunname, unit); 189296177Sjhibbits } 190296177Sjhibbits tuncreate(tunname, dev); 191296177Sjhibbits 192296177Sjhibbits return (0); 193296177Sjhibbits} 194296177Sjhibbits 195296177Sjhibbitsstatic void 196296177Sjhibbitstunclone(void *arg, struct ucred *cred, char *name, int namelen, 197296177Sjhibbits struct cdev **dev) 198296177Sjhibbits{ 199296177Sjhibbits char devname[SPECNAMELEN + 1]; 200296177Sjhibbits int u, i, append_unit; 201296177Sjhibbits 202296177Sjhibbits if (*dev != NULL) 203296177Sjhibbits return; 204296177Sjhibbits 205296177Sjhibbits /* 206296177Sjhibbits * If tun cloning is enabled, only the superuser can create an 207296177Sjhibbits * interface. 208296177Sjhibbits */ 209296177Sjhibbits if (!tundclone || priv_check_cred(cred, PRIV_NET_IFCREATE, 0) != 0) 210296177Sjhibbits return; 211296177Sjhibbits 212296177Sjhibbits if (strcmp(name, tunname) == 0) { 213296177Sjhibbits u = -1; 214296177Sjhibbits } else if (dev_stdclone(name, NULL, tunname, &u) != 1) 215296177Sjhibbits return; /* Don't recognise the name */ 216296177Sjhibbits if (u != -1 && u > IF_MAXUNIT) 217296177Sjhibbits return; /* Unit number too high */ 218296177Sjhibbits 219296177Sjhibbits if (u == -1) 220296177Sjhibbits append_unit = 1; 221296177Sjhibbits else 222296177Sjhibbits append_unit = 0; 223296177Sjhibbits 224296177Sjhibbits CURVNET_SET(CRED_TO_VNET(cred)); 225296177Sjhibbits /* find any existing device, or allocate new unit number */ 226296177Sjhibbits i = clone_create(&tunclones, &tun_cdevsw, &u, dev, 0); 227296177Sjhibbits if (i) { 228296177Sjhibbits if (append_unit) { 229296177Sjhibbits namelen = snprintf(devname, sizeof(devname), "%s%d", 230296177Sjhibbits name, u); 231296177Sjhibbits name = devname; 232296177Sjhibbits } 233296177Sjhibbits /* No preexisting struct cdev *, create one */ 234296177Sjhibbits *dev = make_dev_credf(MAKEDEV_REF, &tun_cdevsw, u, cred, 235296177Sjhibbits UID_UUCP, GID_DIALER, 0600, "%s", name); 236296177Sjhibbits } 237296177Sjhibbits 238296177Sjhibbits if_clone_create(name, namelen, NULL); 239296177Sjhibbits CURVNET_RESTORE(); 240296177Sjhibbits} 241296177Sjhibbits 242296177Sjhibbitsstatic void 243296177Sjhibbitstun_destroy(struct tun_softc *tp) 244296177Sjhibbits{ 245296177Sjhibbits struct cdev *dev; 246296177Sjhibbits 247296177Sjhibbits mtx_lock(&tp->tun_mtx); 248296177Sjhibbits if ((tp->tun_flags & TUN_OPEN) != 0) 249296177Sjhibbits cv_wait_unlock(&tp->tun_cv, &tp->tun_mtx); 250296177Sjhibbits else 251296177Sjhibbits mtx_unlock(&tp->tun_mtx); 252296177Sjhibbits 253296177Sjhibbits CURVNET_SET(TUN2IFP(tp)->if_vnet); 254296177Sjhibbits dev = tp->tun_dev; 255296177Sjhibbits bpfdetach(TUN2IFP(tp)); 256296177Sjhibbits if_detach(TUN2IFP(tp)); 257296177Sjhibbits if_free(TUN2IFP(tp)); 258296177Sjhibbits destroy_dev(dev); 259296177Sjhibbits seldrain(&tp->tun_rsel); 260296177Sjhibbits knlist_clear(&tp->tun_rsel.si_note, 0); 261296177Sjhibbits knlist_destroy(&tp->tun_rsel.si_note); 262296177Sjhibbits mtx_destroy(&tp->tun_mtx); 263296177Sjhibbits cv_destroy(&tp->tun_cv); 264296177Sjhibbits free(tp, M_TUN); 265296177Sjhibbits CURVNET_RESTORE(); 266296177Sjhibbits} 267296177Sjhibbits 268296177Sjhibbitsstatic void 269296177Sjhibbitstun_clone_destroy(struct ifnet *ifp) 270296177Sjhibbits{ 271296177Sjhibbits struct tun_softc *tp = ifp->if_softc; 272296177Sjhibbits 273296177Sjhibbits mtx_lock(&tunmtx); 274296177Sjhibbits TAILQ_REMOVE(&tunhead, tp, tun_list); 275296177Sjhibbits mtx_unlock(&tunmtx); 276296177Sjhibbits tun_destroy(tp); 277296177Sjhibbits} 278296177Sjhibbits 279296177Sjhibbitsstatic int 280296177Sjhibbitstunmodevent(module_t mod, int type, void *data) 281296177Sjhibbits{ 282296177Sjhibbits static eventhandler_tag tag; 283296177Sjhibbits struct tun_softc *tp; 284296177Sjhibbits 285296177Sjhibbits switch (type) { 286296177Sjhibbits case MOD_LOAD: 287296177Sjhibbits mtx_init(&tunmtx, "tunmtx", NULL, MTX_DEF); 288296177Sjhibbits clone_setup(&tunclones); 289296177Sjhibbits tag = EVENTHANDLER_REGISTER(dev_clone, tunclone, 0, 1000); 290296177Sjhibbits if (tag == NULL) 291296177Sjhibbits return (ENOMEM); 292296177Sjhibbits tun_cloner = if_clone_simple(tunname, tun_clone_create, 293296177Sjhibbits tun_clone_destroy, 0); 294296177Sjhibbits break; 295296177Sjhibbits case MOD_UNLOAD: 296296177Sjhibbits if_clone_detach(tun_cloner); 297296177Sjhibbits EVENTHANDLER_DEREGISTER(dev_clone, tag); 298296177Sjhibbits drain_dev_clone_events(); 299296177Sjhibbits 300296177Sjhibbits mtx_lock(&tunmtx); 301296177Sjhibbits while ((tp = TAILQ_FIRST(&tunhead)) != NULL) { 302296177Sjhibbits TAILQ_REMOVE(&tunhead, tp, tun_list); 303296177Sjhibbits mtx_unlock(&tunmtx); 304296177Sjhibbits tun_destroy(tp); 305296177Sjhibbits mtx_lock(&tunmtx); 306296177Sjhibbits } 307296177Sjhibbits mtx_unlock(&tunmtx); 308296177Sjhibbits clone_cleanup(&tunclones); 309296177Sjhibbits mtx_destroy(&tunmtx); 310296177Sjhibbits break; 311296177Sjhibbits default: 312296177Sjhibbits return EOPNOTSUPP; 313296177Sjhibbits } 314296177Sjhibbits return 0; 315296177Sjhibbits} 316296177Sjhibbits 317296177Sjhibbitsstatic moduledata_t tun_mod = { 318296177Sjhibbits "if_tun", 319296177Sjhibbits tunmodevent, 320296177Sjhibbits 0 321296177Sjhibbits}; 322296177Sjhibbits 323296177SjhibbitsDECLARE_MODULE(if_tun, tun_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 324296177SjhibbitsMODULE_VERSION(if_tun, 1); 325296177Sjhibbits 326296177Sjhibbitsstatic void 327296177Sjhibbitstunstart(struct ifnet *ifp) 328296177Sjhibbits{ 329296177Sjhibbits struct tun_softc *tp = ifp->if_softc; 330296177Sjhibbits struct mbuf *m; 331296177Sjhibbits 332296177Sjhibbits TUNDEBUG(ifp,"%s starting\n", ifp->if_xname); 333296177Sjhibbits if (ALTQ_IS_ENABLED(&ifp->if_snd)) { 334296177Sjhibbits IFQ_LOCK(&ifp->if_snd); 335296177Sjhibbits IFQ_POLL_NOLOCK(&ifp->if_snd, m); 336296177Sjhibbits if (m == NULL) { 337296177Sjhibbits IFQ_UNLOCK(&ifp->if_snd); 338296177Sjhibbits return; 339296177Sjhibbits } 340296177Sjhibbits IFQ_UNLOCK(&ifp->if_snd); 341296177Sjhibbits } 342296177Sjhibbits 343296177Sjhibbits mtx_lock(&tp->tun_mtx); 344296177Sjhibbits if (tp->tun_flags & TUN_RWAIT) { 345296177Sjhibbits tp->tun_flags &= ~TUN_RWAIT; 346296177Sjhibbits wakeup(tp); 347296177Sjhibbits } 348296177Sjhibbits selwakeuppri(&tp->tun_rsel, PZERO + 1); 349296177Sjhibbits KNOTE_LOCKED(&tp->tun_rsel.si_note, 0); 350296177Sjhibbits if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio) { 351296177Sjhibbits mtx_unlock(&tp->tun_mtx); 352296177Sjhibbits pgsigio(&tp->tun_sigio, SIGIO, 0); 353296177Sjhibbits } else 354296177Sjhibbits mtx_unlock(&tp->tun_mtx); 355296177Sjhibbits} 356296177Sjhibbits 357296177Sjhibbits/* XXX: should return an error code so it can fail. */ 358static void 359tuncreate(const char *name, struct cdev *dev) 360{ 361 struct tun_softc *sc; 362 struct ifnet *ifp; 363 364 sc = malloc(sizeof(*sc), M_TUN, M_WAITOK | M_ZERO); 365 mtx_init(&sc->tun_mtx, "tun_mtx", NULL, MTX_DEF); 366 cv_init(&sc->tun_cv, "tun_condvar"); 367 sc->tun_flags = TUN_INITED; 368 sc->tun_dev = dev; 369 mtx_lock(&tunmtx); 370 TAILQ_INSERT_TAIL(&tunhead, sc, tun_list); 371 mtx_unlock(&tunmtx); 372 373 ifp = sc->tun_ifp = if_alloc(IFT_PPP); 374 if (ifp == NULL) 375 panic("%s%d: failed to if_alloc() interface.\n", 376 name, dev2unit(dev)); 377 if_initname(ifp, name, dev2unit(dev)); 378 ifp->if_mtu = TUNMTU; 379 ifp->if_ioctl = tunifioctl; 380 ifp->if_output = tunoutput; 381 ifp->if_start = tunstart; 382 ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST; 383 ifp->if_softc = sc; 384 IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); 385 ifp->if_snd.ifq_drv_maxlen = 0; 386 IFQ_SET_READY(&ifp->if_snd); 387 knlist_init_mtx(&sc->tun_rsel.si_note, &sc->tun_mtx); 388 ifp->if_capabilities |= IFCAP_LINKSTATE; 389 ifp->if_capenable |= IFCAP_LINKSTATE; 390 391 if_attach(ifp); 392 bpfattach(ifp, DLT_NULL, sizeof(u_int32_t)); 393 dev->si_drv1 = sc; 394 TUNDEBUG(ifp, "interface %s is created, minor = %#x\n", 395 ifp->if_xname, dev2unit(dev)); 396} 397 398static int 399tunopen(struct cdev *dev, int flag, int mode, struct thread *td) 400{ 401 struct ifnet *ifp; 402 struct tun_softc *tp; 403 404 /* 405 * XXXRW: Non-atomic test and set of dev->si_drv1 requires 406 * synchronization. 407 */ 408 tp = dev->si_drv1; 409 if (!tp) { 410 tuncreate(tunname, dev); 411 tp = dev->si_drv1; 412 } 413 414 /* 415 * XXXRW: This use of tun_pid is subject to error due to the 416 * fact that a reference to the tunnel can live beyond the 417 * death of the process that created it. Can we replace this 418 * with a simple busy flag? 419 */ 420 mtx_lock(&tp->tun_mtx); 421 if (tp->tun_pid != 0 && tp->tun_pid != td->td_proc->p_pid) { 422 mtx_unlock(&tp->tun_mtx); 423 return (EBUSY); 424 } 425 tp->tun_pid = td->td_proc->p_pid; 426 427 tp->tun_flags |= TUN_OPEN; 428 ifp = TUN2IFP(tp); 429 if_link_state_change(ifp, LINK_STATE_UP); 430 TUNDEBUG(ifp, "open\n"); 431 mtx_unlock(&tp->tun_mtx); 432 433 return (0); 434} 435 436/* 437 * tunclose - close the device - mark i/f down & delete 438 * routing info 439 */ 440static int 441tunclose(struct cdev *dev, int foo, int bar, struct thread *td) 442{ 443 struct tun_softc *tp; 444 struct ifnet *ifp; 445 446 tp = dev->si_drv1; 447 ifp = TUN2IFP(tp); 448 449 mtx_lock(&tp->tun_mtx); 450 tp->tun_flags &= ~TUN_OPEN; 451 tp->tun_pid = 0; 452 453 /* 454 * junk all pending output 455 */ 456 CURVNET_SET(ifp->if_vnet); 457 IFQ_PURGE(&ifp->if_snd); 458 459 if (ifp->if_flags & IFF_UP) { 460 mtx_unlock(&tp->tun_mtx); 461 if_down(ifp); 462 mtx_lock(&tp->tun_mtx); 463 } 464 465 /* Delete all addresses and routes which reference this interface. */ 466 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 467 struct ifaddr *ifa; 468 469 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 470 mtx_unlock(&tp->tun_mtx); 471 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 472 /* deal w/IPv4 PtP destination; unlocked read */ 473 if (ifa->ifa_addr->sa_family == AF_INET) { 474 rtinit(ifa, (int)RTM_DELETE, 475 tp->tun_flags & TUN_DSTADDR ? RTF_HOST : 0); 476 } else { 477 rtinit(ifa, (int)RTM_DELETE, 0); 478 } 479 } 480 if_purgeaddrs(ifp); 481 mtx_lock(&tp->tun_mtx); 482 } 483 if_link_state_change(ifp, LINK_STATE_DOWN); 484 CURVNET_RESTORE(); 485 486 funsetown(&tp->tun_sigio); 487 selwakeuppri(&tp->tun_rsel, PZERO + 1); 488 KNOTE_LOCKED(&tp->tun_rsel.si_note, 0); 489 TUNDEBUG (ifp, "closed\n"); 490 491 cv_broadcast(&tp->tun_cv); 492 mtx_unlock(&tp->tun_mtx); 493 return (0); 494} 495 496static void 497tuninit(struct ifnet *ifp) 498{ 499 struct tun_softc *tp = ifp->if_softc; 500#ifdef INET 501 struct ifaddr *ifa; 502#endif 503 504 TUNDEBUG(ifp, "tuninit\n"); 505 506 mtx_lock(&tp->tun_mtx); 507 ifp->if_flags |= IFF_UP; 508 ifp->if_drv_flags |= IFF_DRV_RUNNING; 509 getmicrotime(&ifp->if_lastchange); 510 511#ifdef INET 512 if_addr_rlock(ifp); 513 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 514 if (ifa->ifa_addr->sa_family == AF_INET) { 515 struct sockaddr_in *si; 516 517 si = (struct sockaddr_in *)ifa->ifa_addr; 518 if (si->sin_addr.s_addr) 519 tp->tun_flags |= TUN_IASET; 520 521 si = (struct sockaddr_in *)ifa->ifa_dstaddr; 522 if (si && si->sin_addr.s_addr) 523 tp->tun_flags |= TUN_DSTADDR; 524 } 525 } 526 if_addr_runlock(ifp); 527#endif 528 mtx_unlock(&tp->tun_mtx); 529} 530 531/* 532 * Process an ioctl request. 533 */ 534static int 535tunifioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 536{ 537 struct ifreq *ifr = (struct ifreq *)data; 538 struct tun_softc *tp = ifp->if_softc; 539 struct ifstat *ifs; 540 int error = 0; 541 542 switch(cmd) { 543 case SIOCGIFSTATUS: 544 ifs = (struct ifstat *)data; 545 mtx_lock(&tp->tun_mtx); 546 if (tp->tun_pid) 547 snprintf(ifs->ascii, sizeof(ifs->ascii), 548 "\tOpened by PID %d\n", tp->tun_pid); 549 else 550 ifs->ascii[0] = '\0'; 551 mtx_unlock(&tp->tun_mtx); 552 break; 553 case SIOCSIFADDR: 554 tuninit(ifp); 555 TUNDEBUG(ifp, "address set\n"); 556 break; 557 case SIOCSIFMTU: 558 ifp->if_mtu = ifr->ifr_mtu; 559 TUNDEBUG(ifp, "mtu set\n"); 560 break; 561 case SIOCSIFFLAGS: 562 case SIOCADDMULTI: 563 case SIOCDELMULTI: 564 break; 565 default: 566 error = EINVAL; 567 } 568 return (error); 569} 570 571/* 572 * tunoutput - queue packets from higher level ready to put out. 573 */ 574static int 575tunoutput(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst, 576 struct route *ro) 577{ 578 struct tun_softc *tp = ifp->if_softc; 579 u_short cached_tun_flags; 580 int error; 581 u_int32_t af; 582 583 TUNDEBUG (ifp, "tunoutput\n"); 584 585#ifdef MAC 586 error = mac_ifnet_check_transmit(ifp, m0); 587 if (error) { 588 m_freem(m0); 589 return (error); 590 } 591#endif 592 593 /* Could be unlocked read? */ 594 mtx_lock(&tp->tun_mtx); 595 cached_tun_flags = tp->tun_flags; 596 mtx_unlock(&tp->tun_mtx); 597 if ((cached_tun_flags & TUN_READY) != TUN_READY) { 598 TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags); 599 m_freem (m0); 600 return (EHOSTDOWN); 601 } 602 603 if ((ifp->if_flags & IFF_UP) != IFF_UP) { 604 m_freem (m0); 605 return (EHOSTDOWN); 606 } 607 608 /* BPF writes need to be handled specially. */ 609 if (dst->sa_family == AF_UNSPEC) 610 bcopy(dst->sa_data, &af, sizeof(af)); 611 else 612 af = dst->sa_family; 613 614 if (bpf_peers_present(ifp->if_bpf)) 615 bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m0); 616 617 /* prepend sockaddr? this may abort if the mbuf allocation fails */ 618 if (cached_tun_flags & TUN_LMODE) { 619 /* allocate space for sockaddr */ 620 M_PREPEND(m0, dst->sa_len, M_NOWAIT); 621 622 /* if allocation failed drop packet */ 623 if (m0 == NULL) { 624 ifp->if_iqdrops++; 625 ifp->if_oerrors++; 626 return (ENOBUFS); 627 } else { 628 bcopy(dst, m0->m_data, dst->sa_len); 629 } 630 } 631 632 if (cached_tun_flags & TUN_IFHEAD) { 633 /* Prepend the address family */ 634 M_PREPEND(m0, 4, M_NOWAIT); 635 636 /* if allocation failed drop packet */ 637 if (m0 == NULL) { 638 ifp->if_iqdrops++; 639 ifp->if_oerrors++; 640 return (ENOBUFS); 641 } else 642 *(u_int32_t *)m0->m_data = htonl(af); 643 } else { 644#ifdef INET 645 if (af != AF_INET) 646#endif 647 { 648 m_freem(m0); 649 return (EAFNOSUPPORT); 650 } 651 } 652 653 error = (ifp->if_transmit)(ifp, m0); 654 if (error) 655 return (ENOBUFS); 656 ifp->if_opackets++; 657 return (0); 658} 659 660/* 661 * the cdevsw interface is now pretty minimal. 662 */ 663static int 664tunioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, 665 struct thread *td) 666{ 667 int error; 668 struct tun_softc *tp = dev->si_drv1; 669 struct tuninfo *tunp; 670 671 switch (cmd) { 672 case TUNSIFINFO: 673 tunp = (struct tuninfo *)data; 674 if (tunp->mtu < IF_MINMTU) 675 return (EINVAL); 676 if (TUN2IFP(tp)->if_mtu != tunp->mtu) { 677 error = priv_check(td, PRIV_NET_SETIFMTU); 678 if (error) 679 return (error); 680 } 681 mtx_lock(&tp->tun_mtx); 682 TUN2IFP(tp)->if_mtu = tunp->mtu; 683 TUN2IFP(tp)->if_type = tunp->type; 684 TUN2IFP(tp)->if_baudrate = tunp->baudrate; 685 mtx_unlock(&tp->tun_mtx); 686 break; 687 case TUNGIFINFO: 688 tunp = (struct tuninfo *)data; 689 mtx_lock(&tp->tun_mtx); 690 tunp->mtu = TUN2IFP(tp)->if_mtu; 691 tunp->type = TUN2IFP(tp)->if_type; 692 tunp->baudrate = TUN2IFP(tp)->if_baudrate; 693 mtx_unlock(&tp->tun_mtx); 694 break; 695 case TUNSDEBUG: 696 tundebug = *(int *)data; 697 break; 698 case TUNGDEBUG: 699 *(int *)data = tundebug; 700 break; 701 case TUNSLMODE: 702 mtx_lock(&tp->tun_mtx); 703 if (*(int *)data) { 704 tp->tun_flags |= TUN_LMODE; 705 tp->tun_flags &= ~TUN_IFHEAD; 706 } else 707 tp->tun_flags &= ~TUN_LMODE; 708 mtx_unlock(&tp->tun_mtx); 709 break; 710 case TUNSIFHEAD: 711 mtx_lock(&tp->tun_mtx); 712 if (*(int *)data) { 713 tp->tun_flags |= TUN_IFHEAD; 714 tp->tun_flags &= ~TUN_LMODE; 715 } else 716 tp->tun_flags &= ~TUN_IFHEAD; 717 mtx_unlock(&tp->tun_mtx); 718 break; 719 case TUNGIFHEAD: 720 mtx_lock(&tp->tun_mtx); 721 *(int *)data = (tp->tun_flags & TUN_IFHEAD) ? 1 : 0; 722 mtx_unlock(&tp->tun_mtx); 723 break; 724 case TUNSIFMODE: 725 /* deny this if UP */ 726 if (TUN2IFP(tp)->if_flags & IFF_UP) 727 return(EBUSY); 728 729 switch (*(int *)data & ~IFF_MULTICAST) { 730 case IFF_POINTOPOINT: 731 case IFF_BROADCAST: 732 mtx_lock(&tp->tun_mtx); 733 TUN2IFP(tp)->if_flags &= 734 ~(IFF_BROADCAST|IFF_POINTOPOINT|IFF_MULTICAST); 735 TUN2IFP(tp)->if_flags |= *(int *)data; 736 mtx_unlock(&tp->tun_mtx); 737 break; 738 default: 739 return(EINVAL); 740 } 741 break; 742 case TUNSIFPID: 743 mtx_lock(&tp->tun_mtx); 744 tp->tun_pid = curthread->td_proc->p_pid; 745 mtx_unlock(&tp->tun_mtx); 746 break; 747 case FIONBIO: 748 break; 749 case FIOASYNC: 750 mtx_lock(&tp->tun_mtx); 751 if (*(int *)data) 752 tp->tun_flags |= TUN_ASYNC; 753 else 754 tp->tun_flags &= ~TUN_ASYNC; 755 mtx_unlock(&tp->tun_mtx); 756 break; 757 case FIONREAD: 758 if (!IFQ_IS_EMPTY(&TUN2IFP(tp)->if_snd)) { 759 struct mbuf *mb; 760 IFQ_LOCK(&TUN2IFP(tp)->if_snd); 761 IFQ_POLL_NOLOCK(&TUN2IFP(tp)->if_snd, mb); 762 for (*(int *)data = 0; mb != NULL; mb = mb->m_next) 763 *(int *)data += mb->m_len; 764 IFQ_UNLOCK(&TUN2IFP(tp)->if_snd); 765 } else 766 *(int *)data = 0; 767 break; 768 case FIOSETOWN: 769 return (fsetown(*(int *)data, &tp->tun_sigio)); 770 771 case FIOGETOWN: 772 *(int *)data = fgetown(&tp->tun_sigio); 773 return (0); 774 775 /* This is deprecated, FIOSETOWN should be used instead. */ 776 case TIOCSPGRP: 777 return (fsetown(-(*(int *)data), &tp->tun_sigio)); 778 779 /* This is deprecated, FIOGETOWN should be used instead. */ 780 case TIOCGPGRP: 781 *(int *)data = -fgetown(&tp->tun_sigio); 782 return (0); 783 784 default: 785 return (ENOTTY); 786 } 787 return (0); 788} 789 790/* 791 * The cdevsw read interface - reads a packet at a time, or at 792 * least as much of a packet as can be read. 793 */ 794static int 795tunread(struct cdev *dev, struct uio *uio, int flag) 796{ 797 struct tun_softc *tp = dev->si_drv1; 798 struct ifnet *ifp = TUN2IFP(tp); 799 struct mbuf *m; 800 int error=0, len; 801 802 TUNDEBUG (ifp, "read\n"); 803 mtx_lock(&tp->tun_mtx); 804 if ((tp->tun_flags & TUN_READY) != TUN_READY) { 805 mtx_unlock(&tp->tun_mtx); 806 TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags); 807 return (EHOSTDOWN); 808 } 809 810 tp->tun_flags &= ~TUN_RWAIT; 811 812 do { 813 IFQ_DEQUEUE(&ifp->if_snd, m); 814 if (m == NULL) { 815 if (flag & O_NONBLOCK) { 816 mtx_unlock(&tp->tun_mtx); 817 return (EWOULDBLOCK); 818 } 819 tp->tun_flags |= TUN_RWAIT; 820 error = mtx_sleep(tp, &tp->tun_mtx, PCATCH | (PZERO + 1), 821 "tunread", 0); 822 if (error != 0) { 823 mtx_unlock(&tp->tun_mtx); 824 return (error); 825 } 826 } 827 } while (m == NULL); 828 mtx_unlock(&tp->tun_mtx); 829 830 while (m && uio->uio_resid > 0 && error == 0) { 831 len = min(uio->uio_resid, m->m_len); 832 if (len != 0) 833 error = uiomove(mtod(m, void *), len, uio); 834 m = m_free(m); 835 } 836 837 if (m) { 838 TUNDEBUG(ifp, "Dropping mbuf\n"); 839 m_freem(m); 840 } 841 return (error); 842} 843 844/* 845 * the cdevsw write interface - an atomic write is a packet - or else! 846 */ 847static int 848tunwrite(struct cdev *dev, struct uio *uio, int flag) 849{ 850 struct tun_softc *tp = dev->si_drv1; 851 struct ifnet *ifp = TUN2IFP(tp); 852 struct mbuf *m; 853 uint32_t family; 854 int isr; 855 856 TUNDEBUG(ifp, "tunwrite\n"); 857 858 if ((ifp->if_flags & IFF_UP) != IFF_UP) 859 /* ignore silently */ 860 return (0); 861 862 if (uio->uio_resid == 0) 863 return (0); 864 865 if (uio->uio_resid < 0 || uio->uio_resid > TUNMRU) { 866 TUNDEBUG(ifp, "len=%zd!\n", uio->uio_resid); 867 return (EIO); 868 } 869 870 if ((m = m_uiotombuf(uio, M_NOWAIT, 0, 0, M_PKTHDR)) == NULL) { 871 ifp->if_ierrors++; 872 return (ENOBUFS); 873 } 874 875 m->m_pkthdr.rcvif = ifp; 876#ifdef MAC 877 mac_ifnet_create_mbuf(ifp, m); 878#endif 879 880 /* Could be unlocked read? */ 881 mtx_lock(&tp->tun_mtx); 882 if (tp->tun_flags & TUN_IFHEAD) { 883 mtx_unlock(&tp->tun_mtx); 884 if (m->m_len < sizeof(family) && 885 (m = m_pullup(m, sizeof(family))) == NULL) 886 return (ENOBUFS); 887 family = ntohl(*mtod(m, u_int32_t *)); 888 m_adj(m, sizeof(family)); 889 } else { 890 mtx_unlock(&tp->tun_mtx); 891 family = AF_INET; 892 } 893 894 BPF_MTAP2(ifp, &family, sizeof(family), m); 895 896 switch (family) { 897#ifdef INET 898 case AF_INET: 899 isr = NETISR_IP; 900 break; 901#endif 902#ifdef INET6 903 case AF_INET6: 904 isr = NETISR_IPV6; 905 break; 906#endif 907 default: 908 m_freem(m); 909 return (EAFNOSUPPORT); 910 } 911 if (harvest.point_to_point) 912 random_harvest(&(m->m_data), 12, 2, RANDOM_NET_TUN); 913 ifp->if_ibytes += m->m_pkthdr.len; 914 ifp->if_ipackets++; 915 CURVNET_SET(ifp->if_vnet); 916 M_SETFIB(m, ifp->if_fib); 917 netisr_dispatch(isr, m); 918 CURVNET_RESTORE(); 919 return (0); 920} 921 922/* 923 * tunpoll - the poll interface, this is only useful on reads 924 * really. The write detect always returns true, write never blocks 925 * anyway, it either accepts the packet or drops it. 926 */ 927static int 928tunpoll(struct cdev *dev, int events, struct thread *td) 929{ 930 struct tun_softc *tp = dev->si_drv1; 931 struct ifnet *ifp = TUN2IFP(tp); 932 int revents = 0; 933 struct mbuf *m; 934 935 TUNDEBUG(ifp, "tunpoll\n"); 936 937 if (events & (POLLIN | POLLRDNORM)) { 938 IFQ_LOCK(&ifp->if_snd); 939 IFQ_POLL_NOLOCK(&ifp->if_snd, m); 940 if (m != NULL) { 941 TUNDEBUG(ifp, "tunpoll q=%d\n", ifp->if_snd.ifq_len); 942 revents |= events & (POLLIN | POLLRDNORM); 943 } else { 944 TUNDEBUG(ifp, "tunpoll waiting\n"); 945 selrecord(td, &tp->tun_rsel); 946 } 947 IFQ_UNLOCK(&ifp->if_snd); 948 } 949 if (events & (POLLOUT | POLLWRNORM)) 950 revents |= events & (POLLOUT | POLLWRNORM); 951 952 return (revents); 953} 954 955/* 956 * tunkqfilter - support for the kevent() system call. 957 */ 958static int 959tunkqfilter(struct cdev *dev, struct knote *kn) 960{ 961 struct tun_softc *tp = dev->si_drv1; 962 struct ifnet *ifp = TUN2IFP(tp); 963 964 switch(kn->kn_filter) { 965 case EVFILT_READ: 966 TUNDEBUG(ifp, "%s kqfilter: EVFILT_READ, minor = %#x\n", 967 ifp->if_xname, dev2unit(dev)); 968 kn->kn_fop = &tun_read_filterops; 969 break; 970 971 case EVFILT_WRITE: 972 TUNDEBUG(ifp, "%s kqfilter: EVFILT_WRITE, minor = %#x\n", 973 ifp->if_xname, dev2unit(dev)); 974 kn->kn_fop = &tun_write_filterops; 975 break; 976 977 default: 978 TUNDEBUG(ifp, "%s kqfilter: invalid filter, minor = %#x\n", 979 ifp->if_xname, dev2unit(dev)); 980 return(EINVAL); 981 } 982 983 kn->kn_hook = tp; 984 knlist_add(&tp->tun_rsel.si_note, kn, 0); 985 986 return (0); 987} 988 989/* 990 * Return true of there is data in the interface queue. 991 */ 992static int 993tunkqread(struct knote *kn, long hint) 994{ 995 int ret; 996 struct tun_softc *tp = kn->kn_hook; 997 struct cdev *dev = tp->tun_dev; 998 struct ifnet *ifp = TUN2IFP(tp); 999 1000 if ((kn->kn_data = ifp->if_snd.ifq_len) > 0) { 1001 TUNDEBUG(ifp, 1002 "%s have data in the queue. Len = %d, minor = %#x\n", 1003 ifp->if_xname, ifp->if_snd.ifq_len, dev2unit(dev)); 1004 ret = 1; 1005 } else { 1006 TUNDEBUG(ifp, 1007 "%s waiting for data, minor = %#x\n", ifp->if_xname, 1008 dev2unit(dev)); 1009 ret = 0; 1010 } 1011 1012 return (ret); 1013} 1014 1015/* 1016 * Always can write, always return MTU in kn->data. 1017 */ 1018static int 1019tunkqwrite(struct knote *kn, long hint) 1020{ 1021 struct tun_softc *tp = kn->kn_hook; 1022 struct ifnet *ifp = TUN2IFP(tp); 1023 1024 kn->kn_data = ifp->if_mtu; 1025 1026 return (1); 1027} 1028 1029static void 1030tunkqdetach(struct knote *kn) 1031{ 1032 struct tun_softc *tp = kn->kn_hook; 1033 1034 knlist_remove(&tp->tun_rsel.si_note, kn, 0); 1035} 1036