/* if_tap.c revision 236725 */
/*-
 * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * BASED ON:
 * -------------------------------------------------------------------------
 *
 * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
 * Nottingham University 1987.
31 */ 32 33/* 34 * $FreeBSD: head/sys/net/if_tap.c 236725 2012-06-07 19:48:45Z trociny $ 35 * $Id: if_tap.c,v 0.21 2000/07/23 21:46:02 max Exp $ 36 */ 37 38#include "opt_compat.h" 39#include "opt_inet.h" 40 41#include <sys/param.h> 42#include <sys/conf.h> 43#include <sys/fcntl.h> 44#include <sys/filio.h> 45#include <sys/jail.h> 46#include <sys/kernel.h> 47#include <sys/malloc.h> 48#include <sys/mbuf.h> 49#include <sys/module.h> 50#include <sys/poll.h> 51#include <sys/priv.h> 52#include <sys/proc.h> 53#include <sys/selinfo.h> 54#include <sys/signalvar.h> 55#include <sys/socket.h> 56#include <sys/sockio.h> 57#include <sys/sysctl.h> 58#include <sys/systm.h> 59#include <sys/ttycom.h> 60#include <sys/uio.h> 61#include <sys/queue.h> 62 63#include <net/bpf.h> 64#include <net/ethernet.h> 65#include <net/if.h> 66#include <net/if_clone.h> 67#include <net/if_dl.h> 68#include <net/if_types.h> 69#include <net/route.h> 70#include <net/vnet.h> 71 72#include <netinet/in.h> 73 74#include <net/if_tapvar.h> 75#include <net/if_tap.h> 76 77 78#define CDEV_NAME "tap" 79#define TAPDEBUG if (tapdebug) printf 80 81#define TAP "tap" 82#define VMNET "vmnet" 83#define TAPMAXUNIT 0x7fff 84#define VMNET_DEV_MASK CLONE_FLAG0 85 86/* module */ 87static int tapmodevent(module_t, int, void *); 88 89/* device */ 90static void tapclone(void *, struct ucred *, char *, int, 91 struct cdev **); 92static void tapcreate(struct cdev *); 93 94/* network interface */ 95static void tapifstart(struct ifnet *); 96static int tapifioctl(struct ifnet *, u_long, caddr_t); 97static void tapifinit(void *); 98 99static int tap_clone_create(struct if_clone *, int, caddr_t); 100static void tap_clone_destroy(struct ifnet *); 101static int vmnet_clone_create(struct if_clone *, int, caddr_t); 102static void vmnet_clone_destroy(struct ifnet *); 103 104IFC_SIMPLE_DECLARE(tap, 0); 105IFC_SIMPLE_DECLARE(vmnet, 0); 106 107/* character device */ 108static d_open_t tapopen; 109static d_close_t tapclose; 110static d_read_t 
tapread; 111static d_write_t tapwrite; 112static d_ioctl_t tapioctl; 113static d_poll_t tappoll; 114static d_kqfilter_t tapkqfilter; 115 116/* kqueue(2) */ 117static int tapkqread(struct knote *, long); 118static int tapkqwrite(struct knote *, long); 119static void tapkqdetach(struct knote *); 120 121static struct filterops tap_read_filterops = { 122 .f_isfd = 1, 123 .f_attach = NULL, 124 .f_detach = tapkqdetach, 125 .f_event = tapkqread, 126}; 127 128static struct filterops tap_write_filterops = { 129 .f_isfd = 1, 130 .f_attach = NULL, 131 .f_detach = tapkqdetach, 132 .f_event = tapkqwrite, 133}; 134 135static struct cdevsw tap_cdevsw = { 136 .d_version = D_VERSION, 137 .d_flags = D_NEEDMINOR, 138 .d_open = tapopen, 139 .d_close = tapclose, 140 .d_read = tapread, 141 .d_write = tapwrite, 142 .d_ioctl = tapioctl, 143 .d_poll = tappoll, 144 .d_name = CDEV_NAME, 145 .d_kqfilter = tapkqfilter, 146}; 147 148/* 149 * All global variables in if_tap.c are locked with tapmtx, with the 150 * exception of tapdebug, which is accessed unlocked; tapclones is 151 * static at runtime. 
152 */ 153static struct mtx tapmtx; 154static int tapdebug = 0; /* debug flag */ 155static int tapuopen = 0; /* allow user open() */ 156static int tapuponopen = 0; /* IFF_UP on open() */ 157static int tapdclone = 1; /* enable devfs cloning */ 158static SLIST_HEAD(, tap_softc) taphead; /* first device */ 159static struct clonedevs *tapclones; 160 161MALLOC_DECLARE(M_TAP); 162MALLOC_DEFINE(M_TAP, CDEV_NAME, "Ethernet tunnel interface"); 163SYSCTL_INT(_debug, OID_AUTO, if_tap_debug, CTLFLAG_RW, &tapdebug, 0, ""); 164 165SYSCTL_DECL(_net_link); 166static SYSCTL_NODE(_net_link, OID_AUTO, tap, CTLFLAG_RW, 0, 167 "Ethernet tunnel software network interface"); 168SYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tapuopen, 0, 169 "Allow user to open /dev/tap (based on node permissions)"); 170SYSCTL_INT(_net_link_tap, OID_AUTO, up_on_open, CTLFLAG_RW, &tapuponopen, 0, 171 "Bring interface up when /dev/tap is opened"); 172SYSCTL_INT(_net_link_tap, OID_AUTO, devfs_cloning, CTLFLAG_RW, &tapdclone, 0, 173 "Enably legacy devfs interface creation"); 174SYSCTL_INT(_net_link_tap, OID_AUTO, debug, CTLFLAG_RW, &tapdebug, 0, ""); 175 176TUNABLE_INT("net.link.tap.devfs_cloning", &tapdclone); 177 178DEV_MODULE(if_tap, tapmodevent, NULL); 179 180static int 181tap_clone_create(struct if_clone *ifc, int unit, caddr_t params) 182{ 183 struct cdev *dev; 184 int i; 185 int extra; 186 187 if (strcmp(ifc->ifc_name, VMNET) == 0) 188 extra = VMNET_DEV_MASK; 189 else 190 extra = 0; 191 192 /* find any existing device, or allocate new unit number */ 193 i = clone_create(&tapclones, &tap_cdevsw, &unit, &dev, extra); 194 if (i) { 195 dev = make_dev(&tap_cdevsw, unit | extra, 196 UID_ROOT, GID_WHEEL, 0600, "%s%d", ifc->ifc_name, unit); 197 } 198 199 tapcreate(dev); 200 return (0); 201} 202 203/* vmnet devices are tap devices in disguise */ 204static int 205vmnet_clone_create(struct if_clone *ifc, int unit, caddr_t params) 206{ 207 return tap_clone_create(ifc, unit, params); 208} 209 210static 
void 211tap_destroy(struct tap_softc *tp) 212{ 213 struct ifnet *ifp = tp->tap_ifp; 214 215 /* Unlocked read. */ 216 KASSERT(!(tp->tap_flags & TAP_OPEN), 217 ("%s flags is out of sync", ifp->if_xname)); 218 219 CURVNET_SET(ifp->if_vnet); 220 seldrain(&tp->tap_rsel); 221 knlist_destroy(&tp->tap_rsel.si_note); 222 destroy_dev(tp->tap_dev); 223 ether_ifdetach(ifp); 224 if_free(ifp); 225 226 mtx_destroy(&tp->tap_mtx); 227 free(tp, M_TAP); 228 CURVNET_RESTORE(); 229} 230 231static void 232tap_clone_destroy(struct ifnet *ifp) 233{ 234 struct tap_softc *tp = ifp->if_softc; 235 236 mtx_lock(&tapmtx); 237 SLIST_REMOVE(&taphead, tp, tap_softc, tap_next); 238 mtx_unlock(&tapmtx); 239 tap_destroy(tp); 240} 241 242/* vmnet devices are tap devices in disguise */ 243static void 244vmnet_clone_destroy(struct ifnet *ifp) 245{ 246 tap_clone_destroy(ifp); 247} 248 249/* 250 * tapmodevent 251 * 252 * module event handler 253 */ 254static int 255tapmodevent(module_t mod, int type, void *data) 256{ 257 static eventhandler_tag eh_tag = NULL; 258 struct tap_softc *tp = NULL; 259 struct ifnet *ifp = NULL; 260 261 switch (type) { 262 case MOD_LOAD: 263 264 /* intitialize device */ 265 266 mtx_init(&tapmtx, "tapmtx", NULL, MTX_DEF); 267 SLIST_INIT(&taphead); 268 269 clone_setup(&tapclones); 270 eh_tag = EVENTHANDLER_REGISTER(dev_clone, tapclone, 0, 1000); 271 if (eh_tag == NULL) { 272 clone_cleanup(&tapclones); 273 mtx_destroy(&tapmtx); 274 return (ENOMEM); 275 } 276 if_clone_attach(&tap_cloner); 277 if_clone_attach(&vmnet_cloner); 278 return (0); 279 280 case MOD_UNLOAD: 281 /* 282 * The EBUSY algorithm here can't quite atomically 283 * guarantee that this is race-free since we have to 284 * release the tap mtx to deregister the clone handler. 
285 */ 286 mtx_lock(&tapmtx); 287 SLIST_FOREACH(tp, &taphead, tap_next) { 288 mtx_lock(&tp->tap_mtx); 289 if (tp->tap_flags & TAP_OPEN) { 290 mtx_unlock(&tp->tap_mtx); 291 mtx_unlock(&tapmtx); 292 return (EBUSY); 293 } 294 mtx_unlock(&tp->tap_mtx); 295 } 296 mtx_unlock(&tapmtx); 297 298 EVENTHANDLER_DEREGISTER(dev_clone, eh_tag); 299 if_clone_detach(&tap_cloner); 300 if_clone_detach(&vmnet_cloner); 301 drain_dev_clone_events(); 302 303 mtx_lock(&tapmtx); 304 while ((tp = SLIST_FIRST(&taphead)) != NULL) { 305 SLIST_REMOVE_HEAD(&taphead, tap_next); 306 mtx_unlock(&tapmtx); 307 308 ifp = tp->tap_ifp; 309 310 TAPDEBUG("detaching %s\n", ifp->if_xname); 311 312 tap_destroy(tp); 313 mtx_lock(&tapmtx); 314 } 315 mtx_unlock(&tapmtx); 316 clone_cleanup(&tapclones); 317 318 mtx_destroy(&tapmtx); 319 320 break; 321 322 default: 323 return (EOPNOTSUPP); 324 } 325 326 return (0); 327} /* tapmodevent */ 328 329 330/* 331 * DEVFS handler 332 * 333 * We need to support two kind of devices - tap and vmnet 334 */ 335static void 336tapclone(void *arg, struct ucred *cred, char *name, int namelen, struct cdev **dev) 337{ 338 char devname[SPECNAMELEN + 1]; 339 int i, unit, append_unit; 340 int extra; 341 342 if (*dev != NULL) 343 return; 344 345 if (!tapdclone || 346 (!tapuopen && priv_check_cred(cred, PRIV_NET_IFCREATE, 0) != 0)) 347 return; 348 349 unit = 0; 350 append_unit = 0; 351 extra = 0; 352 353 /* We're interested in only tap/vmnet devices. 
*/ 354 if (strcmp(name, TAP) == 0) { 355 unit = -1; 356 } else if (strcmp(name, VMNET) == 0) { 357 unit = -1; 358 extra = VMNET_DEV_MASK; 359 } else if (dev_stdclone(name, NULL, TAP, &unit) != 1) { 360 if (dev_stdclone(name, NULL, VMNET, &unit) != 1) { 361 return; 362 } else { 363 extra = VMNET_DEV_MASK; 364 } 365 } 366 367 if (unit == -1) 368 append_unit = 1; 369 370 CURVNET_SET(CRED_TO_VNET(cred)); 371 /* find any existing device, or allocate new unit number */ 372 i = clone_create(&tapclones, &tap_cdevsw, &unit, dev, extra); 373 if (i) { 374 if (append_unit) { 375 /* 376 * We were passed 'tun' or 'tap', with no unit specified 377 * so we'll need to append it now. 378 */ 379 namelen = snprintf(devname, sizeof(devname), "%s%d", name, 380 unit); 381 name = devname; 382 } 383 384 *dev = make_dev_credf(MAKEDEV_REF, &tap_cdevsw, unit | extra, 385 cred, UID_ROOT, GID_WHEEL, 0600, "%s", name); 386 } 387 388 if_clone_create(name, namelen, NULL); 389 CURVNET_RESTORE(); 390} /* tapclone */ 391 392 393/* 394 * tapcreate 395 * 396 * to create interface 397 */ 398static void 399tapcreate(struct cdev *dev) 400{ 401 struct ifnet *ifp = NULL; 402 struct tap_softc *tp = NULL; 403 unsigned short macaddr_hi; 404 uint32_t macaddr_mid; 405 int unit; 406 char *name = NULL; 407 u_char eaddr[6]; 408 409 dev->si_flags &= ~SI_CHEAPCLONE; 410 411 /* allocate driver storage and create device */ 412 tp = malloc(sizeof(*tp), M_TAP, M_WAITOK | M_ZERO); 413 mtx_init(&tp->tap_mtx, "tap_mtx", NULL, MTX_DEF); 414 mtx_lock(&tapmtx); 415 SLIST_INSERT_HEAD(&taphead, tp, tap_next); 416 mtx_unlock(&tapmtx); 417 418 unit = dev2unit(dev); 419 420 /* select device: tap or vmnet */ 421 if (unit & VMNET_DEV_MASK) { 422 name = VMNET; 423 tp->tap_flags |= TAP_VMNET; 424 } else 425 name = TAP; 426 427 unit &= TAPMAXUNIT; 428 429 TAPDEBUG("tapcreate(%s%d). 
minor = %#x\n", name, unit, dev2unit(dev)); 430 431 /* generate fake MAC address: 00 bd xx xx xx unit_no */ 432 macaddr_hi = htons(0x00bd); 433 macaddr_mid = (uint32_t) ticks; 434 bcopy(&macaddr_hi, eaddr, sizeof(short)); 435 bcopy(&macaddr_mid, &eaddr[2], sizeof(uint32_t)); 436 eaddr[5] = (u_char)unit; 437 438 /* fill the rest and attach interface */ 439 ifp = tp->tap_ifp = if_alloc(IFT_ETHER); 440 if (ifp == NULL) 441 panic("%s%d: can not if_alloc()", name, unit); 442 ifp->if_softc = tp; 443 if_initname(ifp, name, unit); 444 ifp->if_init = tapifinit; 445 ifp->if_start = tapifstart; 446 ifp->if_ioctl = tapifioctl; 447 ifp->if_mtu = ETHERMTU; 448 ifp->if_flags = (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST); 449 IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); 450 ifp->if_capabilities |= IFCAP_LINKSTATE; 451 ifp->if_capenable |= IFCAP_LINKSTATE; 452 453 dev->si_drv1 = tp; 454 tp->tap_dev = dev; 455 456 ether_ifattach(ifp, eaddr); 457 458 mtx_lock(&tp->tap_mtx); 459 tp->tap_flags |= TAP_INITED; 460 mtx_unlock(&tp->tap_mtx); 461 462 knlist_init_mtx(&tp->tap_rsel.si_note, &tp->tap_mtx); 463 464 TAPDEBUG("interface %s is created. minor = %#x\n", 465 ifp->if_xname, dev2unit(dev)); 466} /* tapcreate */ 467 468 469/* 470 * tapopen 471 * 472 * to open tunnel. 
must be superuser 473 */ 474static int 475tapopen(struct cdev *dev, int flag, int mode, struct thread *td) 476{ 477 struct tap_softc *tp = NULL; 478 struct ifnet *ifp = NULL; 479 int error; 480 481 if (tapuopen == 0) { 482 error = priv_check(td, PRIV_NET_TAP); 483 if (error) 484 return (error); 485 } 486 487 if ((dev2unit(dev) & CLONE_UNITMASK) > TAPMAXUNIT) 488 return (ENXIO); 489 490 tp = dev->si_drv1; 491 492 mtx_lock(&tp->tap_mtx); 493 if (tp->tap_flags & TAP_OPEN) { 494 mtx_unlock(&tp->tap_mtx); 495 return (EBUSY); 496 } 497 498 bcopy(IF_LLADDR(tp->tap_ifp), tp->ether_addr, sizeof(tp->ether_addr)); 499 tp->tap_pid = td->td_proc->p_pid; 500 tp->tap_flags |= TAP_OPEN; 501 ifp = tp->tap_ifp; 502 503 ifp->if_drv_flags |= IFF_DRV_RUNNING; 504 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 505 if (tapuponopen) 506 ifp->if_flags |= IFF_UP; 507 if_link_state_change(ifp, LINK_STATE_UP); 508 mtx_unlock(&tp->tap_mtx); 509 510 TAPDEBUG("%s is open. minor = %#x\n", ifp->if_xname, dev2unit(dev)); 511 512 return (0); 513} /* tapopen */ 514 515 516/* 517 * tapclose 518 * 519 * close the device - mark i/f down & delete routing info 520 */ 521static int 522tapclose(struct cdev *dev, int foo, int bar, struct thread *td) 523{ 524 struct ifaddr *ifa; 525 struct tap_softc *tp = dev->si_drv1; 526 struct ifnet *ifp = tp->tap_ifp; 527 528 /* junk all pending output */ 529 mtx_lock(&tp->tap_mtx); 530 CURVNET_SET(ifp->if_vnet); 531 IF_DRAIN(&ifp->if_snd); 532 533 /* 534 * do not bring the interface down, and do not anything with 535 * interface, if we are in VMnet mode. just close the device. 
536 */ 537 538 if (((tp->tap_flags & TAP_VMNET) == 0) && (ifp->if_flags & IFF_UP)) { 539 mtx_unlock(&tp->tap_mtx); 540 if_down(ifp); 541 mtx_lock(&tp->tap_mtx); 542 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 543 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 544 mtx_unlock(&tp->tap_mtx); 545 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 546 rtinit(ifa, (int)RTM_DELETE, 0); 547 } 548 if_purgeaddrs(ifp); 549 mtx_lock(&tp->tap_mtx); 550 } 551 } 552 553 if_link_state_change(ifp, LINK_STATE_DOWN); 554 CURVNET_RESTORE(); 555 556 funsetown(&tp->tap_sigio); 557 selwakeuppri(&tp->tap_rsel, PZERO+1); 558 KNOTE_LOCKED(&tp->tap_rsel.si_note, 0); 559 560 tp->tap_flags &= ~TAP_OPEN; 561 tp->tap_pid = 0; 562 mtx_unlock(&tp->tap_mtx); 563 564 TAPDEBUG("%s is closed. minor = %#x\n", 565 ifp->if_xname, dev2unit(dev)); 566 567 return (0); 568} /* tapclose */ 569 570 571/* 572 * tapifinit 573 * 574 * network interface initialization function 575 */ 576static void 577tapifinit(void *xtp) 578{ 579 struct tap_softc *tp = (struct tap_softc *)xtp; 580 struct ifnet *ifp = tp->tap_ifp; 581 582 TAPDEBUG("initializing %s\n", ifp->if_xname); 583 584 mtx_lock(&tp->tap_mtx); 585 ifp->if_drv_flags |= IFF_DRV_RUNNING; 586 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 587 mtx_unlock(&tp->tap_mtx); 588 589 /* attempt to start output */ 590 tapifstart(ifp); 591} /* tapifinit */ 592 593 594/* 595 * tapifioctl 596 * 597 * Process an ioctl request on network interface 598 */ 599static int 600tapifioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 601{ 602 struct tap_softc *tp = ifp->if_softc; 603 struct ifreq *ifr = (struct ifreq *)data; 604 struct ifstat *ifs = NULL; 605 int dummy; 606 607 switch (cmd) { 608 case SIOCSIFFLAGS: /* XXX -- just like vmnet does */ 609 case SIOCADDMULTI: 610 case SIOCDELMULTI: 611 break; 612 613 case SIOCSIFMTU: 614 ifp->if_mtu = ifr->ifr_mtu; 615 break; 616 617 case SIOCGIFSTATUS: 618 ifs = (struct ifstat *)data; 619 dummy = strlen(ifs->ascii); 620 mtx_lock(&tp->tap_mtx); 621 if 
(tp->tap_pid != 0 && dummy < sizeof(ifs->ascii)) 622 snprintf(ifs->ascii + dummy, 623 sizeof(ifs->ascii) - dummy, 624 "\tOpened by PID %d\n", tp->tap_pid); 625 mtx_unlock(&tp->tap_mtx); 626 break; 627 628 default: 629 return (ether_ioctl(ifp, cmd, data)); 630 /* NOT REACHED */ 631 } 632 633 return (0); 634} /* tapifioctl */ 635 636 637/* 638 * tapifstart 639 * 640 * queue packets from higher level ready to put out 641 */ 642static void 643tapifstart(struct ifnet *ifp) 644{ 645 struct tap_softc *tp = ifp->if_softc; 646 647 TAPDEBUG("%s starting\n", ifp->if_xname); 648 649 /* 650 * do not junk pending output if we are in VMnet mode. 651 * XXX: can this do any harm because of queue overflow? 652 */ 653 654 mtx_lock(&tp->tap_mtx); 655 if (((tp->tap_flags & TAP_VMNET) == 0) && 656 ((tp->tap_flags & TAP_READY) != TAP_READY)) { 657 struct mbuf *m; 658 659 /* Unlocked read. */ 660 TAPDEBUG("%s not ready, tap_flags = 0x%x\n", ifp->if_xname, 661 tp->tap_flags); 662 663 for (;;) { 664 IF_DEQUEUE(&ifp->if_snd, m); 665 if (m != NULL) { 666 m_freem(m); 667 ifp->if_oerrors++; 668 } else 669 break; 670 } 671 mtx_unlock(&tp->tap_mtx); 672 673 return; 674 } 675 676 ifp->if_drv_flags |= IFF_DRV_OACTIVE; 677 678 if (!IFQ_IS_EMPTY(&ifp->if_snd)) { 679 if (tp->tap_flags & TAP_RWAIT) { 680 tp->tap_flags &= ~TAP_RWAIT; 681 wakeup(tp); 682 } 683 684 if ((tp->tap_flags & TAP_ASYNC) && (tp->tap_sigio != NULL)) { 685 mtx_unlock(&tp->tap_mtx); 686 pgsigio(&tp->tap_sigio, SIGIO, 0); 687 mtx_lock(&tp->tap_mtx); 688 } 689 690 selwakeuppri(&tp->tap_rsel, PZERO+1); 691 KNOTE_LOCKED(&tp->tap_rsel.si_note, 0); 692 ifp->if_opackets ++; /* obytes are counted in ether_output */ 693 } 694 695 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 696 mtx_unlock(&tp->tap_mtx); 697} /* tapifstart */ 698 699 700/* 701 * tapioctl 702 * 703 * the cdevsw interface is now pretty minimal 704 */ 705static int 706tapioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td) 707{ 708 struct tap_softc *tp = 
dev->si_drv1; 709 struct ifnet *ifp = tp->tap_ifp; 710 struct tapinfo *tapp = NULL; 711 int f; 712#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ 713 defined(COMPAT_FREEBSD4) 714 int ival; 715#endif 716 717 switch (cmd) { 718 case TAPSIFINFO: 719 tapp = (struct tapinfo *)data; 720 mtx_lock(&tp->tap_mtx); 721 ifp->if_mtu = tapp->mtu; 722 ifp->if_type = tapp->type; 723 ifp->if_baudrate = tapp->baudrate; 724 mtx_unlock(&tp->tap_mtx); 725 break; 726 727 case TAPGIFINFO: 728 tapp = (struct tapinfo *)data; 729 mtx_lock(&tp->tap_mtx); 730 tapp->mtu = ifp->if_mtu; 731 tapp->type = ifp->if_type; 732 tapp->baudrate = ifp->if_baudrate; 733 mtx_unlock(&tp->tap_mtx); 734 break; 735 736 case TAPSDEBUG: 737 tapdebug = *(int *)data; 738 break; 739 740 case TAPGDEBUG: 741 *(int *)data = tapdebug; 742 break; 743 744 case TAPGIFNAME: { 745 struct ifreq *ifr = (struct ifreq *) data; 746 747 strlcpy(ifr->ifr_name, ifp->if_xname, IFNAMSIZ); 748 } break; 749 750 case FIONBIO: 751 break; 752 753 case FIOASYNC: 754 mtx_lock(&tp->tap_mtx); 755 if (*(int *)data) 756 tp->tap_flags |= TAP_ASYNC; 757 else 758 tp->tap_flags &= ~TAP_ASYNC; 759 mtx_unlock(&tp->tap_mtx); 760 break; 761 762 case FIONREAD: 763 if (!IFQ_IS_EMPTY(&ifp->if_snd)) { 764 struct mbuf *mb; 765 766 IFQ_LOCK(&ifp->if_snd); 767 IFQ_POLL_NOLOCK(&ifp->if_snd, mb); 768 for (*(int *)data = 0; mb != NULL; 769 mb = mb->m_next) 770 *(int *)data += mb->m_len; 771 IFQ_UNLOCK(&ifp->if_snd); 772 } else 773 *(int *)data = 0; 774 break; 775 776 case FIOSETOWN: 777 return (fsetown(*(int *)data, &tp->tap_sigio)); 778 779 case FIOGETOWN: 780 *(int *)data = fgetown(&tp->tap_sigio); 781 return (0); 782 783 /* this is deprecated, FIOSETOWN should be used instead */ 784 case TIOCSPGRP: 785 return (fsetown(-(*(int *)data), &tp->tap_sigio)); 786 787 /* this is deprecated, FIOGETOWN should be used instead */ 788 case TIOCGPGRP: 789 *(int *)data = -fgetown(&tp->tap_sigio); 790 return (0); 791 792 /* VMware/VMnet port ioctl's */ 793 
794#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ 795 defined(COMPAT_FREEBSD4) 796 case _IO('V', 0): 797 ival = IOCPARM_IVAL(data); 798 data = (caddr_t)&ival; 799 /* FALLTHROUGH */ 800#endif 801 case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */ 802 f = *(int *)data; 803 f &= 0x0fff; 804 f &= ~IFF_CANTCHANGE; 805 f |= IFF_UP; 806 807 mtx_lock(&tp->tap_mtx); 808 ifp->if_flags = f | (ifp->if_flags & IFF_CANTCHANGE); 809 mtx_unlock(&tp->tap_mtx); 810 break; 811 812 case OSIOCGIFADDR: /* get MAC address of the remote side */ 813 case SIOCGIFADDR: 814 mtx_lock(&tp->tap_mtx); 815 bcopy(tp->ether_addr, data, sizeof(tp->ether_addr)); 816 mtx_unlock(&tp->tap_mtx); 817 break; 818 819 case SIOCSIFADDR: /* set MAC address of the remote side */ 820 mtx_lock(&tp->tap_mtx); 821 bcopy(data, tp->ether_addr, sizeof(tp->ether_addr)); 822 mtx_unlock(&tp->tap_mtx); 823 break; 824 825 default: 826 return (ENOTTY); 827 } 828 return (0); 829} /* tapioctl */ 830 831 832/* 833 * tapread 834 * 835 * the cdevsw read interface - reads a packet at a time, or at 836 * least as much of a packet as can be read 837 */ 838static int 839tapread(struct cdev *dev, struct uio *uio, int flag) 840{ 841 struct tap_softc *tp = dev->si_drv1; 842 struct ifnet *ifp = tp->tap_ifp; 843 struct mbuf *m = NULL; 844 int error = 0, len; 845 846 TAPDEBUG("%s reading, minor = %#x\n", ifp->if_xname, dev2unit(dev)); 847 848 mtx_lock(&tp->tap_mtx); 849 if ((tp->tap_flags & TAP_READY) != TAP_READY) { 850 mtx_unlock(&tp->tap_mtx); 851 852 /* Unlocked read. */ 853 TAPDEBUG("%s not ready. 
minor = %#x, tap_flags = 0x%x\n", 854 ifp->if_xname, dev2unit(dev), tp->tap_flags); 855 856 return (EHOSTDOWN); 857 } 858 859 tp->tap_flags &= ~TAP_RWAIT; 860 861 /* sleep until we get a packet */ 862 do { 863 IF_DEQUEUE(&ifp->if_snd, m); 864 865 if (m == NULL) { 866 if (flag & O_NONBLOCK) { 867 mtx_unlock(&tp->tap_mtx); 868 return (EWOULDBLOCK); 869 } 870 871 tp->tap_flags |= TAP_RWAIT; 872 error = mtx_sleep(tp, &tp->tap_mtx, PCATCH | (PZERO + 1), 873 "taprd", 0); 874 if (error) { 875 mtx_unlock(&tp->tap_mtx); 876 return (error); 877 } 878 } 879 } while (m == NULL); 880 mtx_unlock(&tp->tap_mtx); 881 882 /* feed packet to bpf */ 883 BPF_MTAP(ifp, m); 884 885 /* xfer packet to user space */ 886 while ((m != NULL) && (uio->uio_resid > 0) && (error == 0)) { 887 len = min(uio->uio_resid, m->m_len); 888 if (len == 0) 889 break; 890 891 error = uiomove(mtod(m, void *), len, uio); 892 m = m_free(m); 893 } 894 895 if (m != NULL) { 896 TAPDEBUG("%s dropping mbuf, minor = %#x\n", ifp->if_xname, 897 dev2unit(dev)); 898 m_freem(m); 899 } 900 901 return (error); 902} /* tapread */ 903 904 905/* 906 * tapwrite 907 * 908 * the cdevsw write interface - an atomic write is a packet - or else! 
909 */ 910static int 911tapwrite(struct cdev *dev, struct uio *uio, int flag) 912{ 913 struct ether_header *eh; 914 struct tap_softc *tp = dev->si_drv1; 915 struct ifnet *ifp = tp->tap_ifp; 916 struct mbuf *m; 917 918 TAPDEBUG("%s writting, minor = %#x\n", 919 ifp->if_xname, dev2unit(dev)); 920 921 if (uio->uio_resid == 0) 922 return (0); 923 924 if ((uio->uio_resid < 0) || (uio->uio_resid > TAPMRU)) { 925 TAPDEBUG("%s invalid packet len = %zd, minor = %#x\n", 926 ifp->if_xname, uio->uio_resid, dev2unit(dev)); 927 928 return (EIO); 929 } 930 931 if ((m = m_uiotombuf(uio, M_DONTWAIT, 0, ETHER_ALIGN, 932 M_PKTHDR)) == NULL) { 933 ifp->if_ierrors ++; 934 return (ENOBUFS); 935 } 936 937 m->m_pkthdr.rcvif = ifp; 938 939 /* 940 * Only pass a unicast frame to ether_input(), if it would actually 941 * have been received by non-virtual hardware. 942 */ 943 if (m->m_len < sizeof(struct ether_header)) { 944 m_freem(m); 945 return (0); 946 } 947 eh = mtod(m, struct ether_header *); 948 949 if (eh && (ifp->if_flags & IFF_PROMISC) == 0 && 950 !ETHER_IS_MULTICAST(eh->ether_dhost) && 951 bcmp(eh->ether_dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN) != 0) { 952 m_freem(m); 953 return (0); 954 } 955 956 /* Pass packet up to parent. */ 957 CURVNET_SET(ifp->if_vnet); 958 (*ifp->if_input)(ifp, m); 959 CURVNET_RESTORE(); 960 ifp->if_ipackets ++; /* ibytes are counted in parent */ 961 962 return (0); 963} /* tapwrite */ 964 965 966/* 967 * tappoll 968 * 969 * the poll interface, this is only useful on reads 970 * really. 
the write detect always returns true, write never blocks 971 * anyway, it either accepts the packet or drops it 972 */ 973static int 974tappoll(struct cdev *dev, int events, struct thread *td) 975{ 976 struct tap_softc *tp = dev->si_drv1; 977 struct ifnet *ifp = tp->tap_ifp; 978 int revents = 0; 979 980 TAPDEBUG("%s polling, minor = %#x\n", 981 ifp->if_xname, dev2unit(dev)); 982 983 if (events & (POLLIN | POLLRDNORM)) { 984 IFQ_LOCK(&ifp->if_snd); 985 if (!IFQ_IS_EMPTY(&ifp->if_snd)) { 986 TAPDEBUG("%s have data in queue. len = %d, " \ 987 "minor = %#x\n", ifp->if_xname, 988 ifp->if_snd.ifq_len, dev2unit(dev)); 989 990 revents |= (events & (POLLIN | POLLRDNORM)); 991 } else { 992 TAPDEBUG("%s waiting for data, minor = %#x\n", 993 ifp->if_xname, dev2unit(dev)); 994 995 selrecord(td, &tp->tap_rsel); 996 } 997 IFQ_UNLOCK(&ifp->if_snd); 998 } 999 1000 if (events & (POLLOUT | POLLWRNORM)) 1001 revents |= (events & (POLLOUT | POLLWRNORM)); 1002 1003 return (revents); 1004} /* tappoll */ 1005 1006 1007/* 1008 * tap_kqfilter 1009 * 1010 * support for kevent() system call 1011 */ 1012static int 1013tapkqfilter(struct cdev *dev, struct knote *kn) 1014{ 1015 struct tap_softc *tp = dev->si_drv1; 1016 struct ifnet *ifp = tp->tap_ifp; 1017 1018 switch (kn->kn_filter) { 1019 case EVFILT_READ: 1020 TAPDEBUG("%s kqfilter: EVFILT_READ, minor = %#x\n", 1021 ifp->if_xname, dev2unit(dev)); 1022 kn->kn_fop = &tap_read_filterops; 1023 break; 1024 1025 case EVFILT_WRITE: 1026 TAPDEBUG("%s kqfilter: EVFILT_WRITE, minor = %#x\n", 1027 ifp->if_xname, dev2unit(dev)); 1028 kn->kn_fop = &tap_write_filterops; 1029 break; 1030 1031 default: 1032 TAPDEBUG("%s kqfilter: invalid filter, minor = %#x\n", 1033 ifp->if_xname, dev2unit(dev)); 1034 return (EINVAL); 1035 /* NOT REACHED */ 1036 } 1037 1038 kn->kn_hook = tp; 1039 knlist_add(&tp->tap_rsel.si_note, kn, 0); 1040 1041 return (0); 1042} /* tapkqfilter */ 1043 1044 1045/* 1046 * tap_kqread 1047 * 1048 * Return true if there is data in the 
interface queue 1049 */ 1050static int 1051tapkqread(struct knote *kn, long hint) 1052{ 1053 int ret; 1054 struct tap_softc *tp = kn->kn_hook; 1055 struct cdev *dev = tp->tap_dev; 1056 struct ifnet *ifp = tp->tap_ifp; 1057 1058 if ((kn->kn_data = ifp->if_snd.ifq_len) > 0) { 1059 TAPDEBUG("%s have data in queue. len = %d, minor = %#x\n", 1060 ifp->if_xname, ifp->if_snd.ifq_len, dev2unit(dev)); 1061 ret = 1; 1062 } else { 1063 TAPDEBUG("%s waiting for data, minor = %#x\n", 1064 ifp->if_xname, dev2unit(dev)); 1065 ret = 0; 1066 } 1067 1068 return (ret); 1069} /* tapkqread */ 1070 1071 1072/* 1073 * tap_kqwrite 1074 * 1075 * Always can write. Return the MTU in kn->data 1076 */ 1077static int 1078tapkqwrite(struct knote *kn, long hint) 1079{ 1080 struct tap_softc *tp = kn->kn_hook; 1081 struct ifnet *ifp = tp->tap_ifp; 1082 1083 kn->kn_data = ifp->if_mtu; 1084 1085 return (1); 1086} /* tapkqwrite */ 1087 1088 1089static void 1090tapkqdetach(struct knote *kn) 1091{ 1092 struct tap_softc *tp = kn->kn_hook; 1093 1094 knlist_remove(&tp->tap_rsel.si_note, kn, 0); 1095} /* tapkqdetach */ 1096 1097