if_tap.c revision 226500
1/*- 2 * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * BASED ON: 27 * ------------------------------------------------------------------------- 28 * 29 * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk> 30 * Nottingham University 1987. 31 */ 32 33/* 34 * $FreeBSD: head/sys/net/if_tap.c 226500 2011-10-18 08:09:44Z ed $ 35 * $Id: if_tap.c,v 0.21 2000/07/23 21:46:02 max Exp $ 36 */ 37 38#include "opt_compat.h" 39#include "opt_inet.h" 40 41#include <sys/param.h> 42#include <sys/conf.h> 43#include <sys/fcntl.h> 44#include <sys/filio.h> 45#include <sys/kernel.h> 46#include <sys/malloc.h> 47#include <sys/mbuf.h> 48#include <sys/module.h> 49#include <sys/poll.h> 50#include <sys/priv.h> 51#include <sys/proc.h> 52#include <sys/selinfo.h> 53#include <sys/signalvar.h> 54#include <sys/socket.h> 55#include <sys/sockio.h> 56#include <sys/sysctl.h> 57#include <sys/systm.h> 58#include <sys/ttycom.h> 59#include <sys/uio.h> 60#include <sys/queue.h> 61 62#include <net/bpf.h> 63#include <net/ethernet.h> 64#include <net/if.h> 65#include <net/if_clone.h> 66#include <net/if_dl.h> 67#include <net/route.h> 68#include <net/if_types.h> 69 70#include <netinet/in.h> 71 72#include <net/if_tapvar.h> 73#include <net/if_tap.h> 74 75 76#define CDEV_NAME "tap" 77#define TAPDEBUG if (tapdebug) printf 78 79#define TAP "tap" 80#define VMNET "vmnet" 81#define TAPMAXUNIT 0x7fff 82#define VMNET_DEV_MASK CLONE_FLAG0 83 84/* module */ 85static int tapmodevent(module_t, int, void *); 86 87/* device */ 88static void tapclone(void *, struct ucred *, char *, int, 89 struct cdev **); 90static void tapcreate(struct cdev *); 91 92/* network interface */ 93static void tapifstart(struct ifnet *); 94static int tapifioctl(struct ifnet *, u_long, caddr_t); 95static void tapifinit(void *); 96 97static int tap_clone_create(struct if_clone *, int, caddr_t); 98static void tap_clone_destroy(struct ifnet *); 99static int vmnet_clone_create(struct if_clone *, int, caddr_t); 100static void vmnet_clone_destroy(struct ifnet *); 101 102IFC_SIMPLE_DECLARE(tap, 0); 103IFC_SIMPLE_DECLARE(vmnet, 0); 104 105/* character device */ 106static d_open_t tapopen; 107static d_close_t tapclose; 108static d_read_t tapread; 109static d_write_t tapwrite; 110static d_ioctl_t tapioctl; 111static d_poll_t tappoll; 112static d_kqfilter_t tapkqfilter; 113 114/* kqueue(2) */ 115static int tapkqread(struct knote *, long); 116static int tapkqwrite(struct knote *, long); 117static void tapkqdetach(struct knote *); 118 119static struct filterops tap_read_filterops = { 120 .f_isfd = 1, 121 .f_attach = NULL, 122 .f_detach = tapkqdetach, 123 .f_event = tapkqread, 124}; 125 126static struct filterops tap_write_filterops = { 127 .f_isfd = 1, 128 .f_attach = NULL, 129 .f_detach = tapkqdetach, 130 .f_event = tapkqwrite, 131}; 132 133static struct cdevsw tap_cdevsw = { 134 .d_version = D_VERSION, 135 .d_flags = D_NEEDMINOR, 136 .d_open = tapopen, 137 .d_close = tapclose, 138 .d_read = tapread, 139 .d_write = tapwrite, 140 .d_ioctl = tapioctl, 141 .d_poll = tappoll, 142 .d_name = CDEV_NAME, 143 .d_kqfilter = tapkqfilter, 144}; 145 146/* 147 * All global variables in if_tap.c are locked with tapmtx, with the 148 * exception of tapdebug, which is accessed unlocked; tapclones is 149 * static at runtime. 150 */ 151static struct mtx tapmtx; 152static int tapdebug = 0; /* debug flag */ 153static int tapuopen = 0; /* allow user open() */ 154static int tapuponopen = 0; /* IFF_UP on open() */ 155static int tapdclone = 1; /* enable devfs cloning */ 156static SLIST_HEAD(, tap_softc) taphead; /* first device */ 157static struct clonedevs *tapclones; 158 159MALLOC_DECLARE(M_TAP); 160MALLOC_DEFINE(M_TAP, CDEV_NAME, "Ethernet tunnel interface"); 161SYSCTL_INT(_debug, OID_AUTO, if_tap_debug, CTLFLAG_RW, &tapdebug, 0, ""); 162 163SYSCTL_DECL(_net_link); 164SYSCTL_NODE(_net_link, OID_AUTO, tap, CTLFLAG_RW, 0, 165 "Ethernet tunnel software network interface"); 166SYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tapuopen, 0, 167 "Allow user to open /dev/tap (based on node permissions)"); 168SYSCTL_INT(_net_link_tap, OID_AUTO, up_on_open, CTLFLAG_RW, &tapuponopen, 0, 169 "Bring interface up when /dev/tap is opened"); 170SYSCTL_INT(_net_link_tap, OID_AUTO, devfs_cloning, CTLFLAG_RW, &tapdclone, 0, 171 "Enably legacy devfs interface creation"); 172SYSCTL_INT(_net_link_tap, OID_AUTO, debug, CTLFLAG_RW, &tapdebug, 0, ""); 173 174TUNABLE_INT("net.link.tap.devfs_cloning", &tapdclone); 175 176DEV_MODULE(if_tap, tapmodevent, NULL); 177 178static int 179tap_clone_create(struct if_clone *ifc, int unit, caddr_t params) 180{ 181 struct cdev *dev; 182 int i; 183 int extra; 184 185 if (strcmp(ifc->ifc_name, VMNET) == 0) 186 extra = VMNET_DEV_MASK; 187 else 188 extra = 0; 189 190 /* find any existing device, or allocate new unit number */ 191 i = clone_create(&tapclones, &tap_cdevsw, &unit, &dev, extra); 192 if (i) { 193 dev = make_dev(&tap_cdevsw, unit | extra, 194 UID_ROOT, GID_WHEEL, 0600, "%s%d", ifc->ifc_name, unit); 195 } 196 197 tapcreate(dev); 198 return (0); 199} 200 201/* vmnet devices are tap devices in disguise */ 202static int 203vmnet_clone_create(struct if_clone *ifc, int unit, caddr_t params) 204{ 205 return tap_clone_create(ifc, unit, params); 206} 207 208static void 209tap_destroy(struct tap_softc *tp) 210{ 211 struct ifnet *ifp = tp->tap_ifp; 212 213 /* Unlocked read. */ 214 KASSERT(!(tp->tap_flags & TAP_OPEN), 215 ("%s flags is out of sync", ifp->if_xname)); 216 217 seldrain(&tp->tap_rsel); 218 knlist_destroy(&tp->tap_rsel.si_note); 219 destroy_dev(tp->tap_dev); 220 ether_ifdetach(ifp); 221 if_free_type(ifp, IFT_ETHER); 222 223 mtx_destroy(&tp->tap_mtx); 224 free(tp, M_TAP); 225} 226 227static void 228tap_clone_destroy(struct ifnet *ifp) 229{ 230 struct tap_softc *tp = ifp->if_softc; 231 232 mtx_lock(&tapmtx); 233 SLIST_REMOVE(&taphead, tp, tap_softc, tap_next); 234 mtx_unlock(&tapmtx); 235 tap_destroy(tp); 236} 237 238/* vmnet devices are tap devices in disguise */ 239static void 240vmnet_clone_destroy(struct ifnet *ifp) 241{ 242 tap_clone_destroy(ifp); 243} 244 245/* 246 * tapmodevent 247 * 248 * module event handler 249 */ 250static int 251tapmodevent(module_t mod, int type, void *data) 252{ 253 static eventhandler_tag eh_tag = NULL; 254 struct tap_softc *tp = NULL; 255 struct ifnet *ifp = NULL; 256 257 switch (type) { 258 case MOD_LOAD: 259 260 /* intitialize device */ 261 262 mtx_init(&tapmtx, "tapmtx", NULL, MTX_DEF); 263 SLIST_INIT(&taphead); 264 265 clone_setup(&tapclones); 266 eh_tag = EVENTHANDLER_REGISTER(dev_clone, tapclone, 0, 1000); 267 if (eh_tag == NULL) { 268 clone_cleanup(&tapclones); 269 mtx_destroy(&tapmtx); 270 return (ENOMEM); 271 } 272 if_clone_attach(&tap_cloner); 273 if_clone_attach(&vmnet_cloner); 274 return (0); 275 276 case MOD_UNLOAD: 277 /* 278 * The EBUSY algorithm here can't quite atomically 279 * guarantee that this is race-free since we have to 280 * release the tap mtx to deregister the clone handler. 281 */ 282 mtx_lock(&tapmtx); 283 SLIST_FOREACH(tp, &taphead, tap_next) { 284 mtx_lock(&tp->tap_mtx); 285 if (tp->tap_flags & TAP_OPEN) { 286 mtx_unlock(&tp->tap_mtx); 287 mtx_unlock(&tapmtx); 288 return (EBUSY); 289 } 290 mtx_unlock(&tp->tap_mtx); 291 } 292 mtx_unlock(&tapmtx); 293 294 EVENTHANDLER_DEREGISTER(dev_clone, eh_tag); 295 if_clone_detach(&tap_cloner); 296 if_clone_detach(&vmnet_cloner); 297 drain_dev_clone_events(); 298 299 mtx_lock(&tapmtx); 300 while ((tp = SLIST_FIRST(&taphead)) != NULL) { 301 SLIST_REMOVE_HEAD(&taphead, tap_next); 302 mtx_unlock(&tapmtx); 303 304 ifp = tp->tap_ifp; 305 306 TAPDEBUG("detaching %s\n", ifp->if_xname); 307 308 tap_destroy(tp); 309 mtx_lock(&tapmtx); 310 } 311 mtx_unlock(&tapmtx); 312 clone_cleanup(&tapclones); 313 314 mtx_destroy(&tapmtx); 315 316 break; 317 318 default: 319 return (EOPNOTSUPP); 320 } 321 322 return (0); 323} /* tapmodevent */ 324 325 326/* 327 * DEVFS handler 328 * 329 * We need to support two kind of devices - tap and vmnet 330 */ 331static void 332tapclone(void *arg, struct ucred *cred, char *name, int namelen, struct cdev **dev) 333{ 334 char devname[SPECNAMELEN + 1]; 335 int i, unit, append_unit; 336 int extra; 337 338 if (*dev != NULL) 339 return; 340 341 if (!tapdclone || 342 (!tapuopen && priv_check_cred(cred, PRIV_NET_IFCREATE, 0) != 0)) 343 return; 344 345 unit = 0; 346 append_unit = 0; 347 extra = 0; 348 349 /* We're interested in only tap/vmnet devices. */ 350 if (strcmp(name, TAP) == 0) { 351 unit = -1; 352 } else if (strcmp(name, VMNET) == 0) { 353 unit = -1; 354 extra = VMNET_DEV_MASK; 355 } else if (dev_stdclone(name, NULL, TAP, &unit) != 1) { 356 if (dev_stdclone(name, NULL, VMNET, &unit) != 1) { 357 return; 358 } else { 359 extra = VMNET_DEV_MASK; 360 } 361 } 362 363 if (unit == -1) 364 append_unit = 1; 365 366 /* find any existing device, or allocate new unit number */ 367 i = clone_create(&tapclones, &tap_cdevsw, &unit, dev, extra); 368 if (i) { 369 if (append_unit) { 370 /* 371 * We were passed 'tun' or 'tap', with no unit specified 372 * so we'll need to append it now. 373 */ 374 namelen = snprintf(devname, sizeof(devname), "%s%d", name, 375 unit); 376 name = devname; 377 } 378 379 *dev = make_dev_credf(MAKEDEV_REF, &tap_cdevsw, unit | extra, 380 cred, UID_ROOT, GID_WHEEL, 0600, "%s", name); 381 } 382 383 if_clone_create(name, namelen, NULL); 384} /* tapclone */ 385 386 387/* 388 * tapcreate 389 * 390 * to create interface 391 */ 392static void 393tapcreate(struct cdev *dev) 394{ 395 struct ifnet *ifp = NULL; 396 struct tap_softc *tp = NULL; 397 unsigned short macaddr_hi; 398 uint32_t macaddr_mid; 399 int unit; 400 char *name = NULL; 401 u_char eaddr[6]; 402 403 dev->si_flags &= ~SI_CHEAPCLONE; 404 405 /* allocate driver storage and create device */ 406 tp = malloc(sizeof(*tp), M_TAP, M_WAITOK | M_ZERO); 407 mtx_init(&tp->tap_mtx, "tap_mtx", NULL, MTX_DEF); 408 mtx_lock(&tapmtx); 409 SLIST_INSERT_HEAD(&taphead, tp, tap_next); 410 mtx_unlock(&tapmtx); 411 412 unit = dev2unit(dev); 413 414 /* select device: tap or vmnet */ 415 if (unit & VMNET_DEV_MASK) { 416 name = VMNET; 417 tp->tap_flags |= TAP_VMNET; 418 } else 419 name = TAP; 420 421 unit &= TAPMAXUNIT; 422 423 TAPDEBUG("tapcreate(%s%d). minor = %#x\n", name, unit, dev2unit(dev)); 424 425 /* generate fake MAC address: 00 bd xx xx xx unit_no */ 426 macaddr_hi = htons(0x00bd); 427 macaddr_mid = (uint32_t) ticks; 428 bcopy(&macaddr_hi, eaddr, sizeof(short)); 429 bcopy(&macaddr_mid, &eaddr[2], sizeof(uint32_t)); 430 eaddr[5] = (u_char)unit; 431 432 /* fill the rest and attach interface */ 433 ifp = tp->tap_ifp = if_alloc(IFT_ETHER); 434 if (ifp == NULL) 435 panic("%s%d: can not if_alloc()", name, unit); 436 ifp->if_softc = tp; 437 if_initname(ifp, name, unit); 438 ifp->if_init = tapifinit; 439 ifp->if_start = tapifstart; 440 ifp->if_ioctl = tapifioctl; 441 ifp->if_mtu = ETHERMTU; 442 ifp->if_flags = (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST); 443 IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); 444 ifp->if_capabilities |= IFCAP_LINKSTATE; 445 ifp->if_capenable |= IFCAP_LINKSTATE; 446 447 dev->si_drv1 = tp; 448 tp->tap_dev = dev; 449 450 ether_ifattach(ifp, eaddr); 451 452 mtx_lock(&tp->tap_mtx); 453 tp->tap_flags |= TAP_INITED; 454 mtx_unlock(&tp->tap_mtx); 455 456 knlist_init_mtx(&tp->tap_rsel.si_note, &tp->tap_mtx); 457 458 TAPDEBUG("interface %s is created. minor = %#x\n", 459 ifp->if_xname, dev2unit(dev)); 460} /* tapcreate */ 461 462 463/* 464 * tapopen 465 * 466 * to open tunnel. must be superuser 467 */ 468static int 469tapopen(struct cdev *dev, int flag, int mode, struct thread *td) 470{ 471 struct tap_softc *tp = NULL; 472 struct ifnet *ifp = NULL; 473 int error; 474 475 if (tapuopen == 0) { 476 error = priv_check(td, PRIV_NET_TAP); 477 if (error) 478 return (error); 479 } 480 481 if ((dev2unit(dev) & CLONE_UNITMASK) > TAPMAXUNIT) 482 return (ENXIO); 483 484 tp = dev->si_drv1; 485 486 mtx_lock(&tp->tap_mtx); 487 if (tp->tap_flags & TAP_OPEN) { 488 mtx_unlock(&tp->tap_mtx); 489 return (EBUSY); 490 } 491 492 bcopy(IF_LLADDR(tp->tap_ifp), tp->ether_addr, sizeof(tp->ether_addr)); 493 tp->tap_pid = td->td_proc->p_pid; 494 tp->tap_flags |= TAP_OPEN; 495 ifp = tp->tap_ifp; 496 497 ifp->if_drv_flags |= IFF_DRV_RUNNING; 498 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 499 if (tapuponopen) 500 ifp->if_flags |= IFF_UP; 501 if_link_state_change(ifp, LINK_STATE_UP); 502 mtx_unlock(&tp->tap_mtx); 503 504 TAPDEBUG("%s is open. minor = %#x\n", ifp->if_xname, dev2unit(dev)); 505 506 return (0); 507} /* tapopen */ 508 509 510/* 511 * tapclose 512 * 513 * close the device - mark i/f down & delete routing info 514 */ 515static int 516tapclose(struct cdev *dev, int foo, int bar, struct thread *td) 517{ 518 struct ifaddr *ifa; 519 struct tap_softc *tp = dev->si_drv1; 520 struct ifnet *ifp = tp->tap_ifp; 521 522 /* junk all pending output */ 523 mtx_lock(&tp->tap_mtx); 524 IF_DRAIN(&ifp->if_snd); 525 526 /* 527 * do not bring the interface down, and do not anything with 528 * interface, if we are in VMnet mode. just close the device. 529 */ 530 531 if (((tp->tap_flags & TAP_VMNET) == 0) && (ifp->if_flags & IFF_UP)) { 532 mtx_unlock(&tp->tap_mtx); 533 if_down(ifp); 534 mtx_lock(&tp->tap_mtx); 535 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 536 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 537 mtx_unlock(&tp->tap_mtx); 538 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 539 rtinit(ifa, (int)RTM_DELETE, 0); 540 } 541 if_purgeaddrs(ifp); 542 mtx_lock(&tp->tap_mtx); 543 } 544 } 545 546 if_link_state_change(ifp, LINK_STATE_DOWN); 547 funsetown(&tp->tap_sigio); 548 selwakeuppri(&tp->tap_rsel, PZERO+1); 549 KNOTE_LOCKED(&tp->tap_rsel.si_note, 0); 550 551 tp->tap_flags &= ~TAP_OPEN; 552 tp->tap_pid = 0; 553 mtx_unlock(&tp->tap_mtx); 554 555 TAPDEBUG("%s is closed. minor = %#x\n", 556 ifp->if_xname, dev2unit(dev)); 557 558 return (0); 559} /* tapclose */ 560 561 562/* 563 * tapifinit 564 * 565 * network interface initialization function 566 */ 567static void 568tapifinit(void *xtp) 569{ 570 struct tap_softc *tp = (struct tap_softc *)xtp; 571 struct ifnet *ifp = tp->tap_ifp; 572 573 TAPDEBUG("initializing %s\n", ifp->if_xname); 574 575 mtx_lock(&tp->tap_mtx); 576 ifp->if_drv_flags |= IFF_DRV_RUNNING; 577 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 578 mtx_unlock(&tp->tap_mtx); 579 580 /* attempt to start output */ 581 tapifstart(ifp); 582} /* tapifinit */ 583 584 585/* 586 * tapifioctl 587 * 588 * Process an ioctl request on network interface 589 */ 590static int 591tapifioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 592{ 593 struct tap_softc *tp = ifp->if_softc; 594 struct ifreq *ifr = (struct ifreq *)data; 595 struct ifstat *ifs = NULL; 596 int dummy; 597 598 switch (cmd) { 599 case SIOCSIFFLAGS: /* XXX -- just like vmnet does */ 600 case SIOCADDMULTI: 601 case SIOCDELMULTI: 602 break; 603 604 case SIOCSIFMTU: 605 ifp->if_mtu = ifr->ifr_mtu; 606 break; 607 608 case SIOCGIFSTATUS: 609 ifs = (struct ifstat *)data; 610 dummy = strlen(ifs->ascii); 611 mtx_lock(&tp->tap_mtx); 612 if (tp->tap_pid != 0 && dummy < sizeof(ifs->ascii)) 613 snprintf(ifs->ascii + dummy, 614 sizeof(ifs->ascii) - dummy, 615 "\tOpened by PID %d\n", tp->tap_pid); 616 mtx_unlock(&tp->tap_mtx); 617 break; 618 619 default: 620 return (ether_ioctl(ifp, cmd, data)); 621 /* NOT REACHED */ 622 } 623 624 return (0); 625} /* tapifioctl */ 626 627 628/* 629 * tapifstart 630 * 631 * queue packets from higher level ready to put out 632 */ 633static void 634tapifstart(struct ifnet *ifp) 635{ 636 struct tap_softc *tp = ifp->if_softc; 637 638 TAPDEBUG("%s starting\n", ifp->if_xname); 639 640 /* 641 * do not junk pending output if we are in VMnet mode. 642 * XXX: can this do any harm because of queue overflow? 643 */ 644 645 mtx_lock(&tp->tap_mtx); 646 if (((tp->tap_flags & TAP_VMNET) == 0) && 647 ((tp->tap_flags & TAP_READY) != TAP_READY)) { 648 struct mbuf *m; 649 650 /* Unlocked read. */ 651 TAPDEBUG("%s not ready, tap_flags = 0x%x\n", ifp->if_xname, 652 tp->tap_flags); 653 654 for (;;) { 655 IF_DEQUEUE(&ifp->if_snd, m); 656 if (m != NULL) { 657 m_freem(m); 658 ifp->if_oerrors++; 659 } else 660 break; 661 } 662 mtx_unlock(&tp->tap_mtx); 663 664 return; 665 } 666 667 ifp->if_drv_flags |= IFF_DRV_OACTIVE; 668 669 if (!IFQ_IS_EMPTY(&ifp->if_snd)) { 670 if (tp->tap_flags & TAP_RWAIT) { 671 tp->tap_flags &= ~TAP_RWAIT; 672 wakeup(tp); 673 } 674 675 if ((tp->tap_flags & TAP_ASYNC) && (tp->tap_sigio != NULL)) { 676 mtx_unlock(&tp->tap_mtx); 677 pgsigio(&tp->tap_sigio, SIGIO, 0); 678 mtx_lock(&tp->tap_mtx); 679 } 680 681 selwakeuppri(&tp->tap_rsel, PZERO+1); 682 KNOTE_LOCKED(&tp->tap_rsel.si_note, 0); 683 ifp->if_opackets ++; /* obytes are counted in ether_output */ 684 } 685 686 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 687 mtx_unlock(&tp->tap_mtx); 688} /* tapifstart */ 689 690 691/* 692 * tapioctl 693 * 694 * the cdevsw interface is now pretty minimal 695 */ 696static int 697tapioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td) 698{ 699 struct tap_softc *tp = dev->si_drv1; 700 struct ifnet *ifp = tp->tap_ifp; 701 struct tapinfo *tapp = NULL; 702 int f; 703#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ 704 defined(COMPAT_FREEBSD4) 705 int ival; 706#endif 707 708 switch (cmd) { 709 case TAPSIFINFO: 710 tapp = (struct tapinfo *)data; 711 mtx_lock(&tp->tap_mtx); 712 ifp->if_mtu = tapp->mtu; 713 ifp->if_type = tapp->type; 714 ifp->if_baudrate = tapp->baudrate; 715 mtx_unlock(&tp->tap_mtx); 716 break; 717 718 case TAPGIFINFO: 719 tapp = (struct tapinfo *)data; 720 mtx_lock(&tp->tap_mtx); 721 tapp->mtu = ifp->if_mtu; 722 tapp->type = ifp->if_type; 723 tapp->baudrate = ifp->if_baudrate; 724 mtx_unlock(&tp->tap_mtx); 725 break; 726 727 case TAPSDEBUG: 728 tapdebug = *(int *)data; 729 break; 730 731 case TAPGDEBUG: 732 *(int *)data = tapdebug; 733 break; 734 735 case TAPGIFNAME: { 736 struct ifreq *ifr = (struct ifreq *) data; 737 738 strlcpy(ifr->ifr_name, ifp->if_xname, IFNAMSIZ); 739 } break; 740 741 case FIONBIO: 742 break; 743 744 case FIOASYNC: 745 mtx_lock(&tp->tap_mtx); 746 if (*(int *)data) 747 tp->tap_flags |= TAP_ASYNC; 748 else 749 tp->tap_flags &= ~TAP_ASYNC; 750 mtx_unlock(&tp->tap_mtx); 751 break; 752 753 case FIONREAD: 754 if (!IFQ_IS_EMPTY(&ifp->if_snd)) { 755 struct mbuf *mb; 756 757 IFQ_LOCK(&ifp->if_snd); 758 IFQ_POLL_NOLOCK(&ifp->if_snd, mb); 759 for (*(int *)data = 0; mb != NULL; 760 mb = mb->m_next) 761 *(int *)data += mb->m_len; 762 IFQ_UNLOCK(&ifp->if_snd); 763 } else 764 *(int *)data = 0; 765 break; 766 767 case FIOSETOWN: 768 return (fsetown(*(int *)data, &tp->tap_sigio)); 769 770 case FIOGETOWN: 771 *(int *)data = fgetown(&tp->tap_sigio); 772 return (0); 773 774 /* this is deprecated, FIOSETOWN should be used instead */ 775 case TIOCSPGRP: 776 return (fsetown(-(*(int *)data), &tp->tap_sigio)); 777 778 /* this is deprecated, FIOGETOWN should be used instead */ 779 case TIOCGPGRP: 780 *(int *)data = -fgetown(&tp->tap_sigio); 781 return (0); 782 783 /* VMware/VMnet port ioctl's */ 784 785#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ 786 defined(COMPAT_FREEBSD4) 787 case _IO('V', 0): 788 ival = IOCPARM_IVAL(data); 789 data = (caddr_t)&ival; 790 /* FALLTHROUGH */ 791#endif 792 case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */ 793 f = *(int *)data; 794 f &= 0x0fff; 795 f &= ~IFF_CANTCHANGE; 796 f |= IFF_UP; 797 798 mtx_lock(&tp->tap_mtx); 799 ifp->if_flags = f | (ifp->if_flags & IFF_CANTCHANGE); 800 mtx_unlock(&tp->tap_mtx); 801 break; 802 803 case OSIOCGIFADDR: /* get MAC address of the remote side */ 804 case SIOCGIFADDR: 805 mtx_lock(&tp->tap_mtx); 806 bcopy(tp->ether_addr, data, sizeof(tp->ether_addr)); 807 mtx_unlock(&tp->tap_mtx); 808 break; 809 810 case SIOCSIFADDR: /* set MAC address of the remote side */ 811 mtx_lock(&tp->tap_mtx); 812 bcopy(data, tp->ether_addr, sizeof(tp->ether_addr)); 813 mtx_unlock(&tp->tap_mtx); 814 break; 815 816 default: 817 return (ENOTTY); 818 } 819 return (0); 820} /* tapioctl */ 821 822 823/* 824 * tapread 825 * 826 * the cdevsw read interface - reads a packet at a time, or at 827 * least as much of a packet as can be read 828 */ 829static int 830tapread(struct cdev *dev, struct uio *uio, int flag) 831{ 832 struct tap_softc *tp = dev->si_drv1; 833 struct ifnet *ifp = tp->tap_ifp; 834 struct mbuf *m = NULL; 835 int error = 0, len; 836 837 TAPDEBUG("%s reading, minor = %#x\n", ifp->if_xname, dev2unit(dev)); 838 839 mtx_lock(&tp->tap_mtx); 840 if ((tp->tap_flags & TAP_READY) != TAP_READY) { 841 mtx_unlock(&tp->tap_mtx); 842 843 /* Unlocked read. */ 844 TAPDEBUG("%s not ready. minor = %#x, tap_flags = 0x%x\n", 845 ifp->if_xname, dev2unit(dev), tp->tap_flags); 846 847 return (EHOSTDOWN); 848 } 849 850 tp->tap_flags &= ~TAP_RWAIT; 851 852 /* sleep until we get a packet */ 853 do { 854 IF_DEQUEUE(&ifp->if_snd, m); 855 856 if (m == NULL) { 857 if (flag & O_NONBLOCK) { 858 mtx_unlock(&tp->tap_mtx); 859 return (EWOULDBLOCK); 860 } 861 862 tp->tap_flags |= TAP_RWAIT; 863 error = mtx_sleep(tp, &tp->tap_mtx, PCATCH | (PZERO + 1), 864 "taprd", 0); 865 if (error) { 866 mtx_unlock(&tp->tap_mtx); 867 return (error); 868 } 869 } 870 } while (m == NULL); 871 mtx_unlock(&tp->tap_mtx); 872 873 /* feed packet to bpf */ 874 BPF_MTAP(ifp, m); 875 876 /* xfer packet to user space */ 877 while ((m != NULL) && (uio->uio_resid > 0) && (error == 0)) { 878 len = min(uio->uio_resid, m->m_len); 879 if (len == 0) 880 break; 881 882 error = uiomove(mtod(m, void *), len, uio); 883 m = m_free(m); 884 } 885 886 if (m != NULL) { 887 TAPDEBUG("%s dropping mbuf, minor = %#x\n", ifp->if_xname, 888 dev2unit(dev)); 889 m_freem(m); 890 } 891 892 return (error); 893} /* tapread */ 894 895 896/* 897 * tapwrite 898 * 899 * the cdevsw write interface - an atomic write is a packet - or else! 900 */ 901static int 902tapwrite(struct cdev *dev, struct uio *uio, int flag) 903{ 904 struct ether_header *eh; 905 struct tap_softc *tp = dev->si_drv1; 906 struct ifnet *ifp = tp->tap_ifp; 907 struct mbuf *m; 908 909 TAPDEBUG("%s writting, minor = %#x\n", 910 ifp->if_xname, dev2unit(dev)); 911 912 if (uio->uio_resid == 0) 913 return (0); 914 915 if ((uio->uio_resid < 0) || (uio->uio_resid > TAPMRU)) { 916 TAPDEBUG("%s invalid packet len = %zd, minor = %#x\n", 917 ifp->if_xname, uio->uio_resid, dev2unit(dev)); 918 919 return (EIO); 920 } 921 922 if ((m = m_uiotombuf(uio, M_DONTWAIT, 0, ETHER_ALIGN, 923 M_PKTHDR)) == NULL) { 924 ifp->if_ierrors ++; 925 return (ENOBUFS); 926 } 927 928 m->m_pkthdr.rcvif = ifp; 929 930 /* 931 * Only pass a unicast frame to ether_input(), if it would actually 932 * have been received by non-virtual hardware. 933 */ 934 if (m->m_len < sizeof(struct ether_header)) { 935 m_freem(m); 936 return (0); 937 } 938 eh = mtod(m, struct ether_header *); 939 940 if (eh && (ifp->if_flags & IFF_PROMISC) == 0 && 941 !ETHER_IS_MULTICAST(eh->ether_dhost) && 942 bcmp(eh->ether_dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN) != 0) { 943 m_freem(m); 944 return (0); 945 } 946 947 /* Pass packet up to parent. */ 948 (*ifp->if_input)(ifp, m); 949 ifp->if_ipackets ++; /* ibytes are counted in parent */ 950 951 return (0); 952} /* tapwrite */ 953 954 955/* 956 * tappoll 957 * 958 * the poll interface, this is only useful on reads 959 * really. the write detect always returns true, write never blocks 960 * anyway, it either accepts the packet or drops it 961 */ 962static int 963tappoll(struct cdev *dev, int events, struct thread *td) 964{ 965 struct tap_softc *tp = dev->si_drv1; 966 struct ifnet *ifp = tp->tap_ifp; 967 int revents = 0; 968 969 TAPDEBUG("%s polling, minor = %#x\n", 970 ifp->if_xname, dev2unit(dev)); 971 972 if (events & (POLLIN | POLLRDNORM)) { 973 IFQ_LOCK(&ifp->if_snd); 974 if (!IFQ_IS_EMPTY(&ifp->if_snd)) { 975 TAPDEBUG("%s have data in queue. len = %d, " \ 976 "minor = %#x\n", ifp->if_xname, 977 ifp->if_snd.ifq_len, dev2unit(dev)); 978 979 revents |= (events & (POLLIN | POLLRDNORM)); 980 } else { 981 TAPDEBUG("%s waiting for data, minor = %#x\n", 982 ifp->if_xname, dev2unit(dev)); 983 984 selrecord(td, &tp->tap_rsel); 985 } 986 IFQ_UNLOCK(&ifp->if_snd); 987 } 988 989 if (events & (POLLOUT | POLLWRNORM)) 990 revents |= (events & (POLLOUT | POLLWRNORM)); 991 992 return (revents); 993} /* tappoll */ 994 995 996/* 997 * tap_kqfilter 998 * 999 * support for kevent() system call 1000 */ 1001static int 1002tapkqfilter(struct cdev *dev, struct knote *kn) 1003{ 1004 struct tap_softc *tp = dev->si_drv1; 1005 struct ifnet *ifp = tp->tap_ifp; 1006 1007 switch (kn->kn_filter) { 1008 case EVFILT_READ: 1009 TAPDEBUG("%s kqfilter: EVFILT_READ, minor = %#x\n", 1010 ifp->if_xname, dev2unit(dev)); 1011 kn->kn_fop = &tap_read_filterops; 1012 break; 1013 1014 case EVFILT_WRITE: 1015 TAPDEBUG("%s kqfilter: EVFILT_WRITE, minor = %#x\n", 1016 ifp->if_xname, dev2unit(dev)); 1017 kn->kn_fop = &tap_write_filterops; 1018 break; 1019 1020 default: 1021 TAPDEBUG("%s kqfilter: invalid filter, minor = %#x\n", 1022 ifp->if_xname, dev2unit(dev)); 1023 return (EINVAL); 1024 /* NOT REACHED */ 1025 } 1026 1027 kn->kn_hook = tp; 1028 knlist_add(&tp->tap_rsel.si_note, kn, 0); 1029 1030 return (0); 1031} /* tapkqfilter */ 1032 1033 1034/* 1035 * tap_kqread 1036 * 1037 * Return true if there is data in the interface queue 1038 */ 1039static int 1040tapkqread(struct knote *kn, long hint) 1041{ 1042 int ret; 1043 struct tap_softc *tp = kn->kn_hook; 1044 struct cdev *dev = tp->tap_dev; 1045 struct ifnet *ifp = tp->tap_ifp; 1046 1047 if ((kn->kn_data = ifp->if_snd.ifq_len) > 0) { 1048 TAPDEBUG("%s have data in queue. len = %d, minor = %#x\n", 1049 ifp->if_xname, ifp->if_snd.ifq_len, dev2unit(dev)); 1050 ret = 1; 1051 } else { 1052 TAPDEBUG("%s waiting for data, minor = %#x\n", 1053 ifp->if_xname, dev2unit(dev)); 1054 ret = 0; 1055 } 1056 1057 return (ret); 1058} /* tapkqread */ 1059 1060 1061/* 1062 * tap_kqwrite 1063 * 1064 * Always can write. Return the MTU in kn->data 1065 */ 1066static int 1067tapkqwrite(struct knote *kn, long hint) 1068{ 1069 struct tap_softc *tp = kn->kn_hook; 1070 struct ifnet *ifp = tp->tap_ifp; 1071 1072 kn->kn_data = ifp->if_mtu; 1073 1074 return (1); 1075} /* tapkqwrite */ 1076 1077 1078static void 1079tapkqdetach(struct knote *kn) 1080{ 1081 struct tap_softc *tp = kn->kn_hook; 1082 1083 knlist_remove(&tp->tap_rsel.si_note, kn, 0); 1084} /* tapkqdetach */ 1085 1086