/* if_tap.c revision 346803 */
1/*- 2 * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * BASED ON: 27 * ------------------------------------------------------------------------- 28 * 29 * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk> 30 * Nottingham University 1987. 
31 */ 32 33/* 34 * $FreeBSD: stable/11/sys/net/if_tap.c 346803 2019-04-28 03:51:08Z kevans $ 35 * $Id: if_tap.c,v 0.21 2000/07/23 21:46:02 max Exp $ 36 */ 37 38#include "opt_compat.h" 39#include "opt_inet.h" 40 41#include <sys/param.h> 42#include <sys/conf.h> 43#include <sys/fcntl.h> 44#include <sys/filio.h> 45#include <sys/jail.h> 46#include <sys/kernel.h> 47#include <sys/malloc.h> 48#include <sys/mbuf.h> 49#include <sys/module.h> 50#include <sys/poll.h> 51#include <sys/priv.h> 52#include <sys/proc.h> 53#include <sys/selinfo.h> 54#include <sys/signalvar.h> 55#include <sys/socket.h> 56#include <sys/sockio.h> 57#include <sys/sysctl.h> 58#include <sys/systm.h> 59#include <sys/ttycom.h> 60#include <sys/uio.h> 61#include <sys/queue.h> 62 63#include <net/bpf.h> 64#include <net/ethernet.h> 65#include <net/if.h> 66#include <net/if_var.h> 67#include <net/if_clone.h> 68#include <net/if_dl.h> 69#include <net/if_media.h> 70#include <net/if_types.h> 71#include <net/route.h> 72#include <net/vnet.h> 73 74#include <netinet/in.h> 75 76#include <net/if_tapvar.h> 77#include <net/if_tap.h> 78 79 80#define CDEV_NAME "tap" 81#define TAPDEBUG if (tapdebug) printf 82 83static const char tapname[] = "tap"; 84static const char vmnetname[] = "vmnet"; 85#define TAPMAXUNIT 0x7fff 86#define VMNET_DEV_MASK CLONE_FLAG0 87 88/* module */ 89static int tapmodevent(module_t, int, void *); 90 91/* device */ 92static void tapclone(void *, struct ucred *, char *, int, 93 struct cdev **); 94static void tapcreate(struct cdev *); 95 96/* network interface */ 97static void tapifstart(struct ifnet *); 98static int tapifioctl(struct ifnet *, u_long, caddr_t); 99static void tapifinit(void *); 100 101static int tap_clone_create(struct if_clone *, int, caddr_t); 102static void tap_clone_destroy(struct ifnet *); 103static struct if_clone *tap_cloner; 104static int vmnet_clone_create(struct if_clone *, int, caddr_t); 105static void vmnet_clone_destroy(struct ifnet *); 106static struct if_clone *vmnet_cloner; 107 
108/* character device */ 109static d_open_t tapopen; 110static d_close_t tapclose; 111static d_read_t tapread; 112static d_write_t tapwrite; 113static d_ioctl_t tapioctl; 114static d_poll_t tappoll; 115static d_kqfilter_t tapkqfilter; 116 117/* kqueue(2) */ 118static int tapkqread(struct knote *, long); 119static int tapkqwrite(struct knote *, long); 120static void tapkqdetach(struct knote *); 121 122static struct filterops tap_read_filterops = { 123 .f_isfd = 1, 124 .f_attach = NULL, 125 .f_detach = tapkqdetach, 126 .f_event = tapkqread, 127}; 128 129static struct filterops tap_write_filterops = { 130 .f_isfd = 1, 131 .f_attach = NULL, 132 .f_detach = tapkqdetach, 133 .f_event = tapkqwrite, 134}; 135 136static struct cdevsw tap_cdevsw = { 137 .d_version = D_VERSION, 138 .d_flags = D_NEEDMINOR, 139 .d_open = tapopen, 140 .d_close = tapclose, 141 .d_read = tapread, 142 .d_write = tapwrite, 143 .d_ioctl = tapioctl, 144 .d_poll = tappoll, 145 .d_name = CDEV_NAME, 146 .d_kqfilter = tapkqfilter, 147}; 148 149/* 150 * All global variables in if_tap.c are locked with tapmtx, with the 151 * exception of tapdebug, which is accessed unlocked; tapclones is 152 * static at runtime. 
153 */ 154static struct mtx tapmtx; 155static int tapdebug = 0; /* debug flag */ 156static int tapuopen = 0; /* allow user open() */ 157static int tapuponopen = 0; /* IFF_UP on open() */ 158static int tapdclone = 1; /* enable devfs cloning */ 159static SLIST_HEAD(, tap_softc) taphead; /* first device */ 160static struct clonedevs *tapclones; 161 162MALLOC_DECLARE(M_TAP); 163MALLOC_DEFINE(M_TAP, CDEV_NAME, "Ethernet tunnel interface"); 164SYSCTL_INT(_debug, OID_AUTO, if_tap_debug, CTLFLAG_RW, &tapdebug, 0, ""); 165 166SYSCTL_DECL(_net_link); 167static SYSCTL_NODE(_net_link, OID_AUTO, tap, CTLFLAG_RW, 0, 168 "Ethernet tunnel software network interface"); 169SYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tapuopen, 0, 170 "Allow user to open /dev/tap (based on node permissions)"); 171SYSCTL_INT(_net_link_tap, OID_AUTO, up_on_open, CTLFLAG_RW, &tapuponopen, 0, 172 "Bring interface up when /dev/tap is opened"); 173SYSCTL_INT(_net_link_tap, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tapdclone, 0, 174 "Enable legacy devfs interface creation"); 175SYSCTL_INT(_net_link_tap, OID_AUTO, debug, CTLFLAG_RW, &tapdebug, 0, ""); 176 177DEV_MODULE(if_tap, tapmodevent, NULL); 178MODULE_VERSION(if_tap, 1); 179 180static int 181tap_clone_create(struct if_clone *ifc, int unit, caddr_t params) 182{ 183 struct cdev *dev; 184 int i; 185 186 /* Find any existing device, or allocate new unit number. */ 187 i = clone_create(&tapclones, &tap_cdevsw, &unit, &dev, 0); 188 if (i) { 189 dev = make_dev(&tap_cdevsw, unit, UID_ROOT, GID_WHEEL, 0600, 190 "%s%d", tapname, unit); 191 } 192 193 tapcreate(dev); 194 return (0); 195} 196 197/* vmnet devices are tap devices in disguise */ 198static int 199vmnet_clone_create(struct if_clone *ifc, int unit, caddr_t params) 200{ 201 struct cdev *dev; 202 int i; 203 204 /* Find any existing device, or allocate new unit number. 
*/ 205 i = clone_create(&tapclones, &tap_cdevsw, &unit, &dev, VMNET_DEV_MASK); 206 if (i) { 207 dev = make_dev(&tap_cdevsw, unit | VMNET_DEV_MASK, UID_ROOT, 208 GID_WHEEL, 0600, "%s%d", vmnetname, unit); 209 } 210 211 tapcreate(dev); 212 return (0); 213} 214 215static void 216tap_destroy(struct tap_softc *tp) 217{ 218 struct ifnet *ifp = tp->tap_ifp; 219 220 CURVNET_SET(ifp->if_vnet); 221 destroy_dev(tp->tap_dev); 222 seldrain(&tp->tap_rsel); 223 knlist_clear(&tp->tap_rsel.si_note, 0); 224 knlist_destroy(&tp->tap_rsel.si_note); 225 ether_ifdetach(ifp); 226 if_free(ifp); 227 228 mtx_destroy(&tp->tap_mtx); 229 free(tp, M_TAP); 230 CURVNET_RESTORE(); 231} 232 233static void 234tap_clone_destroy(struct ifnet *ifp) 235{ 236 struct tap_softc *tp = ifp->if_softc; 237 238 mtx_lock(&tapmtx); 239 SLIST_REMOVE(&taphead, tp, tap_softc, tap_next); 240 mtx_unlock(&tapmtx); 241 tap_destroy(tp); 242} 243 244/* vmnet devices are tap devices in disguise */ 245static void 246vmnet_clone_destroy(struct ifnet *ifp) 247{ 248 tap_clone_destroy(ifp); 249} 250 251/* 252 * tapmodevent 253 * 254 * module event handler 255 */ 256static int 257tapmodevent(module_t mod, int type, void *data) 258{ 259 static eventhandler_tag eh_tag = NULL; 260 struct tap_softc *tp = NULL; 261 struct ifnet *ifp = NULL; 262 263 switch (type) { 264 case MOD_LOAD: 265 266 /* intitialize device */ 267 268 mtx_init(&tapmtx, "tapmtx", NULL, MTX_DEF); 269 SLIST_INIT(&taphead); 270 271 clone_setup(&tapclones); 272 eh_tag = EVENTHANDLER_REGISTER(dev_clone, tapclone, 0, 1000); 273 if (eh_tag == NULL) { 274 clone_cleanup(&tapclones); 275 mtx_destroy(&tapmtx); 276 return (ENOMEM); 277 } 278 tap_cloner = if_clone_simple(tapname, tap_clone_create, 279 tap_clone_destroy, 0); 280 vmnet_cloner = if_clone_simple(vmnetname, vmnet_clone_create, 281 vmnet_clone_destroy, 0); 282 return (0); 283 284 case MOD_UNLOAD: 285 /* 286 * The EBUSY algorithm here can't quite atomically 287 * guarantee that this is race-free since we have to 288 
* release the tap mtx to deregister the clone handler. 289 */ 290 mtx_lock(&tapmtx); 291 SLIST_FOREACH(tp, &taphead, tap_next) { 292 mtx_lock(&tp->tap_mtx); 293 if (tp->tap_flags & TAP_OPEN) { 294 mtx_unlock(&tp->tap_mtx); 295 mtx_unlock(&tapmtx); 296 return (EBUSY); 297 } 298 mtx_unlock(&tp->tap_mtx); 299 } 300 mtx_unlock(&tapmtx); 301 302 EVENTHANDLER_DEREGISTER(dev_clone, eh_tag); 303 if_clone_detach(tap_cloner); 304 if_clone_detach(vmnet_cloner); 305 drain_dev_clone_events(); 306 307 mtx_lock(&tapmtx); 308 while ((tp = SLIST_FIRST(&taphead)) != NULL) { 309 SLIST_REMOVE_HEAD(&taphead, tap_next); 310 mtx_unlock(&tapmtx); 311 312 ifp = tp->tap_ifp; 313 314 TAPDEBUG("detaching %s\n", ifp->if_xname); 315 316 tap_destroy(tp); 317 mtx_lock(&tapmtx); 318 } 319 mtx_unlock(&tapmtx); 320 clone_cleanup(&tapclones); 321 322 mtx_destroy(&tapmtx); 323 324 break; 325 326 default: 327 return (EOPNOTSUPP); 328 } 329 330 return (0); 331} /* tapmodevent */ 332 333 334/* 335 * DEVFS handler 336 * 337 * We need to support two kind of devices - tap and vmnet 338 */ 339static void 340tapclone(void *arg, struct ucred *cred, char *name, int namelen, struct cdev **dev) 341{ 342 char devname[SPECNAMELEN + 1]; 343 int i, unit, append_unit; 344 int extra; 345 346 if (*dev != NULL) 347 return; 348 349 if (!tapdclone || 350 (!tapuopen && priv_check_cred(cred, PRIV_NET_IFCREATE, 0) != 0)) 351 return; 352 353 unit = 0; 354 append_unit = 0; 355 extra = 0; 356 357 /* We're interested in only tap/vmnet devices. 
*/ 358 if (strcmp(name, tapname) == 0) { 359 unit = -1; 360 } else if (strcmp(name, vmnetname) == 0) { 361 unit = -1; 362 extra = VMNET_DEV_MASK; 363 } else if (dev_stdclone(name, NULL, tapname, &unit) != 1) { 364 if (dev_stdclone(name, NULL, vmnetname, &unit) != 1) { 365 return; 366 } else { 367 extra = VMNET_DEV_MASK; 368 } 369 } 370 371 if (unit == -1) 372 append_unit = 1; 373 374 CURVNET_SET(CRED_TO_VNET(cred)); 375 /* find any existing device, or allocate new unit number */ 376 i = clone_create(&tapclones, &tap_cdevsw, &unit, dev, extra); 377 if (i) { 378 if (append_unit) { 379 /* 380 * We were passed 'tun' or 'tap', with no unit specified 381 * so we'll need to append it now. 382 */ 383 namelen = snprintf(devname, sizeof(devname), "%s%d", name, 384 unit); 385 name = devname; 386 } 387 388 *dev = make_dev_credf(MAKEDEV_REF, &tap_cdevsw, unit | extra, 389 cred, UID_ROOT, GID_WHEEL, 0600, "%s", name); 390 } 391 392 if_clone_create(name, namelen, NULL); 393 CURVNET_RESTORE(); 394} /* tapclone */ 395 396 397/* 398 * tapcreate 399 * 400 * to create interface 401 */ 402static void 403tapcreate(struct cdev *dev) 404{ 405 struct ifnet *ifp = NULL; 406 struct tap_softc *tp = NULL; 407 unsigned short macaddr_hi; 408 uint32_t macaddr_mid; 409 int unit; 410 const char *name = NULL; 411 u_char eaddr[6]; 412 413 /* allocate driver storage and create device */ 414 tp = malloc(sizeof(*tp), M_TAP, M_WAITOK | M_ZERO); 415 mtx_init(&tp->tap_mtx, "tap_mtx", NULL, MTX_DEF); 416 mtx_lock(&tapmtx); 417 SLIST_INSERT_HEAD(&taphead, tp, tap_next); 418 mtx_unlock(&tapmtx); 419 420 unit = dev2unit(dev); 421 422 /* select device: tap or vmnet */ 423 if (unit & VMNET_DEV_MASK) { 424 name = vmnetname; 425 tp->tap_flags |= TAP_VMNET; 426 } else 427 name = tapname; 428 429 unit &= TAPMAXUNIT; 430 431 TAPDEBUG("tapcreate(%s%d). 
minor = %#x\n", name, unit, dev2unit(dev)); 432 433 /* generate fake MAC address: 00 bd xx xx xx unit_no */ 434 macaddr_hi = htons(0x00bd); 435 macaddr_mid = (uint32_t) ticks; 436 bcopy(&macaddr_hi, eaddr, sizeof(short)); 437 bcopy(&macaddr_mid, &eaddr[2], sizeof(uint32_t)); 438 eaddr[5] = (u_char)unit; 439 440 /* fill the rest and attach interface */ 441 ifp = tp->tap_ifp = if_alloc(IFT_ETHER); 442 if (ifp == NULL) 443 panic("%s%d: can not if_alloc()", name, unit); 444 ifp->if_softc = tp; 445 if_initname(ifp, name, unit); 446 ifp->if_init = tapifinit; 447 ifp->if_start = tapifstart; 448 ifp->if_ioctl = tapifioctl; 449 ifp->if_mtu = ETHERMTU; 450 ifp->if_flags = (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST); 451 IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); 452 ifp->if_capabilities |= IFCAP_LINKSTATE; 453 ifp->if_capenable |= IFCAP_LINKSTATE; 454 455 dev->si_drv1 = tp; 456 tp->tap_dev = dev; 457 458 ether_ifattach(ifp, eaddr); 459 460 mtx_lock(&tp->tap_mtx); 461 tp->tap_flags |= TAP_INITED; 462 mtx_unlock(&tp->tap_mtx); 463 464 knlist_init_mtx(&tp->tap_rsel.si_note, &tp->tap_mtx); 465 466 TAPDEBUG("interface %s is created. minor = %#x\n", 467 ifp->if_xname, dev2unit(dev)); 468} /* tapcreate */ 469 470 471/* 472 * tapopen 473 * 474 * to open tunnel. 
must be superuser 475 */ 476static int 477tapopen(struct cdev *dev, int flag, int mode, struct thread *td) 478{ 479 struct tap_softc *tp = NULL; 480 struct ifnet *ifp = NULL; 481 int error; 482 483 if (tapuopen == 0) { 484 error = priv_check(td, PRIV_NET_TAP); 485 if (error) 486 return (error); 487 } 488 489 if ((dev2unit(dev) & CLONE_UNITMASK) > TAPMAXUNIT) 490 return (ENXIO); 491 492 tp = dev->si_drv1; 493 494 mtx_lock(&tp->tap_mtx); 495 if (tp->tap_flags & TAP_OPEN) { 496 mtx_unlock(&tp->tap_mtx); 497 return (EBUSY); 498 } 499 500 bcopy(IF_LLADDR(tp->tap_ifp), tp->ether_addr, sizeof(tp->ether_addr)); 501 tp->tap_pid = td->td_proc->p_pid; 502 tp->tap_flags |= TAP_OPEN; 503 ifp = tp->tap_ifp; 504 505 ifp->if_drv_flags |= IFF_DRV_RUNNING; 506 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 507 if (tapuponopen) 508 ifp->if_flags |= IFF_UP; 509 if_link_state_change(ifp, LINK_STATE_UP); 510 mtx_unlock(&tp->tap_mtx); 511 512 TAPDEBUG("%s is open. minor = %#x\n", ifp->if_xname, dev2unit(dev)); 513 514 return (0); 515} /* tapopen */ 516 517 518/* 519 * tapclose 520 * 521 * close the device - mark i/f down & delete routing info 522 */ 523static int 524tapclose(struct cdev *dev, int foo, int bar, struct thread *td) 525{ 526 struct ifaddr *ifa; 527 struct tap_softc *tp = dev->si_drv1; 528 struct ifnet *ifp = tp->tap_ifp; 529 530 /* junk all pending output */ 531 mtx_lock(&tp->tap_mtx); 532 CURVNET_SET(ifp->if_vnet); 533 IF_DRAIN(&ifp->if_snd); 534 535 /* 536 * Do not bring the interface down, and do not anything with 537 * interface, if we are in VMnet mode. Just close the device. 
538 */ 539 if (((tp->tap_flags & TAP_VMNET) == 0) && 540 (ifp->if_flags & (IFF_UP | IFF_LINK0)) == IFF_UP) { 541 mtx_unlock(&tp->tap_mtx); 542 if_down(ifp); 543 mtx_lock(&tp->tap_mtx); 544 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 545 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 546 mtx_unlock(&tp->tap_mtx); 547 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 548 rtinit(ifa, (int)RTM_DELETE, 0); 549 } 550 if_purgeaddrs(ifp); 551 mtx_lock(&tp->tap_mtx); 552 } 553 } 554 555 if_link_state_change(ifp, LINK_STATE_DOWN); 556 CURVNET_RESTORE(); 557 558 funsetown(&tp->tap_sigio); 559 selwakeuppri(&tp->tap_rsel, PZERO+1); 560 KNOTE_LOCKED(&tp->tap_rsel.si_note, 0); 561 562 tp->tap_flags &= ~TAP_OPEN; 563 tp->tap_pid = 0; 564 mtx_unlock(&tp->tap_mtx); 565 566 TAPDEBUG("%s is closed. minor = %#x\n", 567 ifp->if_xname, dev2unit(dev)); 568 569 return (0); 570} /* tapclose */ 571 572 573/* 574 * tapifinit 575 * 576 * network interface initialization function 577 */ 578static void 579tapifinit(void *xtp) 580{ 581 struct tap_softc *tp = (struct tap_softc *)xtp; 582 struct ifnet *ifp = tp->tap_ifp; 583 584 TAPDEBUG("initializing %s\n", ifp->if_xname); 585 586 mtx_lock(&tp->tap_mtx); 587 ifp->if_drv_flags |= IFF_DRV_RUNNING; 588 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 589 mtx_unlock(&tp->tap_mtx); 590 591 /* attempt to start output */ 592 tapifstart(ifp); 593} /* tapifinit */ 594 595 596/* 597 * tapifioctl 598 * 599 * Process an ioctl request on network interface 600 */ 601static int 602tapifioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 603{ 604 struct tap_softc *tp = ifp->if_softc; 605 struct ifreq *ifr = (struct ifreq *)data; 606 struct ifstat *ifs = NULL; 607 struct ifmediareq *ifmr = NULL; 608 int dummy, error = 0; 609 610 switch (cmd) { 611 case SIOCSIFFLAGS: /* XXX -- just like vmnet does */ 612 case SIOCADDMULTI: 613 case SIOCDELMULTI: 614 break; 615 616 case SIOCGIFMEDIA: 617 ifmr = (struct ifmediareq *)data; 618 dummy = ifmr->ifm_count; 619 ifmr->ifm_count = 1; 620 
ifmr->ifm_status = IFM_AVALID; 621 ifmr->ifm_active = IFM_ETHER; 622 if (tp->tap_flags & TAP_OPEN) 623 ifmr->ifm_status |= IFM_ACTIVE; 624 ifmr->ifm_current = ifmr->ifm_active; 625 if (dummy >= 1) { 626 int media = IFM_ETHER; 627 error = copyout(&media, ifmr->ifm_ulist, 628 sizeof(int)); 629 } 630 break; 631 632 case SIOCSIFMTU: 633 ifp->if_mtu = ifr->ifr_mtu; 634 break; 635 636 case SIOCGIFSTATUS: 637 ifs = (struct ifstat *)data; 638 mtx_lock(&tp->tap_mtx); 639 if (tp->tap_pid != 0) 640 snprintf(ifs->ascii, sizeof(ifs->ascii), 641 "\tOpened by PID %d\n", tp->tap_pid); 642 else 643 ifs->ascii[0] = '\0'; 644 mtx_unlock(&tp->tap_mtx); 645 break; 646 647 default: 648 error = ether_ioctl(ifp, cmd, data); 649 break; 650 } 651 652 return (error); 653} /* tapifioctl */ 654 655 656/* 657 * tapifstart 658 * 659 * queue packets from higher level ready to put out 660 */ 661static void 662tapifstart(struct ifnet *ifp) 663{ 664 struct tap_softc *tp = ifp->if_softc; 665 666 TAPDEBUG("%s starting\n", ifp->if_xname); 667 668 /* 669 * do not junk pending output if we are in VMnet mode. 670 * XXX: can this do any harm because of queue overflow? 671 */ 672 673 mtx_lock(&tp->tap_mtx); 674 if (((tp->tap_flags & TAP_VMNET) == 0) && 675 ((tp->tap_flags & TAP_READY) != TAP_READY)) { 676 struct mbuf *m; 677 678 /* Unlocked read. 
*/ 679 TAPDEBUG("%s not ready, tap_flags = 0x%x\n", ifp->if_xname, 680 tp->tap_flags); 681 682 for (;;) { 683 IF_DEQUEUE(&ifp->if_snd, m); 684 if (m != NULL) { 685 m_freem(m); 686 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 687 } else 688 break; 689 } 690 mtx_unlock(&tp->tap_mtx); 691 692 return; 693 } 694 695 ifp->if_drv_flags |= IFF_DRV_OACTIVE; 696 697 if (!IFQ_IS_EMPTY(&ifp->if_snd)) { 698 if (tp->tap_flags & TAP_RWAIT) { 699 tp->tap_flags &= ~TAP_RWAIT; 700 wakeup(tp); 701 } 702 703 if ((tp->tap_flags & TAP_ASYNC) && (tp->tap_sigio != NULL)) { 704 mtx_unlock(&tp->tap_mtx); 705 pgsigio(&tp->tap_sigio, SIGIO, 0); 706 mtx_lock(&tp->tap_mtx); 707 } 708 709 selwakeuppri(&tp->tap_rsel, PZERO+1); 710 KNOTE_LOCKED(&tp->tap_rsel.si_note, 0); 711 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); /* obytes are counted in ether_output */ 712 } 713 714 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 715 mtx_unlock(&tp->tap_mtx); 716} /* tapifstart */ 717 718 719/* 720 * tapioctl 721 * 722 * the cdevsw interface is now pretty minimal 723 */ 724static int 725tapioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td) 726{ 727 struct ifreq ifr; 728 struct tap_softc *tp = dev->si_drv1; 729 struct ifnet *ifp = tp->tap_ifp; 730 struct tapinfo *tapp = NULL; 731 int f; 732 int error; 733#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ 734 defined(COMPAT_FREEBSD4) 735 int ival; 736#endif 737 738 switch (cmd) { 739 case TAPSIFINFO: 740 tapp = (struct tapinfo *)data; 741 if (ifp->if_type != tapp->type) 742 return (EPROTOTYPE); 743 mtx_lock(&tp->tap_mtx); 744 if (ifp->if_mtu != tapp->mtu) { 745 strncpy(ifr.ifr_name, if_name(ifp), IFNAMSIZ); 746 ifr.ifr_mtu = tapp->mtu; 747 CURVNET_SET(ifp->if_vnet); 748 error = ifhwioctl(SIOCSIFMTU, ifp, 749 (caddr_t)&ifr, td); 750 CURVNET_RESTORE(); 751 if (error) { 752 mtx_unlock(&tp->tap_mtx); 753 return (error); 754 } 755 } 756 ifp->if_baudrate = tapp->baudrate; 757 mtx_unlock(&tp->tap_mtx); 758 break; 759 760 case TAPGIFINFO: 
761 tapp = (struct tapinfo *)data; 762 mtx_lock(&tp->tap_mtx); 763 tapp->mtu = ifp->if_mtu; 764 tapp->type = ifp->if_type; 765 tapp->baudrate = ifp->if_baudrate; 766 mtx_unlock(&tp->tap_mtx); 767 break; 768 769 case TAPSDEBUG: 770 tapdebug = *(int *)data; 771 break; 772 773 case TAPGDEBUG: 774 *(int *)data = tapdebug; 775 break; 776 777 case TAPGIFNAME: { 778 struct ifreq *ifr = (struct ifreq *) data; 779 780 strlcpy(ifr->ifr_name, ifp->if_xname, IFNAMSIZ); 781 } break; 782 783 case FIONBIO: 784 break; 785 786 case FIOASYNC: 787 mtx_lock(&tp->tap_mtx); 788 if (*(int *)data) 789 tp->tap_flags |= TAP_ASYNC; 790 else 791 tp->tap_flags &= ~TAP_ASYNC; 792 mtx_unlock(&tp->tap_mtx); 793 break; 794 795 case FIONREAD: 796 if (!IFQ_IS_EMPTY(&ifp->if_snd)) { 797 struct mbuf *mb; 798 799 IFQ_LOCK(&ifp->if_snd); 800 IFQ_POLL_NOLOCK(&ifp->if_snd, mb); 801 for (*(int *)data = 0; mb != NULL; 802 mb = mb->m_next) 803 *(int *)data += mb->m_len; 804 IFQ_UNLOCK(&ifp->if_snd); 805 } else 806 *(int *)data = 0; 807 break; 808 809 case FIOSETOWN: 810 return (fsetown(*(int *)data, &tp->tap_sigio)); 811 812 case FIOGETOWN: 813 *(int *)data = fgetown(&tp->tap_sigio); 814 return (0); 815 816 /* this is deprecated, FIOSETOWN should be used instead */ 817 case TIOCSPGRP: 818 return (fsetown(-(*(int *)data), &tp->tap_sigio)); 819 820 /* this is deprecated, FIOGETOWN should be used instead */ 821 case TIOCGPGRP: 822 *(int *)data = -fgetown(&tp->tap_sigio); 823 return (0); 824 825 /* VMware/VMnet port ioctl's */ 826 827#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ 828 defined(COMPAT_FREEBSD4) 829 case _IO('V', 0): 830 ival = IOCPARM_IVAL(data); 831 data = (caddr_t)&ival; 832 /* FALLTHROUGH */ 833#endif 834 case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */ 835 f = *(int *)data; 836 f &= 0x0fff; 837 f &= ~IFF_CANTCHANGE; 838 f |= IFF_UP; 839 840 mtx_lock(&tp->tap_mtx); 841 ifp->if_flags = f | (ifp->if_flags & IFF_CANTCHANGE); 842 mtx_unlock(&tp->tap_mtx); 843 break; 844 845 
case SIOCGIFADDR: /* get MAC address of the remote side */ 846 mtx_lock(&tp->tap_mtx); 847 bcopy(tp->ether_addr, data, sizeof(tp->ether_addr)); 848 mtx_unlock(&tp->tap_mtx); 849 break; 850 851 case SIOCSIFADDR: /* set MAC address of the remote side */ 852 mtx_lock(&tp->tap_mtx); 853 bcopy(data, tp->ether_addr, sizeof(tp->ether_addr)); 854 mtx_unlock(&tp->tap_mtx); 855 break; 856 857 default: 858 return (ENOTTY); 859 } 860 return (0); 861} /* tapioctl */ 862 863 864/* 865 * tapread 866 * 867 * the cdevsw read interface - reads a packet at a time, or at 868 * least as much of a packet as can be read 869 */ 870static int 871tapread(struct cdev *dev, struct uio *uio, int flag) 872{ 873 struct tap_softc *tp = dev->si_drv1; 874 struct ifnet *ifp = tp->tap_ifp; 875 struct mbuf *m = NULL; 876 int error = 0, len; 877 878 TAPDEBUG("%s reading, minor = %#x\n", ifp->if_xname, dev2unit(dev)); 879 880 mtx_lock(&tp->tap_mtx); 881 if ((tp->tap_flags & TAP_READY) != TAP_READY) { 882 mtx_unlock(&tp->tap_mtx); 883 884 /* Unlocked read. */ 885 TAPDEBUG("%s not ready. 
minor = %#x, tap_flags = 0x%x\n", 886 ifp->if_xname, dev2unit(dev), tp->tap_flags); 887 888 return (EHOSTDOWN); 889 } 890 891 tp->tap_flags &= ~TAP_RWAIT; 892 893 /* sleep until we get a packet */ 894 do { 895 IF_DEQUEUE(&ifp->if_snd, m); 896 897 if (m == NULL) { 898 if (flag & O_NONBLOCK) { 899 mtx_unlock(&tp->tap_mtx); 900 return (EWOULDBLOCK); 901 } 902 903 tp->tap_flags |= TAP_RWAIT; 904 error = mtx_sleep(tp, &tp->tap_mtx, PCATCH | (PZERO + 1), 905 "taprd", 0); 906 if (error) { 907 mtx_unlock(&tp->tap_mtx); 908 return (error); 909 } 910 } 911 } while (m == NULL); 912 mtx_unlock(&tp->tap_mtx); 913 914 /* feed packet to bpf */ 915 BPF_MTAP(ifp, m); 916 917 /* xfer packet to user space */ 918 while ((m != NULL) && (uio->uio_resid > 0) && (error == 0)) { 919 len = min(uio->uio_resid, m->m_len); 920 if (len == 0) 921 break; 922 923 error = uiomove(mtod(m, void *), len, uio); 924 m = m_free(m); 925 } 926 927 if (m != NULL) { 928 TAPDEBUG("%s dropping mbuf, minor = %#x\n", ifp->if_xname, 929 dev2unit(dev)); 930 m_freem(m); 931 } 932 933 return (error); 934} /* tapread */ 935 936 937/* 938 * tapwrite 939 * 940 * the cdevsw write interface - an atomic write is a packet - or else! 
941 */ 942static int 943tapwrite(struct cdev *dev, struct uio *uio, int flag) 944{ 945 struct ether_header *eh; 946 struct tap_softc *tp = dev->si_drv1; 947 struct ifnet *ifp = tp->tap_ifp; 948 struct mbuf *m; 949 950 TAPDEBUG("%s writing, minor = %#x\n", 951 ifp->if_xname, dev2unit(dev)); 952 953 if (uio->uio_resid == 0) 954 return (0); 955 956 if ((uio->uio_resid < 0) || (uio->uio_resid > TAPMRU)) { 957 TAPDEBUG("%s invalid packet len = %zd, minor = %#x\n", 958 ifp->if_xname, uio->uio_resid, dev2unit(dev)); 959 960 return (EIO); 961 } 962 963 if ((m = m_uiotombuf(uio, M_NOWAIT, 0, ETHER_ALIGN, 964 M_PKTHDR)) == NULL) { 965 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); 966 return (ENOBUFS); 967 } 968 969 m->m_pkthdr.rcvif = ifp; 970 971 /* 972 * Only pass a unicast frame to ether_input(), if it would actually 973 * have been received by non-virtual hardware. 974 */ 975 if (m->m_len < sizeof(struct ether_header)) { 976 m_freem(m); 977 return (0); 978 } 979 eh = mtod(m, struct ether_header *); 980 981 if (eh && (ifp->if_flags & IFF_PROMISC) == 0 && 982 !ETHER_IS_MULTICAST(eh->ether_dhost) && 983 bcmp(eh->ether_dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN) != 0) { 984 m_freem(m); 985 return (0); 986 } 987 988 /* Pass packet up to parent. */ 989 CURVNET_SET(ifp->if_vnet); 990 (*ifp->if_input)(ifp, m); 991 CURVNET_RESTORE(); 992 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); /* ibytes are counted in parent */ 993 994 return (0); 995} /* tapwrite */ 996 997 998/* 999 * tappoll 1000 * 1001 * the poll interface, this is only useful on reads 1002 * really. 
the write detect always returns true, write never blocks 1003 * anyway, it either accepts the packet or drops it 1004 */ 1005static int 1006tappoll(struct cdev *dev, int events, struct thread *td) 1007{ 1008 struct tap_softc *tp = dev->si_drv1; 1009 struct ifnet *ifp = tp->tap_ifp; 1010 int revents = 0; 1011 1012 TAPDEBUG("%s polling, minor = %#x\n", 1013 ifp->if_xname, dev2unit(dev)); 1014 1015 if (events & (POLLIN | POLLRDNORM)) { 1016 IFQ_LOCK(&ifp->if_snd); 1017 if (!IFQ_IS_EMPTY(&ifp->if_snd)) { 1018 TAPDEBUG("%s have data in queue. len = %d, " \ 1019 "minor = %#x\n", ifp->if_xname, 1020 ifp->if_snd.ifq_len, dev2unit(dev)); 1021 1022 revents |= (events & (POLLIN | POLLRDNORM)); 1023 } else { 1024 TAPDEBUG("%s waiting for data, minor = %#x\n", 1025 ifp->if_xname, dev2unit(dev)); 1026 1027 selrecord(td, &tp->tap_rsel); 1028 } 1029 IFQ_UNLOCK(&ifp->if_snd); 1030 } 1031 1032 if (events & (POLLOUT | POLLWRNORM)) 1033 revents |= (events & (POLLOUT | POLLWRNORM)); 1034 1035 return (revents); 1036} /* tappoll */ 1037 1038 1039/* 1040 * tap_kqfilter 1041 * 1042 * support for kevent() system call 1043 */ 1044static int 1045tapkqfilter(struct cdev *dev, struct knote *kn) 1046{ 1047 struct tap_softc *tp = dev->si_drv1; 1048 struct ifnet *ifp = tp->tap_ifp; 1049 1050 switch (kn->kn_filter) { 1051 case EVFILT_READ: 1052 TAPDEBUG("%s kqfilter: EVFILT_READ, minor = %#x\n", 1053 ifp->if_xname, dev2unit(dev)); 1054 kn->kn_fop = &tap_read_filterops; 1055 break; 1056 1057 case EVFILT_WRITE: 1058 TAPDEBUG("%s kqfilter: EVFILT_WRITE, minor = %#x\n", 1059 ifp->if_xname, dev2unit(dev)); 1060 kn->kn_fop = &tap_write_filterops; 1061 break; 1062 1063 default: 1064 TAPDEBUG("%s kqfilter: invalid filter, minor = %#x\n", 1065 ifp->if_xname, dev2unit(dev)); 1066 return (EINVAL); 1067 /* NOT REACHED */ 1068 } 1069 1070 kn->kn_hook = tp; 1071 knlist_add(&tp->tap_rsel.si_note, kn, 0); 1072 1073 return (0); 1074} /* tapkqfilter */ 1075 1076 1077/* 1078 * tap_kqread 1079 * 1080 * Return true if 
there is data in the interface queue 1081 */ 1082static int 1083tapkqread(struct knote *kn, long hint) 1084{ 1085 int ret; 1086 struct tap_softc *tp = kn->kn_hook; 1087 struct cdev *dev = tp->tap_dev; 1088 struct ifnet *ifp = tp->tap_ifp; 1089 1090 if ((kn->kn_data = ifp->if_snd.ifq_len) > 0) { 1091 TAPDEBUG("%s have data in queue. len = %d, minor = %#x\n", 1092 ifp->if_xname, ifp->if_snd.ifq_len, dev2unit(dev)); 1093 ret = 1; 1094 } else { 1095 TAPDEBUG("%s waiting for data, minor = %#x\n", 1096 ifp->if_xname, dev2unit(dev)); 1097 ret = 0; 1098 } 1099 1100 return (ret); 1101} /* tapkqread */ 1102 1103 1104/* 1105 * tap_kqwrite 1106 * 1107 * Always can write. Return the MTU in kn->data 1108 */ 1109static int 1110tapkqwrite(struct knote *kn, long hint) 1111{ 1112 struct tap_softc *tp = kn->kn_hook; 1113 struct ifnet *ifp = tp->tap_ifp; 1114 1115 kn->kn_data = ifp->if_mtu; 1116 1117 return (1); 1118} /* tapkqwrite */ 1119 1120 1121static void 1122tapkqdetach(struct knote *kn) 1123{ 1124 struct tap_softc *tp = kn->kn_hook; 1125 1126 knlist_remove(&tp->tap_rsel.si_note, kn, 0); 1127} /* tapkqdetach */ 1128 1129