1/*- 2 * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * BASED ON: 27 * ------------------------------------------------------------------------- 28 * 29 * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk> 30 * Nottingham University 1987. 31 */ 32 33/* 34 * $FreeBSD: stable/11/sys/net/if_tap.c 348126 2019-05-22 22:56:05Z kevans $ 35 * $Id: if_tap.c,v 0.21 2000/07/23 21:46:02 max Exp $ 36 */ 37 38#include "opt_compat.h" 39#include "opt_inet.h" 40 41#include <sys/param.h> 42#include <sys/conf.h> 43#include <sys/lock.h> 44#include <sys/fcntl.h> 45#include <sys/filio.h> 46#include <sys/jail.h> 47#include <sys/kernel.h> 48#include <sys/malloc.h> 49#include <sys/mbuf.h> 50#include <sys/module.h> 51#include <sys/poll.h> 52#include <sys/priv.h> 53#include <sys/proc.h> 54#include <sys/selinfo.h> 55#include <sys/signalvar.h> 56#include <sys/socket.h> 57#include <sys/sockio.h> 58#include <sys/sx.h> 59#include <sys/sysctl.h> 60#include <sys/systm.h> 61#include <sys/ttycom.h> 62#include <sys/uio.h> 63#include <sys/queue.h> 64 65#include <net/bpf.h> 66#include <net/ethernet.h> 67#include <net/if.h> 68#include <net/if_var.h> 69#include <net/if_clone.h> 70#include <net/if_dl.h> 71#include <net/if_media.h> 72#include <net/if_types.h> 73#include <net/route.h> 74#include <net/vnet.h> 75 76#include <netinet/in.h> 77 78#include <net/if_tapvar.h> 79#include <net/if_tap.h> 80 81 82#define CDEV_NAME "tap" 83#define TAPDEBUG if (tapdebug) printf 84 85static const char tapname[] = "tap"; 86static const char vmnetname[] = "vmnet"; 87#define TAPMAXUNIT 0x7fff 88#define VMNET_DEV_MASK CLONE_FLAG0 89 90/* module */ 91static int tapmodevent(module_t, int, void *); 92 93/* device */ 94static void tapclone(void *, struct ucred *, char *, int, 95 struct cdev **); 96static void tapcreate(struct cdev *); 97 98/* network interface */ 99static void tapifstart(struct ifnet *); 100static int tapifioctl(struct ifnet *, u_long, caddr_t); 101static void tapifinit(void *); 102 103static int tap_clone_create(struct if_clone *, int, caddr_t); 104static void tap_clone_destroy(struct ifnet *); 105static struct if_clone *tap_cloner; 106static int vmnet_clone_create(struct if_clone *, int, caddr_t); 107static void vmnet_clone_destroy(struct ifnet *); 108static struct if_clone *vmnet_cloner; 109 110/* character device */ 111static d_open_t tapopen; 112static d_close_t tapclose; 113static d_read_t tapread; 114static d_write_t tapwrite; 115static d_ioctl_t tapioctl; 116static d_poll_t tappoll; 117static d_kqfilter_t tapkqfilter; 118 119/* kqueue(2) */ 120static int tapkqread(struct knote *, long); 121static int tapkqwrite(struct knote *, long); 122static void tapkqdetach(struct knote *); 123 124static struct filterops tap_read_filterops = { 125 .f_isfd = 1, 126 .f_attach = NULL, 127 .f_detach = tapkqdetach, 128 .f_event = tapkqread, 129}; 130 131static struct filterops tap_write_filterops = { 132 .f_isfd = 1, 133 .f_attach = NULL, 134 .f_detach = tapkqdetach, 135 .f_event = tapkqwrite, 136}; 137 138static struct cdevsw tap_cdevsw = { 139 .d_version = D_VERSION, 140 .d_flags = D_NEEDMINOR, 141 .d_open = tapopen, 142 .d_close = tapclose, 143 .d_read = tapread, 144 .d_write = tapwrite, 145 .d_ioctl = tapioctl, 146 .d_poll = tappoll, 147 .d_name = CDEV_NAME, 148 .d_kqfilter = tapkqfilter, 149}; 150 151/* 152 * All global variables in if_tap.c are locked with tapmtx, with the 153 * exception of tapdebug, which is accessed unlocked; tapclones is 154 * static at runtime. 155 */ 156static struct mtx tapmtx; 157static int tapdebug = 0; /* debug flag */ 158static int tapuopen = 0; /* allow user open() */ 159static int tapuponopen = 0; /* IFF_UP on open() */ 160static int tapdclone = 1; /* enable devfs cloning */ 161static SLIST_HEAD(, tap_softc) taphead; /* first device */ 162static struct clonedevs *tapclones; 163 164MALLOC_DECLARE(M_TAP); 165MALLOC_DEFINE(M_TAP, CDEV_NAME, "Ethernet tunnel interface"); 166SYSCTL_INT(_debug, OID_AUTO, if_tap_debug, CTLFLAG_RW, &tapdebug, 0, ""); 167 168static struct sx tap_ioctl_sx; 169SX_SYSINIT(tap_ioctl_sx, &tap_ioctl_sx, "tap_ioctl"); 170 171SYSCTL_DECL(_net_link); 172static SYSCTL_NODE(_net_link, OID_AUTO, tap, CTLFLAG_RW, 0, 173 "Ethernet tunnel software network interface"); 174SYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tapuopen, 0, 175 "Allow user to open /dev/tap (based on node permissions)"); 176SYSCTL_INT(_net_link_tap, OID_AUTO, up_on_open, CTLFLAG_RW, &tapuponopen, 0, 177 "Bring interface up when /dev/tap is opened"); 178SYSCTL_INT(_net_link_tap, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tapdclone, 0, 179 "Enable legacy devfs interface creation"); 180SYSCTL_INT(_net_link_tap, OID_AUTO, debug, CTLFLAG_RW, &tapdebug, 0, ""); 181 182DEV_MODULE(if_tap, tapmodevent, NULL); 183MODULE_VERSION(if_tap, 1); 184 185static int 186tap_clone_create(struct if_clone *ifc, int unit, caddr_t params) 187{ 188 struct cdev *dev; 189 int i; 190 191 /* Find any existing device, or allocate new unit number. */ 192 i = clone_create(&tapclones, &tap_cdevsw, &unit, &dev, 0); 193 if (i) { 194 dev = make_dev(&tap_cdevsw, unit, UID_ROOT, GID_WHEEL, 0600, 195 "%s%d", tapname, unit); 196 } 197 198 tapcreate(dev); 199 return (0); 200} 201 202/* vmnet devices are tap devices in disguise */ 203static int 204vmnet_clone_create(struct if_clone *ifc, int unit, caddr_t params) 205{ 206 struct cdev *dev; 207 int i; 208 209 /* Find any existing device, or allocate new unit number. */ 210 i = clone_create(&tapclones, &tap_cdevsw, &unit, &dev, VMNET_DEV_MASK); 211 if (i) { 212 dev = make_dev(&tap_cdevsw, unit | VMNET_DEV_MASK, UID_ROOT, 213 GID_WHEEL, 0600, "%s%d", vmnetname, unit); 214 } 215 216 tapcreate(dev); 217 return (0); 218} 219 220static void 221tap_destroy(struct tap_softc *tp) 222{ 223 struct ifnet *ifp = tp->tap_ifp; 224 225 CURVNET_SET(ifp->if_vnet); 226 227 destroy_dev(tp->tap_dev); 228 seldrain(&tp->tap_rsel); 229 knlist_clear(&tp->tap_rsel.si_note, 0); 230 knlist_destroy(&tp->tap_rsel.si_note); 231 ether_ifdetach(ifp); 232 233 sx_xlock(&tap_ioctl_sx); 234 ifp->if_softc = NULL; 235 sx_xunlock(&tap_ioctl_sx); 236 237 if_free(ifp); 238 239 mtx_destroy(&tp->tap_mtx); 240 free(tp, M_TAP); 241 CURVNET_RESTORE(); 242} 243 244static void 245tap_clone_destroy(struct ifnet *ifp) 246{ 247 struct tap_softc *tp = ifp->if_softc; 248 249 mtx_lock(&tapmtx); 250 SLIST_REMOVE(&taphead, tp, tap_softc, tap_next); 251 mtx_unlock(&tapmtx); 252 tap_destroy(tp); 253} 254 255/* vmnet devices are tap devices in disguise */ 256static void 257vmnet_clone_destroy(struct ifnet *ifp) 258{ 259 tap_clone_destroy(ifp); 260} 261 262/* 263 * tapmodevent 264 * 265 * module event handler 266 */ 267static int 268tapmodevent(module_t mod, int type, void *data) 269{ 270 static eventhandler_tag eh_tag = NULL; 271 struct tap_softc *tp = NULL; 272 struct ifnet *ifp = NULL; 273 274 switch (type) { 275 case MOD_LOAD: 276 277 /* intitialize device */ 278 279 mtx_init(&tapmtx, "tapmtx", NULL, MTX_DEF); 280 SLIST_INIT(&taphead); 281 282 clone_setup(&tapclones); 283 eh_tag = EVENTHANDLER_REGISTER(dev_clone, tapclone, 0, 1000); 284 if (eh_tag == NULL) { 285 clone_cleanup(&tapclones); 286 mtx_destroy(&tapmtx); 287 return (ENOMEM); 288 } 289 tap_cloner = if_clone_simple(tapname, tap_clone_create, 290 tap_clone_destroy, 0); 291 vmnet_cloner = if_clone_simple(vmnetname, vmnet_clone_create, 292 vmnet_clone_destroy, 0); 293 return (0); 294 295 case MOD_UNLOAD: 296 /* 297 * The EBUSY algorithm here can't quite atomically 298 * guarantee that this is race-free since we have to 299 * release the tap mtx to deregister the clone handler. 300 */ 301 mtx_lock(&tapmtx); 302 SLIST_FOREACH(tp, &taphead, tap_next) { 303 mtx_lock(&tp->tap_mtx); 304 if (tp->tap_flags & TAP_OPEN) { 305 mtx_unlock(&tp->tap_mtx); 306 mtx_unlock(&tapmtx); 307 return (EBUSY); 308 } 309 mtx_unlock(&tp->tap_mtx); 310 } 311 mtx_unlock(&tapmtx); 312 313 EVENTHANDLER_DEREGISTER(dev_clone, eh_tag); 314 if_clone_detach(tap_cloner); 315 if_clone_detach(vmnet_cloner); 316 drain_dev_clone_events(); 317 318 mtx_lock(&tapmtx); 319 while ((tp = SLIST_FIRST(&taphead)) != NULL) { 320 SLIST_REMOVE_HEAD(&taphead, tap_next); 321 mtx_unlock(&tapmtx); 322 323 ifp = tp->tap_ifp; 324 325 TAPDEBUG("detaching %s\n", ifp->if_xname); 326 327 tap_destroy(tp); 328 mtx_lock(&tapmtx); 329 } 330 mtx_unlock(&tapmtx); 331 clone_cleanup(&tapclones); 332 333 mtx_destroy(&tapmtx); 334 335 break; 336 337 default: 338 return (EOPNOTSUPP); 339 } 340 341 return (0); 342} /* tapmodevent */ 343 344 345/* 346 * DEVFS handler 347 * 348 * We need to support two kind of devices - tap and vmnet 349 */ 350static void 351tapclone(void *arg, struct ucred *cred, char *name, int namelen, struct cdev **dev) 352{ 353 char devname[SPECNAMELEN + 1]; 354 int i, unit, append_unit; 355 int extra; 356 357 if (*dev != NULL) 358 return; 359 360 if (!tapdclone || 361 (!tapuopen && priv_check_cred(cred, PRIV_NET_IFCREATE, 0) != 0)) 362 return; 363 364 unit = 0; 365 append_unit = 0; 366 extra = 0; 367 368 /* We're interested in only tap/vmnet devices. */ 369 if (strcmp(name, tapname) == 0) { 370 unit = -1; 371 } else if (strcmp(name, vmnetname) == 0) { 372 unit = -1; 373 extra = VMNET_DEV_MASK; 374 } else if (dev_stdclone(name, NULL, tapname, &unit) != 1) { 375 if (dev_stdclone(name, NULL, vmnetname, &unit) != 1) { 376 return; 377 } else { 378 extra = VMNET_DEV_MASK; 379 } 380 } 381 382 if (unit == -1) 383 append_unit = 1; 384 385 CURVNET_SET(CRED_TO_VNET(cred)); 386 /* find any existing device, or allocate new unit number */ 387 i = clone_create(&tapclones, &tap_cdevsw, &unit, dev, extra); 388 if (i) { 389 if (append_unit) { 390 /* 391 * We were passed 'tun' or 'tap', with no unit specified 392 * so we'll need to append it now. 393 */ 394 namelen = snprintf(devname, sizeof(devname), "%s%d", name, 395 unit); 396 name = devname; 397 } 398 399 *dev = make_dev_credf(MAKEDEV_REF, &tap_cdevsw, unit | extra, 400 cred, UID_ROOT, GID_WHEEL, 0600, "%s", name); 401 } 402 403 if_clone_create(name, namelen, NULL); 404 CURVNET_RESTORE(); 405} /* tapclone */ 406 407 408/* 409 * tapcreate 410 * 411 * to create interface 412 */ 413static void 414tapcreate(struct cdev *dev) 415{ 416 struct ifnet *ifp = NULL; 417 struct tap_softc *tp = NULL; 418 unsigned short macaddr_hi; 419 uint32_t macaddr_mid; 420 int unit; 421 const char *name = NULL; 422 u_char eaddr[6]; 423 424 /* allocate driver storage and create device */ 425 tp = malloc(sizeof(*tp), M_TAP, M_WAITOK | M_ZERO); 426 mtx_init(&tp->tap_mtx, "tap_mtx", NULL, MTX_DEF); 427 mtx_lock(&tapmtx); 428 SLIST_INSERT_HEAD(&taphead, tp, tap_next); 429 mtx_unlock(&tapmtx); 430 431 unit = dev2unit(dev); 432 433 /* select device: tap or vmnet */ 434 if (unit & VMNET_DEV_MASK) { 435 name = vmnetname; 436 tp->tap_flags |= TAP_VMNET; 437 } else 438 name = tapname; 439 440 unit &= TAPMAXUNIT; 441 442 TAPDEBUG("tapcreate(%s%d). minor = %#x\n", name, unit, dev2unit(dev)); 443 444 /* generate fake MAC address: 00 bd xx xx xx unit_no */ 445 macaddr_hi = htons(0x00bd); 446 macaddr_mid = (uint32_t) ticks; 447 bcopy(&macaddr_hi, eaddr, sizeof(short)); 448 bcopy(&macaddr_mid, &eaddr[2], sizeof(uint32_t)); 449 eaddr[5] = (u_char)unit; 450 451 /* fill the rest and attach interface */ 452 ifp = tp->tap_ifp = if_alloc(IFT_ETHER); 453 if (ifp == NULL) 454 panic("%s%d: can not if_alloc()", name, unit); 455 ifp->if_softc = tp; 456 if_initname(ifp, name, unit); 457 ifp->if_init = tapifinit; 458 ifp->if_start = tapifstart; 459 ifp->if_ioctl = tapifioctl; 460 ifp->if_mtu = ETHERMTU; 461 ifp->if_flags = (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST); 462 IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); 463 ifp->if_capabilities |= IFCAP_LINKSTATE; 464 ifp->if_capenable |= IFCAP_LINKSTATE; 465 466 dev->si_drv1 = tp; 467 tp->tap_dev = dev; 468 469 ether_ifattach(ifp, eaddr); 470 471 mtx_lock(&tp->tap_mtx); 472 tp->tap_flags |= TAP_INITED; 473 mtx_unlock(&tp->tap_mtx); 474 475 knlist_init_mtx(&tp->tap_rsel.si_note, &tp->tap_mtx); 476 477 TAPDEBUG("interface %s is created. minor = %#x\n", 478 ifp->if_xname, dev2unit(dev)); 479} /* tapcreate */ 480 481 482/* 483 * tapopen 484 * 485 * to open tunnel. must be superuser 486 */ 487static int 488tapopen(struct cdev *dev, int flag, int mode, struct thread *td) 489{ 490 struct tap_softc *tp = NULL; 491 struct ifnet *ifp = NULL; 492 int error; 493 494 if (tapuopen == 0) { 495 error = priv_check(td, PRIV_NET_TAP); 496 if (error) 497 return (error); 498 } 499 500 if ((dev2unit(dev) & CLONE_UNITMASK) > TAPMAXUNIT) 501 return (ENXIO); 502 503 tp = dev->si_drv1; 504 505 mtx_lock(&tp->tap_mtx); 506 if (tp->tap_flags & TAP_OPEN) { 507 mtx_unlock(&tp->tap_mtx); 508 return (EBUSY); 509 } 510 511 bcopy(IF_LLADDR(tp->tap_ifp), tp->ether_addr, sizeof(tp->ether_addr)); 512 tp->tap_pid = td->td_proc->p_pid; 513 tp->tap_flags |= TAP_OPEN; 514 ifp = tp->tap_ifp; 515 516 ifp->if_drv_flags |= IFF_DRV_RUNNING; 517 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 518 if (tapuponopen) 519 ifp->if_flags |= IFF_UP; 520 if_link_state_change(ifp, LINK_STATE_UP); 521 mtx_unlock(&tp->tap_mtx); 522 523 TAPDEBUG("%s is open. minor = %#x\n", ifp->if_xname, dev2unit(dev)); 524 525 return (0); 526} /* tapopen */ 527 528 529/* 530 * tapclose 531 * 532 * close the device - mark i/f down & delete routing info 533 */ 534static int 535tapclose(struct cdev *dev, int foo, int bar, struct thread *td) 536{ 537 struct ifaddr *ifa; 538 struct tap_softc *tp = dev->si_drv1; 539 struct ifnet *ifp = tp->tap_ifp; 540 541 /* junk all pending output */ 542 mtx_lock(&tp->tap_mtx); 543 CURVNET_SET(ifp->if_vnet); 544 IF_DRAIN(&ifp->if_snd); 545 546 /* 547 * Do not bring the interface down, and do not anything with 548 * interface, if we are in VMnet mode. Just close the device. 549 */ 550 if (((tp->tap_flags & TAP_VMNET) == 0) && 551 (ifp->if_flags & (IFF_UP | IFF_LINK0)) == IFF_UP) { 552 mtx_unlock(&tp->tap_mtx); 553 if_down(ifp); 554 mtx_lock(&tp->tap_mtx); 555 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 556 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 557 mtx_unlock(&tp->tap_mtx); 558 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 559 rtinit(ifa, (int)RTM_DELETE, 0); 560 } 561 if_purgeaddrs(ifp); 562 mtx_lock(&tp->tap_mtx); 563 } 564 } 565 566 if_link_state_change(ifp, LINK_STATE_DOWN); 567 CURVNET_RESTORE(); 568 569 funsetown(&tp->tap_sigio); 570 selwakeuppri(&tp->tap_rsel, PZERO+1); 571 KNOTE_LOCKED(&tp->tap_rsel.si_note, 0); 572 573 tp->tap_flags &= ~TAP_OPEN; 574 tp->tap_pid = 0; 575 mtx_unlock(&tp->tap_mtx); 576 577 TAPDEBUG("%s is closed. minor = %#x\n", 578 ifp->if_xname, dev2unit(dev)); 579 580 return (0); 581} /* tapclose */ 582 583 584/* 585 * tapifinit 586 * 587 * network interface initialization function 588 */ 589static void 590tapifinit(void *xtp) 591{ 592 struct tap_softc *tp = (struct tap_softc *)xtp; 593 struct ifnet *ifp = tp->tap_ifp; 594 595 TAPDEBUG("initializing %s\n", ifp->if_xname); 596 597 mtx_lock(&tp->tap_mtx); 598 ifp->if_drv_flags |= IFF_DRV_RUNNING; 599 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 600 mtx_unlock(&tp->tap_mtx); 601 602 /* attempt to start output */ 603 tapifstart(ifp); 604} /* tapifinit */ 605 606 607/* 608 * tapifioctl 609 * 610 * Process an ioctl request on network interface 611 */ 612static int 613tapifioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 614{ 615 struct tap_softc *tp; 616 struct ifreq *ifr = (struct ifreq *)data; 617 struct ifstat *ifs = NULL; 618 struct ifmediareq *ifmr = NULL; 619 int dummy, error = 0; 620 621 sx_xlock(&tap_ioctl_sx); 622 tp = ifp->if_softc; 623 if (tp == NULL) { 624 error = ENXIO; 625 goto bad; 626 } 627 switch (cmd) { 628 case SIOCSIFFLAGS: /* XXX -- just like vmnet does */ 629 case SIOCADDMULTI: 630 case SIOCDELMULTI: 631 break; 632 633 case SIOCGIFMEDIA: 634 ifmr = (struct ifmediareq *)data; 635 dummy = ifmr->ifm_count; 636 ifmr->ifm_count = 1; 637 ifmr->ifm_status = IFM_AVALID; 638 ifmr->ifm_active = IFM_ETHER; 639 if (tp->tap_flags & TAP_OPEN) 640 ifmr->ifm_status |= IFM_ACTIVE; 641 ifmr->ifm_current = ifmr->ifm_active; 642 if (dummy >= 1) { 643 int media = IFM_ETHER; 644 error = copyout(&media, ifmr->ifm_ulist, 645 sizeof(int)); 646 } 647 break; 648 649 case SIOCSIFMTU: 650 ifp->if_mtu = ifr->ifr_mtu; 651 break; 652 653 case SIOCGIFSTATUS: 654 ifs = (struct ifstat *)data; 655 mtx_lock(&tp->tap_mtx); 656 if (tp->tap_pid != 0) 657 snprintf(ifs->ascii, sizeof(ifs->ascii), 658 "\tOpened by PID %d\n", tp->tap_pid); 659 else 660 ifs->ascii[0] = '\0'; 661 mtx_unlock(&tp->tap_mtx); 662 break; 663 664 default: 665 error = ether_ioctl(ifp, cmd, data); 666 break; 667 } 668 669bad: 670 sx_xunlock(&tap_ioctl_sx); 671 return (error); 672} /* tapifioctl */ 673 674 675/* 676 * tapifstart 677 * 678 * queue packets from higher level ready to put out 679 */ 680static void 681tapifstart(struct ifnet *ifp) 682{ 683 struct tap_softc *tp = ifp->if_softc; 684 685 TAPDEBUG("%s starting\n", ifp->if_xname); 686 687 /* 688 * do not junk pending output if we are in VMnet mode. 689 * XXX: can this do any harm because of queue overflow? 690 */ 691 692 mtx_lock(&tp->tap_mtx); 693 if (((tp->tap_flags & TAP_VMNET) == 0) && 694 ((tp->tap_flags & TAP_READY) != TAP_READY)) { 695 struct mbuf *m; 696 697 /* Unlocked read. */ 698 TAPDEBUG("%s not ready, tap_flags = 0x%x\n", ifp->if_xname, 699 tp->tap_flags); 700 701 for (;;) { 702 IF_DEQUEUE(&ifp->if_snd, m); 703 if (m != NULL) { 704 m_freem(m); 705 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 706 } else 707 break; 708 } 709 mtx_unlock(&tp->tap_mtx); 710 711 return; 712 } 713 714 ifp->if_drv_flags |= IFF_DRV_OACTIVE; 715 716 if (!IFQ_IS_EMPTY(&ifp->if_snd)) { 717 if (tp->tap_flags & TAP_RWAIT) { 718 tp->tap_flags &= ~TAP_RWAIT; 719 wakeup(tp); 720 } 721 722 if ((tp->tap_flags & TAP_ASYNC) && (tp->tap_sigio != NULL)) { 723 mtx_unlock(&tp->tap_mtx); 724 pgsigio(&tp->tap_sigio, SIGIO, 0); 725 mtx_lock(&tp->tap_mtx); 726 } 727 728 selwakeuppri(&tp->tap_rsel, PZERO+1); 729 KNOTE_LOCKED(&tp->tap_rsel.si_note, 0); 730 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); /* obytes are counted in ether_output */ 731 } 732 733 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 734 mtx_unlock(&tp->tap_mtx); 735} /* tapifstart */ 736 737 738/* 739 * tapioctl 740 * 741 * the cdevsw interface is now pretty minimal 742 */ 743static int 744tapioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td) 745{ 746 struct ifreq ifr; 747 struct tap_softc *tp = dev->si_drv1; 748 struct ifnet *ifp = tp->tap_ifp; 749 struct tapinfo *tapp = NULL; 750 int f; 751 int error; 752#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ 753 defined(COMPAT_FREEBSD4) 754 int ival; 755#endif 756 757 switch (cmd) { 758 case TAPSIFINFO: 759 tapp = (struct tapinfo *)data; 760 if (ifp->if_type != tapp->type) 761 return (EPROTOTYPE); 762 mtx_lock(&tp->tap_mtx); 763 if (ifp->if_mtu != tapp->mtu) { 764 strlcpy(ifr.ifr_name, if_name(ifp), IFNAMSIZ); 765 ifr.ifr_mtu = tapp->mtu; 766 CURVNET_SET(ifp->if_vnet); 767 error = ifhwioctl(SIOCSIFMTU, ifp, 768 (caddr_t)&ifr, td); 769 CURVNET_RESTORE(); 770 if (error) { 771 mtx_unlock(&tp->tap_mtx); 772 return (error); 773 } 774 } 775 ifp->if_baudrate = tapp->baudrate; 776 mtx_unlock(&tp->tap_mtx); 777 break; 778 779 case TAPGIFINFO: 780 tapp = (struct tapinfo *)data; 781 mtx_lock(&tp->tap_mtx); 782 tapp->mtu = ifp->if_mtu; 783 tapp->type = ifp->if_type; 784 tapp->baudrate = ifp->if_baudrate; 785 mtx_unlock(&tp->tap_mtx); 786 break; 787 788 case TAPSDEBUG: 789 tapdebug = *(int *)data; 790 break; 791 792 case TAPGDEBUG: 793 *(int *)data = tapdebug; 794 break; 795 796 case TAPGIFNAME: { 797 struct ifreq *ifr = (struct ifreq *) data; 798 799 strlcpy(ifr->ifr_name, ifp->if_xname, IFNAMSIZ); 800 } break; 801 802 case FIONBIO: 803 break; 804 805 case FIOASYNC: 806 mtx_lock(&tp->tap_mtx); 807 if (*(int *)data) 808 tp->tap_flags |= TAP_ASYNC; 809 else 810 tp->tap_flags &= ~TAP_ASYNC; 811 mtx_unlock(&tp->tap_mtx); 812 break; 813 814 case FIONREAD: 815 if (!IFQ_IS_EMPTY(&ifp->if_snd)) { 816 struct mbuf *mb; 817 818 IFQ_LOCK(&ifp->if_snd); 819 IFQ_POLL_NOLOCK(&ifp->if_snd, mb); 820 for (*(int *)data = 0; mb != NULL; 821 mb = mb->m_next) 822 *(int *)data += mb->m_len; 823 IFQ_UNLOCK(&ifp->if_snd); 824 } else 825 *(int *)data = 0; 826 break; 827 828 case FIOSETOWN: 829 return (fsetown(*(int *)data, &tp->tap_sigio)); 830 831 case FIOGETOWN: 832 *(int *)data = fgetown(&tp->tap_sigio); 833 return (0); 834 835 /* this is deprecated, FIOSETOWN should be used instead */ 836 case TIOCSPGRP: 837 return (fsetown(-(*(int *)data), &tp->tap_sigio)); 838 839 /* this is deprecated, FIOGETOWN should be used instead */ 840 case TIOCGPGRP: 841 *(int *)data = -fgetown(&tp->tap_sigio); 842 return (0); 843 844 /* VMware/VMnet port ioctl's */ 845 846#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ 847 defined(COMPAT_FREEBSD4) 848 case _IO('V', 0): 849 ival = IOCPARM_IVAL(data); 850 data = (caddr_t)&ival; 851 /* FALLTHROUGH */ 852#endif 853 case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */ 854 f = *(int *)data; 855 f &= 0x0fff; 856 f &= ~IFF_CANTCHANGE; 857 f |= IFF_UP; 858 859 mtx_lock(&tp->tap_mtx); 860 ifp->if_flags = f | (ifp->if_flags & IFF_CANTCHANGE); 861 mtx_unlock(&tp->tap_mtx); 862 break; 863 864 case SIOCGIFADDR: /* get MAC address of the remote side */ 865 mtx_lock(&tp->tap_mtx); 866 bcopy(tp->ether_addr, data, sizeof(tp->ether_addr)); 867 mtx_unlock(&tp->tap_mtx); 868 break; 869 870 case SIOCSIFADDR: /* set MAC address of the remote side */ 871 mtx_lock(&tp->tap_mtx); 872 bcopy(data, tp->ether_addr, sizeof(tp->ether_addr)); 873 mtx_unlock(&tp->tap_mtx); 874 break; 875 876 default: 877 return (ENOTTY); 878 } 879 return (0); 880} /* tapioctl */ 881 882 883/* 884 * tapread 885 * 886 * the cdevsw read interface - reads a packet at a time, or at 887 * least as much of a packet as can be read 888 */ 889static int 890tapread(struct cdev *dev, struct uio *uio, int flag) 891{ 892 struct tap_softc *tp = dev->si_drv1; 893 struct ifnet *ifp = tp->tap_ifp; 894 struct mbuf *m = NULL; 895 int error = 0, len; 896 897 TAPDEBUG("%s reading, minor = %#x\n", ifp->if_xname, dev2unit(dev)); 898 899 mtx_lock(&tp->tap_mtx); 900 if ((tp->tap_flags & TAP_READY) != TAP_READY) { 901 mtx_unlock(&tp->tap_mtx); 902 903 /* Unlocked read. */ 904 TAPDEBUG("%s not ready. minor = %#x, tap_flags = 0x%x\n", 905 ifp->if_xname, dev2unit(dev), tp->tap_flags); 906 907 return (EHOSTDOWN); 908 } 909 910 tp->tap_flags &= ~TAP_RWAIT; 911 912 /* sleep until we get a packet */ 913 do { 914 IF_DEQUEUE(&ifp->if_snd, m); 915 916 if (m == NULL) { 917 if (flag & O_NONBLOCK) { 918 mtx_unlock(&tp->tap_mtx); 919 return (EWOULDBLOCK); 920 } 921 922 tp->tap_flags |= TAP_RWAIT; 923 error = mtx_sleep(tp, &tp->tap_mtx, PCATCH | (PZERO + 1), 924 "taprd", 0); 925 if (error) { 926 mtx_unlock(&tp->tap_mtx); 927 return (error); 928 } 929 } 930 } while (m == NULL); 931 mtx_unlock(&tp->tap_mtx); 932 933 /* feed packet to bpf */ 934 BPF_MTAP(ifp, m); 935 936 /* xfer packet to user space */ 937 while ((m != NULL) && (uio->uio_resid > 0) && (error == 0)) { 938 len = min(uio->uio_resid, m->m_len); 939 if (len == 0) 940 break; 941 942 error = uiomove(mtod(m, void *), len, uio); 943 m = m_free(m); 944 } 945 946 if (m != NULL) { 947 TAPDEBUG("%s dropping mbuf, minor = %#x\n", ifp->if_xname, 948 dev2unit(dev)); 949 m_freem(m); 950 } 951 952 return (error); 953} /* tapread */ 954 955 956/* 957 * tapwrite 958 * 959 * the cdevsw write interface - an atomic write is a packet - or else! 960 */ 961static int 962tapwrite(struct cdev *dev, struct uio *uio, int flag) 963{ 964 struct ether_header *eh; 965 struct tap_softc *tp = dev->si_drv1; 966 struct ifnet *ifp = tp->tap_ifp; 967 struct mbuf *m; 968 969 TAPDEBUG("%s writing, minor = %#x\n", 970 ifp->if_xname, dev2unit(dev)); 971 972 if (uio->uio_resid == 0) 973 return (0); 974 975 if ((uio->uio_resid < 0) || (uio->uio_resid > TAPMRU)) { 976 TAPDEBUG("%s invalid packet len = %zd, minor = %#x\n", 977 ifp->if_xname, uio->uio_resid, dev2unit(dev)); 978 979 return (EIO); 980 } 981 982 if ((m = m_uiotombuf(uio, M_NOWAIT, 0, ETHER_ALIGN, 983 M_PKTHDR)) == NULL) { 984 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); 985 return (ENOBUFS); 986 } 987 988 m->m_pkthdr.rcvif = ifp; 989 990 /* 991 * Only pass a unicast frame to ether_input(), if it would actually 992 * have been received by non-virtual hardware. 993 */ 994 if (m->m_len < sizeof(struct ether_header)) { 995 m_freem(m); 996 return (0); 997 } 998 eh = mtod(m, struct ether_header *); 999 1000 if (eh && (ifp->if_flags & IFF_PROMISC) == 0 && 1001 !ETHER_IS_MULTICAST(eh->ether_dhost) && 1002 bcmp(eh->ether_dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN) != 0) { 1003 m_freem(m); 1004 return (0); 1005 } 1006 1007 /* Pass packet up to parent. */ 1008 CURVNET_SET(ifp->if_vnet); 1009 (*ifp->if_input)(ifp, m); 1010 CURVNET_RESTORE(); 1011 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); /* ibytes are counted in parent */ 1012 1013 return (0); 1014} /* tapwrite */ 1015 1016 1017/* 1018 * tappoll 1019 * 1020 * the poll interface, this is only useful on reads 1021 * really. the write detect always returns true, write never blocks 1022 * anyway, it either accepts the packet or drops it 1023 */ 1024static int 1025tappoll(struct cdev *dev, int events, struct thread *td) 1026{ 1027 struct tap_softc *tp = dev->si_drv1; 1028 struct ifnet *ifp = tp->tap_ifp; 1029 int revents = 0; 1030 1031 TAPDEBUG("%s polling, minor = %#x\n", 1032 ifp->if_xname, dev2unit(dev)); 1033 1034 if (events & (POLLIN | POLLRDNORM)) { 1035 IFQ_LOCK(&ifp->if_snd); 1036 if (!IFQ_IS_EMPTY(&ifp->if_snd)) { 1037 TAPDEBUG("%s have data in queue. len = %d, " \ 1038 "minor = %#x\n", ifp->if_xname, 1039 ifp->if_snd.ifq_len, dev2unit(dev)); 1040 1041 revents |= (events & (POLLIN | POLLRDNORM)); 1042 } else { 1043 TAPDEBUG("%s waiting for data, minor = %#x\n", 1044 ifp->if_xname, dev2unit(dev)); 1045 1046 selrecord(td, &tp->tap_rsel); 1047 } 1048 IFQ_UNLOCK(&ifp->if_snd); 1049 } 1050 1051 if (events & (POLLOUT | POLLWRNORM)) 1052 revents |= (events & (POLLOUT | POLLWRNORM)); 1053 1054 return (revents); 1055} /* tappoll */ 1056 1057 1058/* 1059 * tap_kqfilter 1060 * 1061 * support for kevent() system call 1062 */ 1063static int 1064tapkqfilter(struct cdev *dev, struct knote *kn) 1065{ 1066 struct tap_softc *tp = dev->si_drv1; 1067 struct ifnet *ifp = tp->tap_ifp; 1068 1069 switch (kn->kn_filter) { 1070 case EVFILT_READ: 1071 TAPDEBUG("%s kqfilter: EVFILT_READ, minor = %#x\n", 1072 ifp->if_xname, dev2unit(dev)); 1073 kn->kn_fop = &tap_read_filterops; 1074 break; 1075 1076 case EVFILT_WRITE: 1077 TAPDEBUG("%s kqfilter: EVFILT_WRITE, minor = %#x\n", 1078 ifp->if_xname, dev2unit(dev)); 1079 kn->kn_fop = &tap_write_filterops; 1080 break; 1081 1082 default: 1083 TAPDEBUG("%s kqfilter: invalid filter, minor = %#x\n", 1084 ifp->if_xname, dev2unit(dev)); 1085 return (EINVAL); 1086 /* NOT REACHED */ 1087 } 1088 1089 kn->kn_hook = tp; 1090 knlist_add(&tp->tap_rsel.si_note, kn, 0); 1091 1092 return (0); 1093} /* tapkqfilter */ 1094 1095 1096/* 1097 * tap_kqread 1098 * 1099 * Return true if there is data in the interface queue 1100 */ 1101static int 1102tapkqread(struct knote *kn, long hint) 1103{ 1104 int ret; 1105 struct tap_softc *tp = kn->kn_hook; 1106 struct cdev *dev = tp->tap_dev; 1107 struct ifnet *ifp = tp->tap_ifp; 1108 1109 if ((kn->kn_data = ifp->if_snd.ifq_len) > 0) { 1110 TAPDEBUG("%s have data in queue. len = %d, minor = %#x\n", 1111 ifp->if_xname, ifp->if_snd.ifq_len, dev2unit(dev)); 1112 ret = 1; 1113 } else { 1114 TAPDEBUG("%s waiting for data, minor = %#x\n", 1115 ifp->if_xname, dev2unit(dev)); 1116 ret = 0; 1117 } 1118 1119 return (ret); 1120} /* tapkqread */ 1121 1122 1123/* 1124 * tap_kqwrite 1125 * 1126 * Always can write. Return the MTU in kn->data 1127 */ 1128static int 1129tapkqwrite(struct knote *kn, long hint) 1130{ 1131 struct tap_softc *tp = kn->kn_hook; 1132 struct ifnet *ifp = tp->tap_ifp; 1133 1134 kn->kn_data = ifp->if_mtu; 1135 1136 return (1); 1137} /* tapkqwrite */ 1138 1139 1140static void 1141tapkqdetach(struct knote *kn) 1142{ 1143 struct tap_softc *tp = kn->kn_hook; 1144 1145 knlist_remove(&tp->tap_rsel.si_note, kn, 0); 1146} /* tapkqdetach */ 1147 1148