/* if_tap.c — FreeBSD revision 274966 */
1/*- 2 * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * BASED ON: 27 * ------------------------------------------------------------------------- 28 * 29 * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk> 30 * Nottingham University 1987. 
31 */ 32 33/* 34 * $FreeBSD: head/sys/net/if_tap.c 274966 2014-11-24 14:00:27Z philip $ 35 * $Id: if_tap.c,v 0.21 2000/07/23 21:46:02 max Exp $ 36 */ 37 38#include "opt_compat.h" 39#include "opt_inet.h" 40 41#include <sys/param.h> 42#include <sys/conf.h> 43#include <sys/fcntl.h> 44#include <sys/filio.h> 45#include <sys/jail.h> 46#include <sys/kernel.h> 47#include <sys/malloc.h> 48#include <sys/mbuf.h> 49#include <sys/module.h> 50#include <sys/poll.h> 51#include <sys/priv.h> 52#include <sys/proc.h> 53#include <sys/selinfo.h> 54#include <sys/signalvar.h> 55#include <sys/socket.h> 56#include <sys/sockio.h> 57#include <sys/sysctl.h> 58#include <sys/systm.h> 59#include <sys/ttycom.h> 60#include <sys/uio.h> 61#include <sys/queue.h> 62 63#include <net/bpf.h> 64#include <net/ethernet.h> 65#include <net/if.h> 66#include <net/if_var.h> 67#include <net/if_clone.h> 68#include <net/if_dl.h> 69#include <net/if_media.h> 70#include <net/if_types.h> 71#include <net/route.h> 72#include <net/vnet.h> 73 74#include <netinet/in.h> 75 76#include <net/if_tapvar.h> 77#include <net/if_tap.h> 78 79 80#define CDEV_NAME "tap" 81#define TAPDEBUG if (tapdebug) printf 82 83static const char tapname[] = "tap"; 84static const char vmnetname[] = "vmnet"; 85#define TAPMAXUNIT 0x7fff 86#define VMNET_DEV_MASK CLONE_FLAG0 87 88/* module */ 89static int tapmodevent(module_t, int, void *); 90 91/* device */ 92static void tapclone(void *, struct ucred *, char *, int, 93 struct cdev **); 94static void tapcreate(struct cdev *); 95 96/* network interface */ 97static void tapifstart(struct ifnet *); 98static int tapifioctl(struct ifnet *, u_long, caddr_t); 99static void tapifinit(void *); 100 101static int tap_clone_create(struct if_clone *, int, caddr_t); 102static void tap_clone_destroy(struct ifnet *); 103static struct if_clone *tap_cloner; 104static int vmnet_clone_create(struct if_clone *, int, caddr_t); 105static void vmnet_clone_destroy(struct ifnet *); 106static struct if_clone *vmnet_cloner; 107 108/* 
character device */ 109static d_open_t tapopen; 110static d_close_t tapclose; 111static d_read_t tapread; 112static d_write_t tapwrite; 113static d_ioctl_t tapioctl; 114static d_poll_t tappoll; 115static d_kqfilter_t tapkqfilter; 116 117/* kqueue(2) */ 118static int tapkqread(struct knote *, long); 119static int tapkqwrite(struct knote *, long); 120static void tapkqdetach(struct knote *); 121 122static struct filterops tap_read_filterops = { 123 .f_isfd = 1, 124 .f_attach = NULL, 125 .f_detach = tapkqdetach, 126 .f_event = tapkqread, 127}; 128 129static struct filterops tap_write_filterops = { 130 .f_isfd = 1, 131 .f_attach = NULL, 132 .f_detach = tapkqdetach, 133 .f_event = tapkqwrite, 134}; 135 136static struct cdevsw tap_cdevsw = { 137 .d_version = D_VERSION, 138 .d_flags = D_NEEDMINOR, 139 .d_open = tapopen, 140 .d_close = tapclose, 141 .d_read = tapread, 142 .d_write = tapwrite, 143 .d_ioctl = tapioctl, 144 .d_poll = tappoll, 145 .d_name = CDEV_NAME, 146 .d_kqfilter = tapkqfilter, 147}; 148 149/* 150 * All global variables in if_tap.c are locked with tapmtx, with the 151 * exception of tapdebug, which is accessed unlocked; tapclones is 152 * static at runtime. 
153 */ 154static struct mtx tapmtx; 155static int tapdebug = 0; /* debug flag */ 156static int tapuopen = 0; /* allow user open() */ 157static int tapuponopen = 0; /* IFF_UP on open() */ 158static int tapdclone = 1; /* enable devfs cloning */ 159static int tapclosedeladdrs = 1; /* del addrs on close */ 160static SLIST_HEAD(, tap_softc) taphead; /* first device */ 161static struct clonedevs *tapclones; 162 163MALLOC_DECLARE(M_TAP); 164MALLOC_DEFINE(M_TAP, CDEV_NAME, "Ethernet tunnel interface"); 165SYSCTL_INT(_debug, OID_AUTO, if_tap_debug, CTLFLAG_RW, &tapdebug, 0, ""); 166 167SYSCTL_DECL(_net_link); 168static SYSCTL_NODE(_net_link, OID_AUTO, tap, CTLFLAG_RW, 0, 169 "Ethernet tunnel software network interface"); 170SYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tapuopen, 0, 171 "Allow user to open /dev/tap (based on node permissions)"); 172SYSCTL_INT(_net_link_tap, OID_AUTO, up_on_open, CTLFLAG_RW, &tapuponopen, 0, 173 "Bring interface up when /dev/tap is opened"); 174SYSCTL_INT(_net_link_tap, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tapdclone, 0, 175 "Enably legacy devfs interface creation"); 176SYSCTL_INT(_net_link_tap, OID_AUTO, deladdrs_on_close, CTLFLAG_RW, 177 &tapclosedeladdrs, 0, "Delete addresses and routes when /dev/tap is " 178 "closed"); 179SYSCTL_INT(_net_link_tap, OID_AUTO, debug, CTLFLAG_RW, &tapdebug, 0, ""); 180 181DEV_MODULE(if_tap, tapmodevent, NULL); 182 183static int 184tap_clone_create(struct if_clone *ifc, int unit, caddr_t params) 185{ 186 struct cdev *dev; 187 int i; 188 189 /* Find any existing device, or allocate new unit number. 
*/ 190 i = clone_create(&tapclones, &tap_cdevsw, &unit, &dev, 0); 191 if (i) { 192 dev = make_dev(&tap_cdevsw, unit, UID_ROOT, GID_WHEEL, 0600, 193 "%s%d", tapname, unit); 194 } 195 196 tapcreate(dev); 197 return (0); 198} 199 200/* vmnet devices are tap devices in disguise */ 201static int 202vmnet_clone_create(struct if_clone *ifc, int unit, caddr_t params) 203{ 204 struct cdev *dev; 205 int i; 206 207 /* Find any existing device, or allocate new unit number. */ 208 i = clone_create(&tapclones, &tap_cdevsw, &unit, &dev, VMNET_DEV_MASK); 209 if (i) { 210 dev = make_dev(&tap_cdevsw, unit | VMNET_DEV_MASK, UID_ROOT, 211 GID_WHEEL, 0600, "%s%d", vmnetname, unit); 212 } 213 214 tapcreate(dev); 215 return (0); 216} 217 218static void 219tap_destroy(struct tap_softc *tp) 220{ 221 struct ifnet *ifp = tp->tap_ifp; 222 223 CURVNET_SET(ifp->if_vnet); 224 destroy_dev(tp->tap_dev); 225 seldrain(&tp->tap_rsel); 226 knlist_clear(&tp->tap_rsel.si_note, 0); 227 knlist_destroy(&tp->tap_rsel.si_note); 228 ether_ifdetach(ifp); 229 if_free(ifp); 230 231 mtx_destroy(&tp->tap_mtx); 232 free(tp, M_TAP); 233 CURVNET_RESTORE(); 234} 235 236static void 237tap_clone_destroy(struct ifnet *ifp) 238{ 239 struct tap_softc *tp = ifp->if_softc; 240 241 mtx_lock(&tapmtx); 242 SLIST_REMOVE(&taphead, tp, tap_softc, tap_next); 243 mtx_unlock(&tapmtx); 244 tap_destroy(tp); 245} 246 247/* vmnet devices are tap devices in disguise */ 248static void 249vmnet_clone_destroy(struct ifnet *ifp) 250{ 251 tap_clone_destroy(ifp); 252} 253 254/* 255 * tapmodevent 256 * 257 * module event handler 258 */ 259static int 260tapmodevent(module_t mod, int type, void *data) 261{ 262 static eventhandler_tag eh_tag = NULL; 263 struct tap_softc *tp = NULL; 264 struct ifnet *ifp = NULL; 265 266 switch (type) { 267 case MOD_LOAD: 268 269 /* intitialize device */ 270 271 mtx_init(&tapmtx, "tapmtx", NULL, MTX_DEF); 272 SLIST_INIT(&taphead); 273 274 clone_setup(&tapclones); 275 eh_tag = EVENTHANDLER_REGISTER(dev_clone, 
tapclone, 0, 1000); 276 if (eh_tag == NULL) { 277 clone_cleanup(&tapclones); 278 mtx_destroy(&tapmtx); 279 return (ENOMEM); 280 } 281 tap_cloner = if_clone_simple(tapname, tap_clone_create, 282 tap_clone_destroy, 0); 283 vmnet_cloner = if_clone_simple(vmnetname, vmnet_clone_create, 284 vmnet_clone_destroy, 0); 285 return (0); 286 287 case MOD_UNLOAD: 288 /* 289 * The EBUSY algorithm here can't quite atomically 290 * guarantee that this is race-free since we have to 291 * release the tap mtx to deregister the clone handler. 292 */ 293 mtx_lock(&tapmtx); 294 SLIST_FOREACH(tp, &taphead, tap_next) { 295 mtx_lock(&tp->tap_mtx); 296 if (tp->tap_flags & TAP_OPEN) { 297 mtx_unlock(&tp->tap_mtx); 298 mtx_unlock(&tapmtx); 299 return (EBUSY); 300 } 301 mtx_unlock(&tp->tap_mtx); 302 } 303 mtx_unlock(&tapmtx); 304 305 EVENTHANDLER_DEREGISTER(dev_clone, eh_tag); 306 if_clone_detach(tap_cloner); 307 if_clone_detach(vmnet_cloner); 308 drain_dev_clone_events(); 309 310 mtx_lock(&tapmtx); 311 while ((tp = SLIST_FIRST(&taphead)) != NULL) { 312 SLIST_REMOVE_HEAD(&taphead, tap_next); 313 mtx_unlock(&tapmtx); 314 315 ifp = tp->tap_ifp; 316 317 TAPDEBUG("detaching %s\n", ifp->if_xname); 318 319 tap_destroy(tp); 320 mtx_lock(&tapmtx); 321 } 322 mtx_unlock(&tapmtx); 323 clone_cleanup(&tapclones); 324 325 mtx_destroy(&tapmtx); 326 327 break; 328 329 default: 330 return (EOPNOTSUPP); 331 } 332 333 return (0); 334} /* tapmodevent */ 335 336 337/* 338 * DEVFS handler 339 * 340 * We need to support two kind of devices - tap and vmnet 341 */ 342static void 343tapclone(void *arg, struct ucred *cred, char *name, int namelen, struct cdev **dev) 344{ 345 char devname[SPECNAMELEN + 1]; 346 int i, unit, append_unit; 347 int extra; 348 349 if (*dev != NULL) 350 return; 351 352 if (!tapdclone || 353 (!tapuopen && priv_check_cred(cred, PRIV_NET_IFCREATE, 0) != 0)) 354 return; 355 356 unit = 0; 357 append_unit = 0; 358 extra = 0; 359 360 /* We're interested in only tap/vmnet devices. 
*/ 361 if (strcmp(name, tapname) == 0) { 362 unit = -1; 363 } else if (strcmp(name, vmnetname) == 0) { 364 unit = -1; 365 extra = VMNET_DEV_MASK; 366 } else if (dev_stdclone(name, NULL, tapname, &unit) != 1) { 367 if (dev_stdclone(name, NULL, vmnetname, &unit) != 1) { 368 return; 369 } else { 370 extra = VMNET_DEV_MASK; 371 } 372 } 373 374 if (unit == -1) 375 append_unit = 1; 376 377 CURVNET_SET(CRED_TO_VNET(cred)); 378 /* find any existing device, or allocate new unit number */ 379 i = clone_create(&tapclones, &tap_cdevsw, &unit, dev, extra); 380 if (i) { 381 if (append_unit) { 382 /* 383 * We were passed 'tun' or 'tap', with no unit specified 384 * so we'll need to append it now. 385 */ 386 namelen = snprintf(devname, sizeof(devname), "%s%d", name, 387 unit); 388 name = devname; 389 } 390 391 *dev = make_dev_credf(MAKEDEV_REF, &tap_cdevsw, unit | extra, 392 cred, UID_ROOT, GID_WHEEL, 0600, "%s", name); 393 } 394 395 if_clone_create(name, namelen, NULL); 396 CURVNET_RESTORE(); 397} /* tapclone */ 398 399 400/* 401 * tapcreate 402 * 403 * to create interface 404 */ 405static void 406tapcreate(struct cdev *dev) 407{ 408 struct ifnet *ifp = NULL; 409 struct tap_softc *tp = NULL; 410 unsigned short macaddr_hi; 411 uint32_t macaddr_mid; 412 int unit; 413 const char *name = NULL; 414 u_char eaddr[6]; 415 416 /* allocate driver storage and create device */ 417 tp = malloc(sizeof(*tp), M_TAP, M_WAITOK | M_ZERO); 418 mtx_init(&tp->tap_mtx, "tap_mtx", NULL, MTX_DEF); 419 mtx_lock(&tapmtx); 420 SLIST_INSERT_HEAD(&taphead, tp, tap_next); 421 mtx_unlock(&tapmtx); 422 423 unit = dev2unit(dev); 424 425 /* select device: tap or vmnet */ 426 if (unit & VMNET_DEV_MASK) { 427 name = vmnetname; 428 tp->tap_flags |= TAP_VMNET; 429 } else 430 name = tapname; 431 432 unit &= TAPMAXUNIT; 433 434 TAPDEBUG("tapcreate(%s%d). 
minor = %#x\n", name, unit, dev2unit(dev)); 435 436 /* generate fake MAC address: 00 bd xx xx xx unit_no */ 437 macaddr_hi = htons(0x00bd); 438 macaddr_mid = (uint32_t) ticks; 439 bcopy(&macaddr_hi, eaddr, sizeof(short)); 440 bcopy(&macaddr_mid, &eaddr[2], sizeof(uint32_t)); 441 eaddr[5] = (u_char)unit; 442 443 /* fill the rest and attach interface */ 444 ifp = tp->tap_ifp = if_alloc(IFT_ETHER); 445 if (ifp == NULL) 446 panic("%s%d: can not if_alloc()", name, unit); 447 ifp->if_softc = tp; 448 if_initname(ifp, name, unit); 449 ifp->if_init = tapifinit; 450 ifp->if_start = tapifstart; 451 ifp->if_ioctl = tapifioctl; 452 ifp->if_mtu = ETHERMTU; 453 ifp->if_flags = (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST); 454 IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); 455 ifp->if_capabilities |= IFCAP_LINKSTATE; 456 ifp->if_capenable |= IFCAP_LINKSTATE; 457 458 dev->si_drv1 = tp; 459 tp->tap_dev = dev; 460 461 ether_ifattach(ifp, eaddr); 462 463 mtx_lock(&tp->tap_mtx); 464 tp->tap_flags |= TAP_INITED; 465 mtx_unlock(&tp->tap_mtx); 466 467 knlist_init_mtx(&tp->tap_rsel.si_note, &tp->tap_mtx); 468 469 TAPDEBUG("interface %s is created. minor = %#x\n", 470 ifp->if_xname, dev2unit(dev)); 471} /* tapcreate */ 472 473 474/* 475 * tapopen 476 * 477 * to open tunnel. 
must be superuser 478 */ 479static int 480tapopen(struct cdev *dev, int flag, int mode, struct thread *td) 481{ 482 struct tap_softc *tp = NULL; 483 struct ifnet *ifp = NULL; 484 int error; 485 486 if (tapuopen == 0) { 487 error = priv_check(td, PRIV_NET_TAP); 488 if (error) 489 return (error); 490 } 491 492 if ((dev2unit(dev) & CLONE_UNITMASK) > TAPMAXUNIT) 493 return (ENXIO); 494 495 tp = dev->si_drv1; 496 497 mtx_lock(&tp->tap_mtx); 498 if (tp->tap_flags & TAP_OPEN) { 499 mtx_unlock(&tp->tap_mtx); 500 return (EBUSY); 501 } 502 503 bcopy(IF_LLADDR(tp->tap_ifp), tp->ether_addr, sizeof(tp->ether_addr)); 504 tp->tap_pid = td->td_proc->p_pid; 505 tp->tap_flags |= TAP_OPEN; 506 ifp = tp->tap_ifp; 507 508 ifp->if_drv_flags |= IFF_DRV_RUNNING; 509 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 510 if (tapuponopen) 511 ifp->if_flags |= IFF_UP; 512 if_link_state_change(ifp, LINK_STATE_UP); 513 mtx_unlock(&tp->tap_mtx); 514 515 TAPDEBUG("%s is open. minor = %#x\n", ifp->if_xname, dev2unit(dev)); 516 517 return (0); 518} /* tapopen */ 519 520 521/* 522 * tapclose 523 * 524 * close the device - mark i/f down & delete routing info 525 */ 526static int 527tapclose(struct cdev *dev, int foo, int bar, struct thread *td) 528{ 529 struct ifaddr *ifa; 530 struct tap_softc *tp = dev->si_drv1; 531 struct ifnet *ifp = tp->tap_ifp; 532 533 /* junk all pending output */ 534 mtx_lock(&tp->tap_mtx); 535 CURVNET_SET(ifp->if_vnet); 536 IF_DRAIN(&ifp->if_snd); 537 538 /* 539 * do not bring the interface down, and do not anything with 540 * interface, if we are in VMnet mode. just close the device. 
541 */ 542 543 if (tapclosedeladdrs == 1 && ((tp->tap_flags & TAP_VMNET) == 0) && 544 (ifp->if_flags & IFF_UP)) { 545 mtx_unlock(&tp->tap_mtx); 546 if_down(ifp); 547 mtx_lock(&tp->tap_mtx); 548 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 549 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 550 mtx_unlock(&tp->tap_mtx); 551 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 552 rtinit(ifa, (int)RTM_DELETE, 0); 553 } 554 if_purgeaddrs(ifp); 555 mtx_lock(&tp->tap_mtx); 556 } 557 } 558 559 if_link_state_change(ifp, LINK_STATE_DOWN); 560 CURVNET_RESTORE(); 561 562 funsetown(&tp->tap_sigio); 563 selwakeuppri(&tp->tap_rsel, PZERO+1); 564 KNOTE_LOCKED(&tp->tap_rsel.si_note, 0); 565 566 tp->tap_flags &= ~TAP_OPEN; 567 tp->tap_pid = 0; 568 mtx_unlock(&tp->tap_mtx); 569 570 TAPDEBUG("%s is closed. minor = %#x\n", 571 ifp->if_xname, dev2unit(dev)); 572 573 return (0); 574} /* tapclose */ 575 576 577/* 578 * tapifinit 579 * 580 * network interface initialization function 581 */ 582static void 583tapifinit(void *xtp) 584{ 585 struct tap_softc *tp = (struct tap_softc *)xtp; 586 struct ifnet *ifp = tp->tap_ifp; 587 588 TAPDEBUG("initializing %s\n", ifp->if_xname); 589 590 mtx_lock(&tp->tap_mtx); 591 ifp->if_drv_flags |= IFF_DRV_RUNNING; 592 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 593 mtx_unlock(&tp->tap_mtx); 594 595 /* attempt to start output */ 596 tapifstart(ifp); 597} /* tapifinit */ 598 599 600/* 601 * tapifioctl 602 * 603 * Process an ioctl request on network interface 604 */ 605static int 606tapifioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 607{ 608 struct tap_softc *tp = ifp->if_softc; 609 struct ifreq *ifr = (struct ifreq *)data; 610 struct ifstat *ifs = NULL; 611 struct ifmediareq *ifmr = NULL; 612 int dummy, error = 0; 613 614 switch (cmd) { 615 case SIOCSIFFLAGS: /* XXX -- just like vmnet does */ 616 case SIOCADDMULTI: 617 case SIOCDELMULTI: 618 break; 619 620 case SIOCGIFMEDIA: 621 ifmr = (struct ifmediareq *)data; 622 dummy = ifmr->ifm_count; 623 ifmr->ifm_count = 1; 624 
ifmr->ifm_status = IFM_AVALID; 625 ifmr->ifm_active = IFM_ETHER; 626 if (tp->tap_flags & TAP_OPEN) 627 ifmr->ifm_status |= IFM_ACTIVE; 628 ifmr->ifm_current = ifmr->ifm_active; 629 if (dummy >= 1) { 630 int media = IFM_ETHER; 631 error = copyout(&media, ifmr->ifm_ulist, 632 sizeof(int)); 633 } 634 break; 635 636 case SIOCSIFMTU: 637 ifp->if_mtu = ifr->ifr_mtu; 638 break; 639 640 case SIOCGIFSTATUS: 641 ifs = (struct ifstat *)data; 642 mtx_lock(&tp->tap_mtx); 643 if (tp->tap_pid != 0) 644 snprintf(ifs->ascii, sizeof(ifs->ascii), 645 "\tOpened by PID %d\n", tp->tap_pid); 646 else 647 ifs->ascii[0] = '\0'; 648 mtx_unlock(&tp->tap_mtx); 649 break; 650 651 default: 652 error = ether_ioctl(ifp, cmd, data); 653 break; 654 } 655 656 return (error); 657} /* tapifioctl */ 658 659 660/* 661 * tapifstart 662 * 663 * queue packets from higher level ready to put out 664 */ 665static void 666tapifstart(struct ifnet *ifp) 667{ 668 struct tap_softc *tp = ifp->if_softc; 669 670 TAPDEBUG("%s starting\n", ifp->if_xname); 671 672 /* 673 * do not junk pending output if we are in VMnet mode. 674 * XXX: can this do any harm because of queue overflow? 675 */ 676 677 mtx_lock(&tp->tap_mtx); 678 if (((tp->tap_flags & TAP_VMNET) == 0) && 679 ((tp->tap_flags & TAP_READY) != TAP_READY)) { 680 struct mbuf *m; 681 682 /* Unlocked read. 
*/ 683 TAPDEBUG("%s not ready, tap_flags = 0x%x\n", ifp->if_xname, 684 tp->tap_flags); 685 686 for (;;) { 687 IF_DEQUEUE(&ifp->if_snd, m); 688 if (m != NULL) { 689 m_freem(m); 690 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 691 } else 692 break; 693 } 694 mtx_unlock(&tp->tap_mtx); 695 696 return; 697 } 698 699 ifp->if_drv_flags |= IFF_DRV_OACTIVE; 700 701 if (!IFQ_IS_EMPTY(&ifp->if_snd)) { 702 if (tp->tap_flags & TAP_RWAIT) { 703 tp->tap_flags &= ~TAP_RWAIT; 704 wakeup(tp); 705 } 706 707 if ((tp->tap_flags & TAP_ASYNC) && (tp->tap_sigio != NULL)) { 708 mtx_unlock(&tp->tap_mtx); 709 pgsigio(&tp->tap_sigio, SIGIO, 0); 710 mtx_lock(&tp->tap_mtx); 711 } 712 713 selwakeuppri(&tp->tap_rsel, PZERO+1); 714 KNOTE_LOCKED(&tp->tap_rsel.si_note, 0); 715 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); /* obytes are counted in ether_output */ 716 } 717 718 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 719 mtx_unlock(&tp->tap_mtx); 720} /* tapifstart */ 721 722 723/* 724 * tapioctl 725 * 726 * the cdevsw interface is now pretty minimal 727 */ 728static int 729tapioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td) 730{ 731 struct tap_softc *tp = dev->si_drv1; 732 struct ifnet *ifp = tp->tap_ifp; 733 struct tapinfo *tapp = NULL; 734 int f; 735#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ 736 defined(COMPAT_FREEBSD4) 737 int ival; 738#endif 739 740 switch (cmd) { 741 case TAPSIFINFO: 742 tapp = (struct tapinfo *)data; 743 mtx_lock(&tp->tap_mtx); 744 ifp->if_mtu = tapp->mtu; 745 ifp->if_type = tapp->type; 746 ifp->if_baudrate = tapp->baudrate; 747 mtx_unlock(&tp->tap_mtx); 748 break; 749 750 case TAPGIFINFO: 751 tapp = (struct tapinfo *)data; 752 mtx_lock(&tp->tap_mtx); 753 tapp->mtu = ifp->if_mtu; 754 tapp->type = ifp->if_type; 755 tapp->baudrate = ifp->if_baudrate; 756 mtx_unlock(&tp->tap_mtx); 757 break; 758 759 case TAPSDEBUG: 760 tapdebug = *(int *)data; 761 break; 762 763 case TAPGDEBUG: 764 *(int *)data = tapdebug; 765 break; 766 767 case 
TAPGIFNAME: { 768 struct ifreq *ifr = (struct ifreq *) data; 769 770 strlcpy(ifr->ifr_name, ifp->if_xname, IFNAMSIZ); 771 } break; 772 773 case FIONBIO: 774 break; 775 776 case FIOASYNC: 777 mtx_lock(&tp->tap_mtx); 778 if (*(int *)data) 779 tp->tap_flags |= TAP_ASYNC; 780 else 781 tp->tap_flags &= ~TAP_ASYNC; 782 mtx_unlock(&tp->tap_mtx); 783 break; 784 785 case FIONREAD: 786 if (!IFQ_IS_EMPTY(&ifp->if_snd)) { 787 struct mbuf *mb; 788 789 IFQ_LOCK(&ifp->if_snd); 790 IFQ_POLL_NOLOCK(&ifp->if_snd, mb); 791 for (*(int *)data = 0; mb != NULL; 792 mb = mb->m_next) 793 *(int *)data += mb->m_len; 794 IFQ_UNLOCK(&ifp->if_snd); 795 } else 796 *(int *)data = 0; 797 break; 798 799 case FIOSETOWN: 800 return (fsetown(*(int *)data, &tp->tap_sigio)); 801 802 case FIOGETOWN: 803 *(int *)data = fgetown(&tp->tap_sigio); 804 return (0); 805 806 /* this is deprecated, FIOSETOWN should be used instead */ 807 case TIOCSPGRP: 808 return (fsetown(-(*(int *)data), &tp->tap_sigio)); 809 810 /* this is deprecated, FIOGETOWN should be used instead */ 811 case TIOCGPGRP: 812 *(int *)data = -fgetown(&tp->tap_sigio); 813 return (0); 814 815 /* VMware/VMnet port ioctl's */ 816 817#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ 818 defined(COMPAT_FREEBSD4) 819 case _IO('V', 0): 820 ival = IOCPARM_IVAL(data); 821 data = (caddr_t)&ival; 822 /* FALLTHROUGH */ 823#endif 824 case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */ 825 f = *(int *)data; 826 f &= 0x0fff; 827 f &= ~IFF_CANTCHANGE; 828 f |= IFF_UP; 829 830 mtx_lock(&tp->tap_mtx); 831 ifp->if_flags = f | (ifp->if_flags & IFF_CANTCHANGE); 832 mtx_unlock(&tp->tap_mtx); 833 break; 834 835 case SIOCGIFADDR: /* get MAC address of the remote side */ 836 mtx_lock(&tp->tap_mtx); 837 bcopy(tp->ether_addr, data, sizeof(tp->ether_addr)); 838 mtx_unlock(&tp->tap_mtx); 839 break; 840 841 case SIOCSIFADDR: /* set MAC address of the remote side */ 842 mtx_lock(&tp->tap_mtx); 843 bcopy(data, tp->ether_addr, sizeof(tp->ether_addr)); 844 
mtx_unlock(&tp->tap_mtx); 845 break; 846 847 default: 848 return (ENOTTY); 849 } 850 return (0); 851} /* tapioctl */ 852 853 854/* 855 * tapread 856 * 857 * the cdevsw read interface - reads a packet at a time, or at 858 * least as much of a packet as can be read 859 */ 860static int 861tapread(struct cdev *dev, struct uio *uio, int flag) 862{ 863 struct tap_softc *tp = dev->si_drv1; 864 struct ifnet *ifp = tp->tap_ifp; 865 struct mbuf *m = NULL; 866 int error = 0, len; 867 868 TAPDEBUG("%s reading, minor = %#x\n", ifp->if_xname, dev2unit(dev)); 869 870 mtx_lock(&tp->tap_mtx); 871 if ((tp->tap_flags & TAP_READY) != TAP_READY) { 872 mtx_unlock(&tp->tap_mtx); 873 874 /* Unlocked read. */ 875 TAPDEBUG("%s not ready. minor = %#x, tap_flags = 0x%x\n", 876 ifp->if_xname, dev2unit(dev), tp->tap_flags); 877 878 return (EHOSTDOWN); 879 } 880 881 tp->tap_flags &= ~TAP_RWAIT; 882 883 /* sleep until we get a packet */ 884 do { 885 IF_DEQUEUE(&ifp->if_snd, m); 886 887 if (m == NULL) { 888 if (flag & O_NONBLOCK) { 889 mtx_unlock(&tp->tap_mtx); 890 return (EWOULDBLOCK); 891 } 892 893 tp->tap_flags |= TAP_RWAIT; 894 error = mtx_sleep(tp, &tp->tap_mtx, PCATCH | (PZERO + 1), 895 "taprd", 0); 896 if (error) { 897 mtx_unlock(&tp->tap_mtx); 898 return (error); 899 } 900 } 901 } while (m == NULL); 902 mtx_unlock(&tp->tap_mtx); 903 904 /* feed packet to bpf */ 905 BPF_MTAP(ifp, m); 906 907 /* xfer packet to user space */ 908 while ((m != NULL) && (uio->uio_resid > 0) && (error == 0)) { 909 len = min(uio->uio_resid, m->m_len); 910 if (len == 0) 911 break; 912 913 error = uiomove(mtod(m, void *), len, uio); 914 m = m_free(m); 915 } 916 917 if (m != NULL) { 918 TAPDEBUG("%s dropping mbuf, minor = %#x\n", ifp->if_xname, 919 dev2unit(dev)); 920 m_freem(m); 921 } 922 923 return (error); 924} /* tapread */ 925 926 927/* 928 * tapwrite 929 * 930 * the cdevsw write interface - an atomic write is a packet - or else! 
931 */ 932static int 933tapwrite(struct cdev *dev, struct uio *uio, int flag) 934{ 935 struct ether_header *eh; 936 struct tap_softc *tp = dev->si_drv1; 937 struct ifnet *ifp = tp->tap_ifp; 938 struct mbuf *m; 939 940 TAPDEBUG("%s writing, minor = %#x\n", 941 ifp->if_xname, dev2unit(dev)); 942 943 if (uio->uio_resid == 0) 944 return (0); 945 946 if ((uio->uio_resid < 0) || (uio->uio_resid > TAPMRU)) { 947 TAPDEBUG("%s invalid packet len = %zd, minor = %#x\n", 948 ifp->if_xname, uio->uio_resid, dev2unit(dev)); 949 950 return (EIO); 951 } 952 953 if ((m = m_uiotombuf(uio, M_NOWAIT, 0, ETHER_ALIGN, 954 M_PKTHDR)) == NULL) { 955 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); 956 return (ENOBUFS); 957 } 958 959 m->m_pkthdr.rcvif = ifp; 960 961 /* 962 * Only pass a unicast frame to ether_input(), if it would actually 963 * have been received by non-virtual hardware. 964 */ 965 if (m->m_len < sizeof(struct ether_header)) { 966 m_freem(m); 967 return (0); 968 } 969 eh = mtod(m, struct ether_header *); 970 971 if (eh && (ifp->if_flags & IFF_PROMISC) == 0 && 972 !ETHER_IS_MULTICAST(eh->ether_dhost) && 973 bcmp(eh->ether_dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN) != 0) { 974 m_freem(m); 975 return (0); 976 } 977 978 /* Pass packet up to parent. */ 979 CURVNET_SET(ifp->if_vnet); 980 (*ifp->if_input)(ifp, m); 981 CURVNET_RESTORE(); 982 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); /* ibytes are counted in parent */ 983 984 return (0); 985} /* tapwrite */ 986 987 988/* 989 * tappoll 990 * 991 * the poll interface, this is only useful on reads 992 * really. 
the write detect always returns true, write never blocks 993 * anyway, it either accepts the packet or drops it 994 */ 995static int 996tappoll(struct cdev *dev, int events, struct thread *td) 997{ 998 struct tap_softc *tp = dev->si_drv1; 999 struct ifnet *ifp = tp->tap_ifp; 1000 int revents = 0; 1001 1002 TAPDEBUG("%s polling, minor = %#x\n", 1003 ifp->if_xname, dev2unit(dev)); 1004 1005 if (events & (POLLIN | POLLRDNORM)) { 1006 IFQ_LOCK(&ifp->if_snd); 1007 if (!IFQ_IS_EMPTY(&ifp->if_snd)) { 1008 TAPDEBUG("%s have data in queue. len = %d, " \ 1009 "minor = %#x\n", ifp->if_xname, 1010 ifp->if_snd.ifq_len, dev2unit(dev)); 1011 1012 revents |= (events & (POLLIN | POLLRDNORM)); 1013 } else { 1014 TAPDEBUG("%s waiting for data, minor = %#x\n", 1015 ifp->if_xname, dev2unit(dev)); 1016 1017 selrecord(td, &tp->tap_rsel); 1018 } 1019 IFQ_UNLOCK(&ifp->if_snd); 1020 } 1021 1022 if (events & (POLLOUT | POLLWRNORM)) 1023 revents |= (events & (POLLOUT | POLLWRNORM)); 1024 1025 return (revents); 1026} /* tappoll */ 1027 1028 1029/* 1030 * tap_kqfilter 1031 * 1032 * support for kevent() system call 1033 */ 1034static int 1035tapkqfilter(struct cdev *dev, struct knote *kn) 1036{ 1037 struct tap_softc *tp = dev->si_drv1; 1038 struct ifnet *ifp = tp->tap_ifp; 1039 1040 switch (kn->kn_filter) { 1041 case EVFILT_READ: 1042 TAPDEBUG("%s kqfilter: EVFILT_READ, minor = %#x\n", 1043 ifp->if_xname, dev2unit(dev)); 1044 kn->kn_fop = &tap_read_filterops; 1045 break; 1046 1047 case EVFILT_WRITE: 1048 TAPDEBUG("%s kqfilter: EVFILT_WRITE, minor = %#x\n", 1049 ifp->if_xname, dev2unit(dev)); 1050 kn->kn_fop = &tap_write_filterops; 1051 break; 1052 1053 default: 1054 TAPDEBUG("%s kqfilter: invalid filter, minor = %#x\n", 1055 ifp->if_xname, dev2unit(dev)); 1056 return (EINVAL); 1057 /* NOT REACHED */ 1058 } 1059 1060 kn->kn_hook = tp; 1061 knlist_add(&tp->tap_rsel.si_note, kn, 0); 1062 1063 return (0); 1064} /* tapkqfilter */ 1065 1066 1067/* 1068 * tap_kqread 1069 * 1070 * Return true if there 
is data in the interface queue 1071 */ 1072static int 1073tapkqread(struct knote *kn, long hint) 1074{ 1075 int ret; 1076 struct tap_softc *tp = kn->kn_hook; 1077 struct cdev *dev = tp->tap_dev; 1078 struct ifnet *ifp = tp->tap_ifp; 1079 1080 if ((kn->kn_data = ifp->if_snd.ifq_len) > 0) { 1081 TAPDEBUG("%s have data in queue. len = %d, minor = %#x\n", 1082 ifp->if_xname, ifp->if_snd.ifq_len, dev2unit(dev)); 1083 ret = 1; 1084 } else { 1085 TAPDEBUG("%s waiting for data, minor = %#x\n", 1086 ifp->if_xname, dev2unit(dev)); 1087 ret = 0; 1088 } 1089 1090 return (ret); 1091} /* tapkqread */ 1092 1093 1094/* 1095 * tap_kqwrite 1096 * 1097 * Always can write. Return the MTU in kn->data 1098 */ 1099static int 1100tapkqwrite(struct knote *kn, long hint) 1101{ 1102 struct tap_softc *tp = kn->kn_hook; 1103 struct ifnet *ifp = tp->tap_ifp; 1104 1105 kn->kn_data = ifp->if_mtu; 1106 1107 return (1); 1108} /* tapkqwrite */ 1109 1110 1111static void 1112tapkqdetach(struct knote *kn) 1113{ 1114 struct tap_softc *tp = kn->kn_hook; 1115 1116 knlist_remove(&tp->tap_rsel.si_note, kn, 0); 1117} /* tapkqdetach */ 1118 1119