/* if_tun.c revision 32776 */
1233203Stijl/* $NetBSD: if_tun.c,v 1.14 1994/06/29 06:36:25 cgd Exp $ */ 2233203Stijl 3233203Stijl/* 4233203Stijl * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk> 5233203Stijl * Nottingham University 1987. 6233203Stijl * 7233203Stijl * This source may be freely distributed, however I would be interested 8233203Stijl * in any changes that are made. 9233203Stijl * 10233203Stijl * This driver takes packets off the IP i/f and hands them up to a 11233203Stijl * user process to have it's wicked way with. This driver has it's 12233203Stijl * roots in a similar driver written by Phil Cockcroft (formerly) at 13233203Stijl * UCL. This driver is based much more on read/write/poll mode of 14233203Stijl * operation though. 15233203Stijl */ 16233203Stijl 17233203Stijl#include "tun.h" 18233203Stijl#if NTUN > 0 19233203Stijl 20233203Stijl#include "opt_devfs.h" 21233203Stijl#include "opt_inet.h" 22233203Stijl 23233203Stijl#include <sys/param.h> 24233203Stijl#include <sys/proc.h> 25233203Stijl#include <sys/systm.h> 26233203Stijl#include <sys/mbuf.h> 27233203Stijl#include <sys/socket.h> 28233203Stijl#include <sys/filio.h> 29233203Stijl#include <sys/sockio.h> 30233203Stijl#include <sys/ttycom.h> 31233203Stijl#include <sys/poll.h> 32233203Stijl#include <sys/signalvar.h> 33233203Stijl#include <sys/kernel.h> 34233203Stijl#include <sys/sysctl.h> 35233203Stijl#ifdef DEVFS 36233203Stijl#include <sys/devfsext.h> 37233203Stijl#endif /*DEVFS*/ 38233203Stijl#include <sys/conf.h> 39233203Stijl#include <sys/uio.h> 40233203Stijl/* 41233203Stijl * XXX stop <sys/vnode.h> from including <vnode_if.h>. <vnode_if.h> doesn't 42233203Stijl * exist if we are an LKM. 
43233203Stijl */ 44233203Stijl#undef KERNEL 45233203Stijl#include <sys/vnode.h> 46233203Stijl#define KERNEL 47233203Stijl 48233203Stijl#include <net/if.h> 49233203Stijl#include <net/netisr.h> 50233203Stijl#include <net/route.h> 51233203Stijl 52233203Stijl#ifdef INET 53233203Stijl#include <netinet/in.h> 54233203Stijl#include <netinet/in_var.h> 55233203Stijl#endif 56233203Stijl 57233203Stijl#ifdef NS 58233203Stijl#include <netns/ns.h> 59233203Stijl#include <netns/ns_if.h> 60233203Stijl#endif 61233203Stijl 62233203Stijl#include "bpfilter.h" 63233203Stijl#if NBPFILTER > 0 64233203Stijl#include <net/bpf.h> 65233203Stijl#endif 66233203Stijl 67233203Stijl#include <net/if_tunvar.h> 68233203Stijl#include <net/if_tun.h> 69233203Stijl 70233203Stijlstatic void tunattach __P((void *)); 71233203StijlPSEUDO_SET(tunattach, if_tun); 72233203Stijl 73233203Stijl#define TUNDEBUG if (tundebug) printf 74233203Stijlstatic int tundebug = 0; 75233203StijlSYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, ""); 76233203Stijl 77233203Stijlstatic struct tun_softc tunctl[NTUN]; 78233203Stijl 79233203Stijlstatic int tunoutput __P((struct ifnet *, struct mbuf *, struct sockaddr *, 80233203Stijl struct rtentry *rt)); 81233203Stijlstatic int tunifioctl __P((struct ifnet *, int, caddr_t)); 82233203Stijlstatic int tuninit __P((int)); 83233203Stijl 84233203Stijlstatic d_open_t tunopen; 85233203Stijlstatic d_close_t tunclose; 86233203Stijlstatic d_read_t tunread; 87233203Stijlstatic d_write_t tunwrite; 88233203Stijlstatic d_ioctl_t tunioctl; 89233203Stijlstatic d_poll_t tunpoll; 90233203Stijl 91233203Stijl#define CDEV_MAJOR 52 92233203Stijlstatic struct cdevsw tun_cdevsw = { 93233203Stijl tunopen, tunclose, tunread, tunwrite, 94233203Stijl tunioctl, nullstop, noreset, nodevtotty, 95233203Stijl tunpoll, nommap, nostrategy, "tun", NULL, -1 96233203Stijl}; 97233203Stijl 98233203Stijl 99233203Stijlstatic tun_devsw_installed = 0; 100233203Stijl#ifdef DEVFS 101233203Stijlstatic void 
*tun_devfs_token[NTUN]; 102233203Stijl#endif 103233203Stijl 104233203Stijlstatic void 105233203Stijltunattach(dummy) 106233203Stijl void *dummy; 107233203Stijl{ 108233203Stijl register int i; 109233203Stijl struct ifnet *ifp; 110233203Stijl dev_t dev; 111233203Stijl 112233203Stijl if ( tun_devsw_installed ) 113233203Stijl return; 114233203Stijl dev = makedev(CDEV_MAJOR, 0); 115233203Stijl cdevsw_add(&dev, &tun_cdevsw, NULL); 116233203Stijl tun_devsw_installed = 1; 117233203Stijl for ( i = 0; i < NTUN; i++ ) { 118233203Stijl#ifdef DEVFS 119233203Stijl tun_devfs_token[i] = devfs_add_devswf(&tun_cdevsw, i, DV_CHR, 120233203Stijl UID_UUCP, GID_DIALER, 121233203Stijl 0600, "tun%d", i); 122233203Stijl#endif 123233203Stijl tunctl[i].tun_flags = TUN_INITED; 124233203Stijl 125233203Stijl ifp = &tunctl[i].tun_if; 126233203Stijl ifp->if_unit = i; 127233203Stijl ifp->if_name = "tun"; 128233203Stijl ifp->if_mtu = TUNMTU; 129233203Stijl ifp->if_ioctl = tunifioctl; 130233203Stijl ifp->if_output = tunoutput; 131233203Stijl ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST; 132233203Stijl ifp->if_snd.ifq_maxlen = ifqmaxlen; 133233203Stijl ifp->if_collisions = 0; 134233203Stijl ifp->if_ierrors = 0; 135233203Stijl ifp->if_oerrors = 0; 136233203Stijl ifp->if_ipackets = 0; 137233203Stijl ifp->if_opackets = 0; 138233203Stijl ifp->if_ibytes = 0; 139233203Stijl ifp->if_obytes = 0; 140233203Stijl if_attach(ifp); 141233203Stijl#if NBPFILTER > 0 142233203Stijl bpfattach(ifp, DLT_NULL, sizeof(u_int)); 143233203Stijl#endif 144233203Stijl } 145233203Stijl} 146233203Stijl 147233203Stijl/* 148233203Stijl * tunnel open - must be superuser & the device must be 149233203Stijl * configured in 150233203Stijl */ 151233203Stijlstatic int 152233203Stijltunopen(dev, flag, mode, p) 153233203Stijl dev_t dev; 154233203Stijl int flag, mode; 155233203Stijl struct proc *p; 156233203Stijl{ 157233203Stijl struct ifnet *ifp; 158233203Stijl struct tun_softc *tp; 159233203Stijl register int unit, error; 160233203Stijl 
161233203Stijl error = suser(p->p_ucred, &p->p_acflag); 162233203Stijl if (error) 163233203Stijl return (error); 164233203Stijl 165233203Stijl if ((unit = minor(dev)) >= NTUN) 166233203Stijl return (ENXIO); 167233203Stijl tp = &tunctl[unit]; 168233203Stijl if (tp->tun_flags & TUN_OPEN) 169233203Stijl return EBUSY; 170233203Stijl ifp = &tp->tun_if; 171233203Stijl tp->tun_flags |= TUN_OPEN; 172233203Stijl TUNDEBUG("%s%d: open\n", ifp->if_name, ifp->if_unit); 173233203Stijl return (0); 174233203Stijl} 175233203Stijl 176233203Stijl/* 177233203Stijl * tunclose - close the device - mark i/f down & delete 178233203Stijl * routing info 179233203Stijl */ 180233203Stijlstatic int 181233203Stijltunclose(dev, foo, bar, p) 182233203Stijl dev_t dev; 183233203Stijl int foo; 184233203Stijl int bar; 185233203Stijl struct proc *p; 186233203Stijl{ 187233203Stijl register int unit = minor(dev), s; 188233203Stijl struct tun_softc *tp = &tunctl[unit]; 189233203Stijl struct ifnet *ifp = &tp->tun_if; 190233203Stijl struct mbuf *m; 191233203Stijl 192233203Stijl tp->tun_flags &= ~TUN_OPEN; 193233203Stijl 194233203Stijl /* 195233203Stijl * junk all pending output 196233203Stijl */ 197233203Stijl do { 198233203Stijl s = splimp(); 199233203Stijl IF_DEQUEUE(&ifp->if_snd, m); 200233203Stijl splx(s); 201233203Stijl if (m) 202233203Stijl m_freem(m); 203233203Stijl } while (m); 204233203Stijl 205233203Stijl if (ifp->if_flags & IFF_UP) { 206233203Stijl s = splimp(); 207233203Stijl if_down(ifp); 208233203Stijl if (ifp->if_flags & IFF_RUNNING) { 209233203Stijl /* find internet addresses and delete routes */ 210233203Stijl register struct ifaddr *ifa; 211233203Stijl for (ifa = ifp->if_addrhead.tqh_first; ifa; 212233203Stijl ifa = ifa->ifa_link.tqe_next) { 213233203Stijl if (ifa->ifa_addr->sa_family == AF_INET) { 214233203Stijl rtinit(ifa, (int)RTM_DELETE, 215233203Stijl tp->tun_flags & TUN_DSTADDR ? 
RTF_HOST : 0); 216233203Stijl } 217233203Stijl } 218233203Stijl } 219233613Sjhb splx(s); 220255040Sgibbs } 221233203Stijl tp->tun_pgrp = 0; 222233203Stijl selwakeup(&tp->tun_rsel); 223233203Stijl 224233203Stijl TUNDEBUG ("%s%d: closed\n", ifp->if_name, ifp->if_unit); 225233203Stijl return (0); 226233203Stijl} 227233203Stijl 228233203Stijlstatic int 229233203Stijltuninit(unit) 230233203Stijl int unit; 231233203Stijl{ 232233203Stijl struct tun_softc *tp = &tunctl[unit]; 233233203Stijl struct ifnet *ifp = &tp->tun_if; 234233203Stijl register struct ifaddr *ifa; 235233203Stijl 236233203Stijl TUNDEBUG("%s%d: tuninit\n", ifp->if_name, ifp->if_unit); 237233203Stijl 238233203Stijl ifp->if_flags |= IFF_UP | IFF_RUNNING; 239233203Stijl microtime(&ifp->if_lastchange); 240233203Stijl 241233203Stijl for (ifa = ifp->if_addrhead.tqh_first; ifa; 242233203Stijl ifa = ifa->ifa_link.tqe_next) { 243233203Stijl#ifdef INET 244233203Stijl if (ifa->ifa_addr->sa_family == AF_INET) { 245233203Stijl struct sockaddr_in *si; 246233203Stijl 247233203Stijl si = (struct sockaddr_in *)ifa->ifa_addr; 248233203Stijl if (si && si->sin_addr.s_addr) 249233203Stijl tp->tun_flags |= TUN_IASET; 250233203Stijl 251233203Stijl si = (struct sockaddr_in *)ifa->ifa_dstaddr; 252233203Stijl if (si && si->sin_addr.s_addr) 253233203Stijl tp->tun_flags |= TUN_DSTADDR; 254233203Stijl } 255233203Stijl#endif 256233203Stijl } 257233203Stijl return 0; 258233203Stijl} 259233203Stijl 260233203Stijl/* 261233203Stijl * Process an ioctl request. 
262233203Stijl */ 263233203Stijlint 264233203Stijltunifioctl(ifp, cmd, data) 265233203Stijl struct ifnet *ifp; 266233203Stijl int cmd; 267233203Stijl caddr_t data; 268233203Stijl{ 269233203Stijl register struct ifreq *ifr = (struct ifreq *)data; 270233203Stijl int error = 0, s; 271233203Stijl 272233203Stijl s = splimp(); 273233203Stijl switch(cmd) { 274233203Stijl case SIOCSIFADDR: 275233203Stijl tuninit(ifp->if_unit); 276233203Stijl TUNDEBUG("%s%d: address set\n", 277233203Stijl ifp->if_name, ifp->if_unit); 278233203Stijl break; 279233203Stijl case SIOCSIFDSTADDR: 280233203Stijl tuninit(ifp->if_unit); 281233203Stijl TUNDEBUG("%s%d: destination address set\n", 282233203Stijl ifp->if_name, ifp->if_unit); 283233203Stijl break; 284233203Stijl case SIOCSIFMTU: 285233203Stijl ifp->if_mtu = ifr->ifr_mtu; 286233203Stijl TUNDEBUG("%s%d: mtu set\n", 287233203Stijl ifp->if_name, ifp->if_unit); 288 break; 289 case SIOCADDMULTI: 290 case SIOCDELMULTI: 291 break; 292 293 294 default: 295 error = EINVAL; 296 } 297 splx(s); 298 return (error); 299} 300 301/* 302 * tunoutput - queue packets from higher level ready to put out. 303 */ 304int 305tunoutput(ifp, m0, dst, rt) 306 struct ifnet *ifp; 307 struct mbuf *m0; 308 struct sockaddr *dst; 309 struct rtentry *rt; 310{ 311 struct tun_softc *tp = &tunctl[ifp->if_unit]; 312 struct proc *p; 313 int s; 314 315 TUNDEBUG ("%s%d: tunoutput\n", ifp->if_name, ifp->if_unit); 316 317 if ((tp->tun_flags & TUN_READY) != TUN_READY) { 318 TUNDEBUG ("%s%d: not ready 0%o\n", ifp->if_name, 319 ifp->if_unit, tp->tun_flags); 320 m_freem (m0); 321 return EHOSTDOWN; 322 } 323 324#if NBPFILTER > 0 325 /* BPF write needs to be handled specially */ 326 if (dst->sa_family == AF_UNSPEC) { 327 dst->sa_family = *(mtod(m0, int *)); 328 m0->m_len -= sizeof(int); 329 m0->m_pkthdr.len -= sizeof(int); 330 m0->m_data += sizeof(int); 331 } 332 333 if (ifp->if_bpf) { 334 /* 335 * We need to prepend the address family as 336 * a four byte field. 
Cons up a dummy header 337 * to pacify bpf. This is safe because bpf 338 * will only read from the mbuf (i.e., it won't 339 * try to free it or keep a pointer to it). 340 */ 341 struct mbuf m; 342 u_int af = dst->sa_family; 343 344 m.m_next = m0; 345 m.m_len = 4; 346 m.m_data = (char *)⁡ 347 348 bpf_mtap(ifp, &m); 349 } 350#endif 351 352 switch(dst->sa_family) { 353#ifdef INET 354 case AF_INET: 355 s = splimp(); 356 if (IF_QFULL(&ifp->if_snd)) { 357 IF_DROP(&ifp->if_snd); 358 m_freem(m0); 359 splx(s); 360 ifp->if_collisions++; 361 return (ENOBUFS); 362 } 363 ifp->if_obytes += m0->m_pkthdr.len; 364 IF_ENQUEUE(&ifp->if_snd, m0); 365 splx(s); 366 ifp->if_opackets++; 367 break; 368#endif 369 default: 370 m_freem(m0); 371 return EAFNOSUPPORT; 372 } 373 374 if (tp->tun_flags & TUN_RWAIT) { 375 tp->tun_flags &= ~TUN_RWAIT; 376 wakeup((caddr_t)tp); 377 } 378 if (tp->tun_flags & TUN_ASYNC && tp->tun_pgrp) { 379 if (tp->tun_pgrp > 0) 380 gsignal(tp->tun_pgrp, SIGIO); 381 else if ((p = pfind(-tp->tun_pgrp)) != 0) 382 psignal(p, SIGIO); 383 } 384 selwakeup(&tp->tun_rsel); 385 return 0; 386} 387 388/* 389 * the cdevsw interface is now pretty minimal. 
390 */ 391static int 392tunioctl(dev, cmd, data, flag, p) 393 dev_t dev; 394 int cmd; 395 caddr_t data; 396 int flag; 397 struct proc *p; 398{ 399 int unit = minor(dev), s; 400 struct tun_softc *tp = &tunctl[unit]; 401 struct tuninfo *tunp; 402 403 switch (cmd) { 404 case TUNSIFINFO: 405 tunp = (struct tuninfo *)data; 406 tp->tun_if.if_mtu = tunp->mtu; 407 tp->tun_if.if_type = tunp->type; 408 tp->tun_if.if_baudrate = tunp->baudrate; 409 break; 410 case TUNGIFINFO: 411 tunp = (struct tuninfo *)data; 412 tunp->mtu = tp->tun_if.if_mtu; 413 tunp->type = tp->tun_if.if_type; 414 tunp->baudrate = tp->tun_if.if_baudrate; 415 break; 416 case TUNSDEBUG: 417 tundebug = *(int *)data; 418 break; 419 case TUNGDEBUG: 420 *(int *)data = tundebug; 421 break; 422 case FIONBIO: 423 break; 424 case FIOASYNC: 425 if (*(int *)data) 426 tp->tun_flags |= TUN_ASYNC; 427 else 428 tp->tun_flags &= ~TUN_ASYNC; 429 break; 430 case FIONREAD: 431 s = splimp(); 432 if (tp->tun_if.if_snd.ifq_head) { 433 struct mbuf *mb = tp->tun_if.if_snd.ifq_head; 434 for( *(int *)data = 0; mb != 0; mb = mb->m_next) 435 *(int *)data += mb->m_len; 436 } else 437 *(int *)data = 0; 438 splx(s); 439 break; 440 case TIOCSPGRP: 441 tp->tun_pgrp = *(int *)data; 442 break; 443 case TIOCGPGRP: 444 *(int *)data = tp->tun_pgrp; 445 break; 446 default: 447 return (ENOTTY); 448 } 449 return (0); 450} 451 452/* 453 * The cdevsw read interface - reads a packet at a time, or at 454 * least as much of a packet as can be read. 
455 */ 456static int 457tunread(dev, uio, flag) 458 dev_t dev; 459 struct uio *uio; 460 int flag; 461{ 462 int unit = minor(dev); 463 struct tun_softc *tp = &tunctl[unit]; 464 struct ifnet *ifp = &tp->tun_if; 465 struct mbuf *m, *m0; 466 int error=0, len, s; 467 468 TUNDEBUG ("%s%d: read\n", ifp->if_name, ifp->if_unit); 469 if ((tp->tun_flags & TUN_READY) != TUN_READY) { 470 TUNDEBUG ("%s%d: not ready 0%o\n", ifp->if_name, 471 ifp->if_unit, tp->tun_flags); 472 return EHOSTDOWN; 473 } 474 475 tp->tun_flags &= ~TUN_RWAIT; 476 477 s = splimp(); 478 do { 479 IF_DEQUEUE(&ifp->if_snd, m0); 480 if (m0 == 0) { 481 if (flag & IO_NDELAY) { 482 splx(s); 483 return EWOULDBLOCK; 484 } 485 tp->tun_flags |= TUN_RWAIT; 486 if( error = tsleep((caddr_t)tp, PCATCH | (PZERO + 1), 487 "tunread", 0)) { 488 splx(s); 489 return error; 490 } 491 } 492 } while (m0 == 0); 493 splx(s); 494 495 while (m0 && uio->uio_resid > 0 && error == 0) { 496 len = min(uio->uio_resid, m0->m_len); 497 if (len == 0) 498 break; 499 error = uiomove(mtod(m0, caddr_t), len, uio); 500 MFREE(m0, m); 501 m0 = m; 502 } 503 504 if (m0) { 505 TUNDEBUG("Dropping mbuf\n"); 506 m_freem(m0); 507 } 508 return error; 509} 510 511/* 512 * the cdevsw write interface - an atomic write is a packet - or else! 
513 */ 514static int 515tunwrite(dev, uio, flag) 516 dev_t dev; 517 struct uio *uio; 518 int flag; 519{ 520 int unit = minor (dev); 521 struct ifnet *ifp = &tunctl[unit].tun_if; 522 struct mbuf *top, **mp, *m; 523 int error=0, s, tlen, mlen; 524 525 TUNDEBUG("%s%d: tunwrite\n", ifp->if_name, ifp->if_unit); 526 527 if (uio->uio_resid < 0 || uio->uio_resid > TUNMRU) { 528 TUNDEBUG("%s%d: len=%d!\n", ifp->if_name, ifp->if_unit, 529 uio->uio_resid); 530 return EIO; 531 } 532 tlen = uio->uio_resid; 533 534 /* get a header mbuf */ 535 MGETHDR(m, M_DONTWAIT, MT_DATA); 536 if (m == NULL) 537 return ENOBUFS; 538 mlen = MHLEN; 539 540 top = 0; 541 mp = ⊤ 542 while (error == 0 && uio->uio_resid > 0) { 543 m->m_len = min(mlen, uio->uio_resid); 544 error = uiomove(mtod (m, caddr_t), m->m_len, uio); 545 *mp = m; 546 mp = &m->m_next; 547 if (uio->uio_resid > 0) { 548 MGET (m, M_DONTWAIT, MT_DATA); 549 if (m == 0) { 550 error = ENOBUFS; 551 break; 552 } 553 mlen = MLEN; 554 } 555 } 556 if (error) { 557 if (top) 558 m_freem (top); 559 return error; 560 } 561 562 top->m_pkthdr.len = tlen; 563 top->m_pkthdr.rcvif = ifp; 564 565#if NBPFILTER > 0 566 if (ifp->if_bpf) { 567 /* 568 * We need to prepend the address family as 569 * a four byte field. Cons up a dummy header 570 * to pacify bpf. This is safe because bpf 571 * will only read from the mbuf (i.e., it won't 572 * try to free it or keep a pointer to it). 573 */ 574 struct mbuf m; 575 u_int af = AF_INET; 576 577 m.m_next = top; 578 m.m_len = 4; 579 m.m_data = (char *)⁡ 580 581 bpf_mtap(ifp, &m); 582 } 583#endif 584 585#ifdef INET 586 s = splimp(); 587 if (IF_QFULL (&ipintrq)) { 588 IF_DROP(&ipintrq); 589 splx(s); 590 ifp->if_collisions++; 591 m_freem(top); 592 return ENOBUFS; 593 } 594 IF_ENQUEUE(&ipintrq, top); 595 splx(s); 596 ifp->if_ibytes += tlen; 597 ifp->if_ipackets++; 598 schednetisr(NETISR_IP); 599#endif 600 return error; 601} 602 603/* 604 * tunpoll - the poll interface, this is only useful on reads 605 * really. 
The write detect always returns true, write never blocks 606 * anyway, it either accepts the packet or drops it. 607 */ 608static int 609tunpoll(dev, events, p) 610 dev_t dev; 611 int events; 612 struct proc *p; 613{ 614 int unit = minor(dev), s; 615 struct tun_softc *tp = &tunctl[unit]; 616 struct ifnet *ifp = &tp->tun_if; 617 int revents = 0; 618 619 s = splimp(); 620 TUNDEBUG("%s%d: tunpoll\n", ifp->if_name, ifp->if_unit); 621 622 if (events & (POLLIN | POLLRDNORM)) 623 if (ifp->if_snd.ifq_len > 0) { 624 TUNDEBUG("%s%d: tunpoll q=%d\n", ifp->if_name, 625 ifp->if_unit, ifp->if_snd.ifq_len); 626 revents |= events & (POLLIN | POLLRDNORM); 627 } else { 628 TUNDEBUG("%s%d: tunpoll waiting\n", ifp->if_name, 629 ifp->if_unit); 630 selrecord(p, &tp->tun_rsel); 631 } 632 633 if (events & (POLLOUT | POLLWRNORM)) 634 revents |= events & (POLLOUT | POLLWRNORM); 635 636 splx(s); 637 return (revents); 638} 639 640 641#endif /* NTUN */ 642