kern_sendfile.c revision 65770
1220497Smarkm/* 2220497Smarkm * Copyright (c) 1982, 1986, 1989, 1990, 1993 3220497Smarkm * The Regents of the University of California. All rights reserved. 4220497Smarkm * 5220497Smarkm * sendfile(2) and related extensions: 6220497Smarkm * Copyright (c) 1998, David Greenman. All rights reserved. 7220497Smarkm * 8220497Smarkm * Redistribution and use in source and binary forms, with or without 9220497Smarkm * modification, are permitted provided that the following conditions 10220497Smarkm * are met: 11220497Smarkm * 1. Redistributions of source code must retain the above copyright 12220497Smarkm * notice, this list of conditions and the following disclaimer. 13220497Smarkm * 2. Redistributions in binary form must reproduce the above copyright 14220497Smarkm * notice, this list of conditions and the following disclaimer in the 15220497Smarkm * documentation and/or other materials provided with the distribution. 16220497Smarkm * 3. All advertising materials mentioning features or use of this software 17220497Smarkm * must display the following acknowledgement: 18220497Smarkm * This product includes software developed by the University of 19220497Smarkm * California, Berkeley and its contributors. 20220497Smarkm * 4. Neither the name of the University nor the names of its contributors 21220497Smarkm * may be used to endorse or promote products derived from this software 22220497Smarkm * without specific prior written permission. 23220497Smarkm * 24220497Smarkm * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25220497Smarkm * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26220497Smarkm * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27220497Smarkm * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28220497Smarkm * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29220497Smarkm * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30220497Smarkm * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31220497Smarkm * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32220497Smarkm * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33220497Smarkm * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34220497Smarkm * SUCH DAMAGE. 35220497Smarkm * 36220497Smarkm * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 37220497Smarkm * $FreeBSD: head/sys/kern/uipc_syscalls.c 65770 2000-09-12 09:49:08Z bp $ 38220497Smarkm */ 39220497Smarkm 40220497Smarkm#include "opt_compat.h" 41220497Smarkm#include "opt_ktrace.h" 42220497Smarkm 43220497Smarkm#include <sys/param.h> 44220497Smarkm#include <sys/systm.h> 45220497Smarkm#include <sys/kernel.h> 46220497Smarkm#include <sys/sysproto.h> 47220497Smarkm#include <sys/malloc.h> 48220497Smarkm#include <sys/filedesc.h> 49220497Smarkm#include <sys/event.h> 50220497Smarkm#include <sys/proc.h> 51220497Smarkm#include <sys/fcntl.h> 52220497Smarkm#include <sys/file.h> 53220497Smarkm#include <sys/mbuf.h> 54220497Smarkm#include <sys/protosw.h> 55220497Smarkm#include <sys/socket.h> 56220497Smarkm#include <sys/socketvar.h> 57220497Smarkm#include <sys/signalvar.h> 58220497Smarkm#include <sys/uio.h> 59220497Smarkm#include <sys/vnode.h> 60220497Smarkm#include <sys/lock.h> 61220497Smarkm#include <sys/mount.h> 62220497Smarkm#ifdef KTRACE 63220497Smarkm#include <sys/ktrace.h> 64220497Smarkm#endif 65220497Smarkm#include <vm/vm.h> 66220497Smarkm#include <vm/vm_object.h> 67220497Smarkm#include <vm/vm_page.h> 68220497Smarkm#include <vm/vm_pageout.h> 69220497Smarkm#include <vm/vm_kern.h> 70220497Smarkm#include <vm/vm_extern.h> 71220497Smarkm 72220497Smarkmstatic void sf_buf_init(void *arg); 73220497SmarkmSYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL) 74220497Smarkmstatic struct sf_buf *sf_buf_alloc(void); 75220497Smarkmstatic void sf_buf_free(caddr_t addr, void *args); 76220497Smarkm 77220497Smarkmstatic int sendit __P((struct proc *p, int s, struct msghdr *mp, int flags)); 78220497Smarkmstatic int recvit __P((struct proc *p, int s, struct msghdr *mp, 79220497Smarkm caddr_t namelenp)); 80220497Smarkm 81220497Smarkmstatic int accept1 __P((struct proc *p, struct accept_args *uap, int compat)); 82220497Smarkmstatic int getsockname1 __P((struct proc *p, struct getsockname_args *uap, 83220497Smarkm int compat)); 84220497Smarkmstatic int getpeername1 __P((struct proc *p, struct getpeername_args *uap, 85220497Smarkm int compat)); 86220497Smarkm 87220497Smarkmstatic SLIST_HEAD(, sf_buf) sf_freelist; 88220497Smarkmstatic vm_offset_t sf_base; 89220497Smarkmstatic struct sf_buf *sf_bufs; 90220497Smarkmstatic int sf_buf_alloc_want; 91220497Smarkm 92220497Smarkm/* 93220497Smarkm * System call interface to the socket abstraction. 94220497Smarkm */ 95220497Smarkm#if defined(COMPAT_43) || defined(COMPAT_SUNOS) 96220497Smarkm#define COMPAT_OLDSOCK 97220497Smarkm#endif 98220497Smarkm 99220497Smarkmextern struct fileops socketops; 100220497Smarkm 101220497Smarkmint 102220497Smarkmsocket(p, uap) 103220497Smarkm struct proc *p; 104220497Smarkm register struct socket_args /* { 105220497Smarkm int domain; 106220497Smarkm int type; 107220497Smarkm int protocol; 108220497Smarkm } */ *uap; 109220497Smarkm{ 110220497Smarkm struct filedesc *fdp = p->p_fd; 111220497Smarkm struct socket *so; 112220497Smarkm struct file *fp; 113220497Smarkm int fd, error; 114220497Smarkm 115220497Smarkm error = falloc(p, &fp, &fd); 116220497Smarkm if (error) 117220497Smarkm return (error); 118220497Smarkm error = socreate(uap->domain, &so, uap->type, uap->protocol, p); 119220497Smarkm if (error) { 120220497Smarkm fdp->fd_ofiles[fd] = 0; 121220497Smarkm ffree(fp); 122220497Smarkm } else { 123220497Smarkm fp->f_data = (caddr_t)so; 124220497Smarkm fp->f_flag = FREAD|FWRITE; 125220497Smarkm fp->f_ops = &socketops; 126220497Smarkm fp->f_type = DTYPE_SOCKET; 127220497Smarkm p->p_retval[0] = fd; 128220497Smarkm } 129220497Smarkm return (error); 130220497Smarkm} 131220497Smarkm 132220497Smarkm/* ARGSUSED */ 133220497Smarkmint 134220497Smarkmbind(p, uap) 135220497Smarkm struct proc *p; 136220497Smarkm register struct bind_args /* { 137220497Smarkm int s; 138220497Smarkm caddr_t name; 139220497Smarkm int namelen; 140220497Smarkm } */ *uap; 141220497Smarkm{ 142220497Smarkm struct file *fp; 143220497Smarkm struct sockaddr *sa; 144220497Smarkm int error; 145220497Smarkm 146220497Smarkm error = getsock(p->p_fd, uap->s, &fp); 147220497Smarkm if (error) 148220497Smarkm return (error); 149220497Smarkm error = getsockaddr(&sa, uap->name, uap->namelen); 150220497Smarkm if (error) 151220497Smarkm return (error); 152220497Smarkm error = sobind((struct socket *)fp->f_data, sa, p); 153220497Smarkm FREE(sa, M_SONAME); 154220497Smarkm return (error); 155220497Smarkm} 156220497Smarkm 157220497Smarkm/* ARGSUSED */ 158220497Smarkmint 159220497Smarkmlisten(p, uap) 160220497Smarkm struct proc *p; 161220497Smarkm register struct listen_args /* { 162220497Smarkm int s; 163220497Smarkm int backlog; 164220497Smarkm } */ *uap; 165220497Smarkm{ 166220497Smarkm struct file *fp; 167220497Smarkm int error; 168220497Smarkm 169220497Smarkm error = getsock(p->p_fd, uap->s, &fp); 170220497Smarkm if (error) 171220497Smarkm return (error); 172220497Smarkm return (solisten((struct socket *)fp->f_data, uap->backlog, p)); 173220497Smarkm} 174220497Smarkm 175220497Smarkmstatic int 176220497Smarkmaccept1(p, uap, compat) 177220497Smarkm struct proc *p; 178220497Smarkm register struct accept_args /* { 179220497Smarkm int s; 180220497Smarkm caddr_t name; 181220497Smarkm int *anamelen; 182220497Smarkm } */ *uap; 183220497Smarkm int compat; 184220497Smarkm{ 185220497Smarkm struct filedesc *fdp = p->p_fd; 186220497Smarkm struct file *fp; 187220497Smarkm struct sockaddr *sa; 188220497Smarkm int namelen, error, s; 189220497Smarkm struct socket *head, *so; 190220497Smarkm int fd; 191220497Smarkm short fflag; /* type must match fp->f_flag */ 192220497Smarkm 193220497Smarkm if (uap->name) { 194220497Smarkm error = copyin((caddr_t)uap->anamelen, (caddr_t)&namelen, 195220497Smarkm sizeof (namelen)); 196220497Smarkm if(error) 197220497Smarkm return (error); 198220497Smarkm } 199220497Smarkm error = getsock(fdp, uap->s, &fp); 200220497Smarkm if (error) 201220497Smarkm return (error); 202220497Smarkm s = splnet(); 203220497Smarkm head = (struct socket *)fp->f_data; 204220497Smarkm if ((head->so_options & SO_ACCEPTCONN) == 0) { 205220497Smarkm splx(s); 206220497Smarkm return (EINVAL); 207220497Smarkm } 208220497Smarkm if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) { 209220497Smarkm splx(s); 210220497Smarkm return (EWOULDBLOCK); 211220497Smarkm } 212220497Smarkm while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 213220497Smarkm if (head->so_state & SS_CANTRCVMORE) { 214220497Smarkm head->so_error = ECONNABORTED; 215220497Smarkm break; 216220497Smarkm } 217220497Smarkm error = tsleep((caddr_t)&head->so_timeo, PSOCK | PCATCH, 218220497Smarkm "accept", 0); 219220497Smarkm if (error) { 220220497Smarkm splx(s); 221220497Smarkm return (error); 222220497Smarkm } 223220497Smarkm } 224220497Smarkm if (head->so_error) { 225220497Smarkm error = head->so_error; 226220497Smarkm head->so_error = 0; 227220497Smarkm splx(s); 228220497Smarkm return (error); 229220497Smarkm } 230220497Smarkm 231220497Smarkm /* 232220497Smarkm * At this point we know that there is at least one connection 233220497Smarkm * ready to be accepted. Remove it from the queue prior to 234220497Smarkm * allocating the file descriptor for it since falloc() may 235220497Smarkm * block allowing another process to accept the connection 236220497Smarkm * instead. 237220497Smarkm */ 238220497Smarkm so = TAILQ_FIRST(&head->so_comp); 239220497Smarkm TAILQ_REMOVE(&head->so_comp, so, so_list); 240220497Smarkm head->so_qlen--; 241220497Smarkm 242220497Smarkm fflag = fp->f_flag; 243220497Smarkm error = falloc(p, &fp, &fd); 244220497Smarkm if (error) { 245220497Smarkm /* 246220497Smarkm * Probably ran out of file descriptors. Put the 247220497Smarkm * unaccepted connection back onto the queue and 248220497Smarkm * do another wakeup so some other process might 249220497Smarkm * have a chance at it. 250220497Smarkm */ 251220497Smarkm TAILQ_INSERT_HEAD(&head->so_comp, so, so_list); 252220497Smarkm head->so_qlen++; 253220497Smarkm wakeup_one(&head->so_timeo); 254220497Smarkm splx(s); 255220497Smarkm return (error); 256220497Smarkm } else 257220497Smarkm p->p_retval[0] = fd; 258220497Smarkm 259220497Smarkm /* connection has been removed from the listen queue */ 260220497Smarkm KNOTE(&head->so_rcv.sb_sel.si_note, 0); 261220497Smarkm 262220497Smarkm so->so_state &= ~SS_COMP; 263220497Smarkm so->so_head = NULL; 264220497Smarkm if (head->so_sigio != NULL) 265220497Smarkm fsetown(fgetown(head->so_sigio), &so->so_sigio); 266220497Smarkm 267220497Smarkm fp->f_data = (caddr_t)so; 268220497Smarkm fp->f_flag = fflag; 269220497Smarkm fp->f_ops = &socketops; 270220497Smarkm fp->f_type = DTYPE_SOCKET; 271220497Smarkm sa = 0; 272220497Smarkm (void) soaccept(so, &sa); 273220497Smarkm if (sa == 0) { 274220497Smarkm namelen = 0; 275220497Smarkm if (uap->name) 276220497Smarkm goto gotnoname; 277220497Smarkm splx(s); 278220497Smarkm return 0; 279220497Smarkm } 280220497Smarkm if (uap->name) { 281220497Smarkm /* check sa_len before it is destroyed */ 282220497Smarkm if (namelen > sa->sa_len) 283220497Smarkm namelen = sa->sa_len; 284220497Smarkm#ifdef COMPAT_OLDSOCK 285220497Smarkm if (compat) 286220497Smarkm ((struct osockaddr *)sa)->sa_family = 287220497Smarkm sa->sa_family; 288220497Smarkm#endif 289220497Smarkm error = copyout(sa, (caddr_t)uap->name, (u_int)namelen); 290220497Smarkm if (!error) 291220497Smarkmgotnoname: 292220497Smarkm error = copyout((caddr_t)&namelen, 293220497Smarkm (caddr_t)uap->anamelen, sizeof (*uap->anamelen)); 294220497Smarkm } 295220497Smarkm if (sa) 296220497Smarkm FREE(sa, M_SONAME); 297220497Smarkm if (error) { 298220497Smarkm fdp->fd_ofiles[fd] = 0; 299220497Smarkm ffree(fp); 300220497Smarkm } 301220497Smarkm splx(s); 302220497Smarkm return (error); 303220497Smarkm} 304220497Smarkm 305220497Smarkmint 306220497Smarkmaccept(p, uap) 307220497Smarkm struct proc *p; 308220497Smarkm struct accept_args *uap; 309220497Smarkm{ 310220497Smarkm 311220497Smarkm return (accept1(p, uap, 0)); 312220497Smarkm} 313220497Smarkm 314220497Smarkm#ifdef COMPAT_OLDSOCK 315220497Smarkmint 316220497Smarkmoaccept(p, uap) 317220497Smarkm struct proc *p; 318220497Smarkm struct accept_args *uap; 319220497Smarkm{ 320220497Smarkm 321220497Smarkm return (accept1(p, uap, 1)); 322220497Smarkm} 323220497Smarkm#endif /* COMPAT_OLDSOCK */ 324220497Smarkm 325220497Smarkm/* ARGSUSED */ 326220497Smarkmint 327220497Smarkmconnect(p, uap) 328220497Smarkm struct proc *p; 329220497Smarkm register struct connect_args /* { 330220497Smarkm int s; 331220497Smarkm caddr_t name; 332220497Smarkm int namelen; 333220497Smarkm } */ *uap; 334220497Smarkm{ 335220497Smarkm struct file *fp; 336220497Smarkm register struct socket *so; 337220497Smarkm struct sockaddr *sa; 338220497Smarkm int error, s; 339220497Smarkm 340220497Smarkm error = getsock(p->p_fd, uap->s, &fp); 341220497Smarkm if (error) 342220497Smarkm return (error); 343220497Smarkm so = (struct socket *)fp->f_data; 344220497Smarkm if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) 345220497Smarkm return (EALREADY); 346220497Smarkm error = getsockaddr(&sa, uap->name, uap->namelen); 347220497Smarkm if (error) 348220497Smarkm return (error); 349220497Smarkm error = soconnect(so, sa, p); 350220497Smarkm if (error) 351220497Smarkm goto bad; 352220497Smarkm if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 353220497Smarkm FREE(sa, M_SONAME); 354220497Smarkm return (EINPROGRESS); 355220497Smarkm } 356220497Smarkm s = splnet(); 357220497Smarkm while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 358220497Smarkm error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH, 359220497Smarkm "connec", 0); 360220497Smarkm if (error) 361220497Smarkm break; 362220497Smarkm } 363220497Smarkm if (error == 0) { 364220497Smarkm error = so->so_error; 365220497Smarkm so->so_error = 0; 366220497Smarkm } 367220497Smarkm splx(s); 368220497Smarkmbad: 369220497Smarkm so->so_state &= ~SS_ISCONNECTING; 370220497Smarkm FREE(sa, M_SONAME); 371220497Smarkm if (error == ERESTART) 372220497Smarkm error = EINTR; 373220497Smarkm return (error); 374220497Smarkm} 375220497Smarkm 376220497Smarkmint 377220497Smarkmsocketpair(p, uap) 378220497Smarkm struct proc *p; 379220497Smarkm register struct socketpair_args /* { 380220497Smarkm int domain; 381220497Smarkm int type; 382220497Smarkm int protocol; 383220497Smarkm int *rsv; 384220497Smarkm } */ *uap; 385220497Smarkm{ 386220497Smarkm register struct filedesc *fdp = p->p_fd; 387220497Smarkm struct file *fp1, *fp2; 388220497Smarkm struct socket *so1, *so2; 389220497Smarkm int fd, error, sv[2]; 390220497Smarkm 391220497Smarkm error = socreate(uap->domain, &so1, uap->type, uap->protocol, p); 392220497Smarkm if (error) 393220497Smarkm return (error); 394220497Smarkm error = socreate(uap->domain, &so2, uap->type, uap->protocol, p); 395220497Smarkm if (error) 396220497Smarkm goto free1; 397220497Smarkm error = falloc(p, &fp1, &fd); 398220497Smarkm if (error) 399220497Smarkm goto free2; 400220497Smarkm sv[0] = fd; 401220497Smarkm fp1->f_data = (caddr_t)so1; 402220497Smarkm error = falloc(p, &fp2, &fd); 403220497Smarkm if (error) 404220497Smarkm goto free3; 405220497Smarkm fp2->f_data = (caddr_t)so2; 406220497Smarkm sv[1] = fd; 407220497Smarkm error = soconnect2(so1, so2); 408220497Smarkm if (error) 409220497Smarkm goto free4; 410220497Smarkm if (uap->type == SOCK_DGRAM) { 411220497Smarkm /* 412220497Smarkm * Datagram socket connection is asymmetric. 413220497Smarkm */ 414220497Smarkm error = soconnect2(so2, so1); 415220497Smarkm if (error) 416220497Smarkm goto free4; 417220497Smarkm } 418220497Smarkm fp1->f_flag = fp2->f_flag = FREAD|FWRITE; 419220497Smarkm fp1->f_ops = fp2->f_ops = &socketops; 420220497Smarkm fp1->f_type = fp2->f_type = DTYPE_SOCKET; 421220497Smarkm error = copyout((caddr_t)sv, (caddr_t)uap->rsv, 2 * sizeof (int)); 422220497Smarkm return (error); 423220497Smarkmfree4: 424220497Smarkm fdp->fd_ofiles[sv[1]] = 0; 425220497Smarkm ffree(fp2); 426220497Smarkmfree3: 427220497Smarkm fdp->fd_ofiles[sv[0]] = 0; 428220497Smarkm ffree(fp1); 429220497Smarkmfree2: 430220497Smarkm (void)soclose(so2); 431220497Smarkmfree1: 432220497Smarkm (void)soclose(so1); 433220497Smarkm return (error); 434220497Smarkm} 435220497Smarkm 436220497Smarkmstatic int 437220497Smarkmsendit(p, s, mp, flags) 438220497Smarkm register struct proc *p; 439220497Smarkm int s; 440220497Smarkm register struct msghdr *mp; 441220497Smarkm int flags; 442220497Smarkm{ 443220497Smarkm struct file *fp; 444220497Smarkm struct uio auio; 445220497Smarkm register struct iovec *iov; 446220497Smarkm register int i; 447220497Smarkm struct mbuf *control; 448220497Smarkm struct sockaddr *to; 449220497Smarkm int len, error; 450220497Smarkm struct socket *so; 451220497Smarkm#ifdef KTRACE 452220497Smarkm struct iovec *ktriov = NULL; 453220497Smarkm struct uio ktruio; 454220497Smarkm#endif 455220497Smarkm 456220497Smarkm error = getsock(p->p_fd, s, &fp); 457220497Smarkm if (error) 458220497Smarkm return (error); 459220497Smarkm auio.uio_iov = mp->msg_iov; 460220497Smarkm auio.uio_iovcnt = mp->msg_iovlen; 461220497Smarkm auio.uio_segflg = UIO_USERSPACE; 462220497Smarkm auio.uio_rw = UIO_WRITE; 463220497Smarkm auio.uio_procp = p; 464220497Smarkm auio.uio_offset = 0; /* XXX */ 465220497Smarkm auio.uio_resid = 0; 466220497Smarkm iov = mp->msg_iov; 467220497Smarkm for (i = 0; i < mp->msg_iovlen; i++, iov++) { 468220497Smarkm if ((auio.uio_resid += iov->iov_len) < 0) 469220497Smarkm return (EINVAL); 470220497Smarkm } 471220497Smarkm if (mp->msg_name) { 472220497Smarkm error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 473220497Smarkm if (error) 474220497Smarkm return (error); 475220497Smarkm } else 476220497Smarkm to = 0; 477220497Smarkm if (mp->msg_control) { 478 if (mp->msg_controllen < sizeof(struct cmsghdr) 479#ifdef COMPAT_OLDSOCK 480 && mp->msg_flags != MSG_COMPAT 481#endif 482 ) { 483 error = EINVAL; 484 goto bad; 485 } 486 error = sockargs(&control, mp->msg_control, 487 mp->msg_controllen, MT_CONTROL); 488 if (error) 489 goto bad; 490#ifdef COMPAT_OLDSOCK 491 if (mp->msg_flags == MSG_COMPAT) { 492 register struct cmsghdr *cm; 493 494 M_PREPEND(control, sizeof(*cm), M_WAIT); 495 if (control == 0) { 496 error = ENOBUFS; 497 goto bad; 498 } else { 499 cm = mtod(control, struct cmsghdr *); 500 cm->cmsg_len = control->m_len; 501 cm->cmsg_level = SOL_SOCKET; 502 cm->cmsg_type = SCM_RIGHTS; 503 } 504 } 505#endif 506 } else 507 control = 0; 508#ifdef KTRACE 509 if (KTRPOINT(p, KTR_GENIO)) { 510 int iovlen = auio.uio_iovcnt * sizeof (struct iovec); 511 512 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 513 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 514 ktruio = auio; 515 } 516#endif 517 len = auio.uio_resid; 518 so = (struct socket *)fp->f_data; 519 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, &auio, 0, control, 520 flags, p); 521 if (error) { 522 if (auio.uio_resid != len && (error == ERESTART || 523 error == EINTR || error == EWOULDBLOCK)) 524 error = 0; 525 if (error == EPIPE) 526 psignal(p, SIGPIPE); 527 } 528 if (error == 0) 529 p->p_retval[0] = len - auio.uio_resid; 530#ifdef KTRACE 531 if (ktriov != NULL) { 532 if (error == 0) { 533 ktruio.uio_iov = ktriov; 534 ktruio.uio_resid = p->p_retval[0]; 535 ktrgenio(p->p_tracep, s, UIO_WRITE, &ktruio, error); 536 } 537 FREE(ktriov, M_TEMP); 538 } 539#endif 540bad: 541 if (to) 542 FREE(to, M_SONAME); 543 return (error); 544} 545 546int 547sendto(p, uap) 548 struct proc *p; 549 register struct sendto_args /* { 550 int s; 551 caddr_t buf; 552 size_t len; 553 int flags; 554 caddr_t to; 555 int tolen; 556 } */ *uap; 557{ 558 struct msghdr msg; 559 struct iovec aiov; 560 561 msg.msg_name = uap->to; 562 msg.msg_namelen = uap->tolen; 563 msg.msg_iov = &aiov; 564 msg.msg_iovlen = 1; 565 msg.msg_control = 0; 566#ifdef COMPAT_OLDSOCK 567 msg.msg_flags = 0; 568#endif 569 aiov.iov_base = uap->buf; 570 aiov.iov_len = uap->len; 571 return (sendit(p, uap->s, &msg, uap->flags)); 572} 573 574#ifdef COMPAT_OLDSOCK 575int 576osend(p, uap) 577 struct proc *p; 578 register struct osend_args /* { 579 int s; 580 caddr_t buf; 581 int len; 582 int flags; 583 } */ *uap; 584{ 585 struct msghdr msg; 586 struct iovec aiov; 587 588 msg.msg_name = 0; 589 msg.msg_namelen = 0; 590 msg.msg_iov = &aiov; 591 msg.msg_iovlen = 1; 592 aiov.iov_base = uap->buf; 593 aiov.iov_len = uap->len; 594 msg.msg_control = 0; 595 msg.msg_flags = 0; 596 return (sendit(p, uap->s, &msg, uap->flags)); 597} 598 599int 600osendmsg(p, uap) 601 struct proc *p; 602 register struct osendmsg_args /* { 603 int s; 604 caddr_t msg; 605 int flags; 606 } */ *uap; 607{ 608 struct msghdr msg; 609 struct iovec aiov[UIO_SMALLIOV], *iov; 610 int error; 611 612 error = copyin(uap->msg, (caddr_t)&msg, sizeof (struct omsghdr)); 613 if (error) 614 return (error); 615 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 616 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) 617 return (EMSGSIZE); 618 MALLOC(iov, struct iovec *, 619 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 620 M_WAITOK); 621 } else 622 iov = aiov; 623 error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov, 624 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 625 if (error) 626 goto done; 627 msg.msg_flags = MSG_COMPAT; 628 msg.msg_iov = iov; 629 error = sendit(p, uap->s, &msg, uap->flags); 630done: 631 if (iov != aiov) 632 FREE(iov, M_IOV); 633 return (error); 634} 635#endif 636 637int 638sendmsg(p, uap) 639 struct proc *p; 640 register struct sendmsg_args /* { 641 int s; 642 caddr_t msg; 643 int flags; 644 } */ *uap; 645{ 646 struct msghdr msg; 647 struct iovec aiov[UIO_SMALLIOV], *iov; 648 int error; 649 650 error = copyin(uap->msg, (caddr_t)&msg, sizeof (msg)); 651 if (error) 652 return (error); 653 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 654 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) 655 return (EMSGSIZE); 656 MALLOC(iov, struct iovec *, 657 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 658 M_WAITOK); 659 } else 660 iov = aiov; 661 if (msg.msg_iovlen && 662 (error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov, 663 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))))) 664 goto done; 665 msg.msg_iov = iov; 666#ifdef COMPAT_OLDSOCK 667 msg.msg_flags = 0; 668#endif 669 error = sendit(p, uap->s, &msg, uap->flags); 670done: 671 if (iov != aiov) 672 FREE(iov, M_IOV); 673 return (error); 674} 675 676static int 677recvit(p, s, mp, namelenp) 678 register struct proc *p; 679 int s; 680 register struct msghdr *mp; 681 caddr_t namelenp; 682{ 683 struct file *fp; 684 struct uio auio; 685 register struct iovec *iov; 686 register int i; 687 int len, error; 688 struct mbuf *m, *control = 0; 689 caddr_t ctlbuf; 690 struct socket *so; 691 struct sockaddr *fromsa = 0; 692#ifdef KTRACE 693 struct iovec *ktriov = NULL; 694 struct uio ktruio; 695#endif 696 697 error = getsock(p->p_fd, s, &fp); 698 if (error) 699 return (error); 700 auio.uio_iov = mp->msg_iov; 701 auio.uio_iovcnt = mp->msg_iovlen; 702 auio.uio_segflg = UIO_USERSPACE; 703 auio.uio_rw = UIO_READ; 704 auio.uio_procp = p; 705 auio.uio_offset = 0; /* XXX */ 706 auio.uio_resid = 0; 707 iov = mp->msg_iov; 708 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 709 if ((auio.uio_resid += iov->iov_len) < 0) 710 return (EINVAL); 711 } 712#ifdef KTRACE 713 if (KTRPOINT(p, KTR_GENIO)) { 714 int iovlen = auio.uio_iovcnt * sizeof (struct iovec); 715 716 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 717 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 718 ktruio = auio; 719 } 720#endif 721 len = auio.uio_resid; 722 so = (struct socket *)fp->f_data; 723 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio, 724 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0, 725 &mp->msg_flags); 726 if (error) { 727 if (auio.uio_resid != len && (error == ERESTART || 728 error == EINTR || error == EWOULDBLOCK)) 729 error = 0; 730 } 731#ifdef KTRACE 732 if (ktriov != NULL) { 733 if (error == 0) { 734 ktruio.uio_iov = ktriov; 735 ktruio.uio_resid = len - auio.uio_resid; 736 ktrgenio(p->p_tracep, s, UIO_READ, &ktruio, error); 737 } 738 FREE(ktriov, M_TEMP); 739 } 740#endif 741 if (error) 742 goto out; 743 p->p_retval[0] = len - auio.uio_resid; 744 if (mp->msg_name) { 745 len = mp->msg_namelen; 746 if (len <= 0 || fromsa == 0) 747 len = 0; 748 else { 749#ifndef MIN 750#define MIN(a,b) ((a)>(b)?(b):(a)) 751#endif 752 /* save sa_len before it is destroyed by MSG_COMPAT */ 753 len = MIN(len, fromsa->sa_len); 754#ifdef COMPAT_OLDSOCK 755 if (mp->msg_flags & MSG_COMPAT) 756 ((struct osockaddr *)fromsa)->sa_family = 757 fromsa->sa_family; 758#endif 759 error = copyout(fromsa, 760 (caddr_t)mp->msg_name, (unsigned)len); 761 if (error) 762 goto out; 763 } 764 mp->msg_namelen = len; 765 if (namelenp && 766 (error = copyout((caddr_t)&len, namelenp, sizeof (int)))) { 767#ifdef COMPAT_OLDSOCK 768 if (mp->msg_flags & MSG_COMPAT) 769 error = 0; /* old recvfrom didn't check */ 770 else 771#endif 772 goto out; 773 } 774 } 775 if (mp->msg_control) { 776#ifdef COMPAT_OLDSOCK 777 /* 778 * We assume that old recvmsg calls won't receive access 779 * rights and other control info, esp. as control info 780 * is always optional and those options didn't exist in 4.3. 781 * If we receive rights, trim the cmsghdr; anything else 782 * is tossed. 783 */ 784 if (control && mp->msg_flags & MSG_COMPAT) { 785 if (mtod(control, struct cmsghdr *)->cmsg_level != 786 SOL_SOCKET || 787 mtod(control, struct cmsghdr *)->cmsg_type != 788 SCM_RIGHTS) { 789 mp->msg_controllen = 0; 790 goto out; 791 } 792 control->m_len -= sizeof (struct cmsghdr); 793 control->m_data += sizeof (struct cmsghdr); 794 } 795#endif 796 len = mp->msg_controllen; 797 m = control; 798 mp->msg_controllen = 0; 799 ctlbuf = (caddr_t) mp->msg_control; 800 801 while (m && len > 0) { 802 unsigned int tocopy; 803 804 if (len >= m->m_len) 805 tocopy = m->m_len; 806 else { 807 mp->msg_flags |= MSG_CTRUNC; 808 tocopy = len; 809 } 810 811 if ((error = copyout((caddr_t)mtod(m, caddr_t), 812 ctlbuf, tocopy)) != 0) 813 goto out; 814 815 ctlbuf += tocopy; 816 len -= tocopy; 817 m = m->m_next; 818 } 819 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 820 } 821out: 822 if (fromsa) 823 FREE(fromsa, M_SONAME); 824 if (control) 825 m_freem(control); 826 return (error); 827} 828 829int 830recvfrom(p, uap) 831 struct proc *p; 832 register struct recvfrom_args /* { 833 int s; 834 caddr_t buf; 835 size_t len; 836 int flags; 837 caddr_t from; 838 int *fromlenaddr; 839 } */ *uap; 840{ 841 struct msghdr msg; 842 struct iovec aiov; 843 int error; 844 845 if (uap->fromlenaddr) { 846 error = copyin((caddr_t)uap->fromlenaddr, 847 (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen)); 848 if (error) 849 return (error); 850 } else 851 msg.msg_namelen = 0; 852 msg.msg_name = uap->from; 853 msg.msg_iov = &aiov; 854 msg.msg_iovlen = 1; 855 aiov.iov_base = uap->buf; 856 aiov.iov_len = uap->len; 857 msg.msg_control = 0; 858 msg.msg_flags = uap->flags; 859 return (recvit(p, uap->s, &msg, (caddr_t)uap->fromlenaddr)); 860} 861 862#ifdef COMPAT_OLDSOCK 863int 864orecvfrom(p, uap) 865 struct proc *p; 866 struct recvfrom_args *uap; 867{ 868 869 uap->flags |= MSG_COMPAT; 870 return (recvfrom(p, uap)); 871} 872#endif 873 874 875#ifdef COMPAT_OLDSOCK 876int 877orecv(p, uap) 878 struct proc *p; 879 register struct orecv_args /* { 880 int s; 881 caddr_t buf; 882 int len; 883 int flags; 884 } */ *uap; 885{ 886 struct msghdr msg; 887 struct iovec aiov; 888 889 msg.msg_name = 0; 890 msg.msg_namelen = 0; 891 msg.msg_iov = &aiov; 892 msg.msg_iovlen = 1; 893 aiov.iov_base = uap->buf; 894 aiov.iov_len = uap->len; 895 msg.msg_control = 0; 896 msg.msg_flags = uap->flags; 897 return (recvit(p, uap->s, &msg, (caddr_t)0)); 898} 899 900/* 901 * Old recvmsg. This code takes advantage of the fact that the old msghdr 902 * overlays the new one, missing only the flags, and with the (old) access 903 * rights where the control fields are now. 904 */ 905int 906orecvmsg(p, uap) 907 struct proc *p; 908 register struct orecvmsg_args /* { 909 int s; 910 struct omsghdr *msg; 911 int flags; 912 } */ *uap; 913{ 914 struct msghdr msg; 915 struct iovec aiov[UIO_SMALLIOV], *iov; 916 int error; 917 918 error = copyin((caddr_t)uap->msg, (caddr_t)&msg, 919 sizeof (struct omsghdr)); 920 if (error) 921 return (error); 922 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 923 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) 924 return (EMSGSIZE); 925 MALLOC(iov, struct iovec *, 926 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 927 M_WAITOK); 928 } else 929 iov = aiov; 930 msg.msg_flags = uap->flags | MSG_COMPAT; 931 error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov, 932 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 933 if (error) 934 goto done; 935 msg.msg_iov = iov; 936 error = recvit(p, uap->s, &msg, (caddr_t)&uap->msg->msg_namelen); 937 938 if (msg.msg_controllen && error == 0) 939 error = copyout((caddr_t)&msg.msg_controllen, 940 (caddr_t)&uap->msg->msg_accrightslen, sizeof (int)); 941done: 942 if (iov != aiov) 943 FREE(iov, M_IOV); 944 return (error); 945} 946#endif 947 948int 949recvmsg(p, uap) 950 struct proc *p; 951 register struct recvmsg_args /* { 952 int s; 953 struct msghdr *msg; 954 int flags; 955 } */ *uap; 956{ 957 struct msghdr msg; 958 struct iovec aiov[UIO_SMALLIOV], *uiov, *iov; 959 register int error; 960 961 error = copyin((caddr_t)uap->msg, (caddr_t)&msg, sizeof (msg)); 962 if (error) 963 return (error); 964 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 965 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) 966 return (EMSGSIZE); 967 MALLOC(iov, struct iovec *, 968 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 969 M_WAITOK); 970 } else 971 iov = aiov; 972#ifdef COMPAT_OLDSOCK 973 msg.msg_flags = uap->flags &~ MSG_COMPAT; 974#else 975 msg.msg_flags = uap->flags; 976#endif 977 uiov = msg.msg_iov; 978 msg.msg_iov = iov; 979 error = copyin((caddr_t)uiov, (caddr_t)iov, 980 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 981 if (error) 982 goto done; 983 error = recvit(p, uap->s, &msg, (caddr_t)0); 984 if (!error) { 985 msg.msg_iov = uiov; 986 error = copyout((caddr_t)&msg, (caddr_t)uap->msg, sizeof(msg)); 987 } 988done: 989 if (iov != aiov) 990 FREE(iov, M_IOV); 991 return (error); 992} 993 994/* ARGSUSED */ 995int 996shutdown(p, uap) 997 struct proc *p; 998 register struct shutdown_args /* { 999 int s; 1000 int how; 1001 } */ *uap; 1002{ 1003 struct file *fp; 1004 int error; 1005 1006 error = getsock(p->p_fd, uap->s, &fp); 1007 if (error) 1008 return (error); 1009 return (soshutdown((struct socket *)fp->f_data, uap->how)); 1010} 1011 1012/* ARGSUSED */ 1013int 1014setsockopt(p, uap) 1015 struct proc *p; 1016 register struct setsockopt_args /* { 1017 int s; 1018 int level; 1019 int name; 1020 caddr_t val; 1021 int valsize; 1022 } */ *uap; 1023{ 1024 struct file *fp; 1025 struct sockopt sopt; 1026 int error; 1027 1028 if (uap->val == 0 && uap->valsize != 0) 1029 return (EFAULT); 1030 if (uap->valsize < 0) 1031 return (EINVAL); 1032 1033 error = getsock(p->p_fd, uap->s, &fp); 1034 if (error) 1035 return (error); 1036 1037 sopt.sopt_dir = SOPT_SET; 1038 sopt.sopt_level = uap->level; 1039 sopt.sopt_name = uap->name; 1040 sopt.sopt_val = uap->val; 1041 sopt.sopt_valsize = uap->valsize; 1042 sopt.sopt_p = p; 1043 1044 return (sosetopt((struct socket *)fp->f_data, &sopt)); 1045} 1046 1047/* ARGSUSED */ 1048int 1049getsockopt(p, uap) 1050 struct proc *p; 1051 register struct getsockopt_args /* { 1052 int s; 1053 int level; 1054 int name; 1055 caddr_t val; 1056 int *avalsize; 1057 } */ *uap; 1058{ 1059 int valsize, error; 1060 struct file *fp; 1061 struct sockopt sopt; 1062 1063 error = getsock(p->p_fd, uap->s, &fp); 1064 if (error) 1065 return (error); 1066 if (uap->val) { 1067 error = copyin((caddr_t)uap->avalsize, (caddr_t)&valsize, 1068 sizeof (valsize)); 1069 if (error) 1070 return (error); 1071 if (valsize < 0) 1072 return (EINVAL); 1073 } else 1074 valsize = 0; 1075 1076 sopt.sopt_dir = SOPT_GET; 1077 sopt.sopt_level = uap->level; 1078 sopt.sopt_name = uap->name; 1079 sopt.sopt_val = uap->val; 1080 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */ 1081 sopt.sopt_p = p; 1082 1083 error = sogetopt((struct socket *)fp->f_data, &sopt); 1084 if (error == 0) { 1085 valsize = sopt.sopt_valsize; 1086 error = copyout((caddr_t)&valsize, 1087 (caddr_t)uap->avalsize, sizeof (valsize)); 1088 } 1089 return (error); 1090} 1091 1092/* 1093 * Get socket name. 1094 */ 1095/* ARGSUSED */ 1096static int 1097getsockname1(p, uap, compat) 1098 struct proc *p; 1099 register struct getsockname_args /* { 1100 int fdes; 1101 caddr_t asa; 1102 int *alen; 1103 } */ *uap; 1104 int compat; 1105{ 1106 struct file *fp; 1107 register struct socket *so; 1108 struct sockaddr *sa; 1109 int len, error; 1110 1111 error = getsock(p->p_fd, uap->fdes, &fp); 1112 if (error) 1113 return (error); 1114 error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len)); 1115 if (error) 1116 return (error); 1117 so = (struct socket *)fp->f_data; 1118 sa = 0; 1119 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa); 1120 if (error) 1121 goto bad; 1122 if (sa == 0) { 1123 len = 0; 1124 goto gotnothing; 1125 } 1126 1127 len = MIN(len, sa->sa_len); 1128#ifdef COMPAT_OLDSOCK 1129 if (compat) 1130 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1131#endif 1132 error = copyout(sa, (caddr_t)uap->asa, (u_int)len); 1133 if (error == 0) 1134gotnothing: 1135 error = copyout((caddr_t)&len, (caddr_t)uap->alen, 1136 sizeof (len)); 1137bad: 1138 if (sa) 1139 FREE(sa, M_SONAME); 1140 return (error); 1141} 1142 1143int 1144getsockname(p, uap) 1145 struct proc *p; 1146 struct getsockname_args *uap; 1147{ 1148 1149 return (getsockname1(p, uap, 0)); 1150} 1151 1152#ifdef COMPAT_OLDSOCK 1153int 1154ogetsockname(p, uap) 1155 struct proc *p; 1156 struct getsockname_args *uap; 1157{ 1158 1159 return (getsockname1(p, uap, 1)); 1160} 1161#endif /* COMPAT_OLDSOCK */ 1162 1163/* 1164 * Get name of peer for connected socket. 1165 */ 1166/* ARGSUSED */ 1167static int 1168getpeername1(p, uap, compat) 1169 struct proc *p; 1170 register struct getpeername_args /* { 1171 int fdes; 1172 caddr_t asa; 1173 int *alen; 1174 } */ *uap; 1175 int compat; 1176{ 1177 struct file *fp; 1178 register struct socket *so; 1179 struct sockaddr *sa; 1180 int len, error; 1181 1182 error = getsock(p->p_fd, uap->fdes, &fp); 1183 if (error) 1184 return (error); 1185 so = (struct socket *)fp->f_data; 1186 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) 1187 return (ENOTCONN); 1188 error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len)); 1189 if (error) 1190 return (error); 1191 sa = 0; 1192 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa); 1193 if (error) 1194 goto bad; 1195 if (sa == 0) { 1196 len = 0; 1197 goto gotnothing; 1198 } 1199 len = MIN(len, sa->sa_len); 1200#ifdef COMPAT_OLDSOCK 1201 if (compat) 1202 ((struct osockaddr *)sa)->sa_family = 1203 sa->sa_family; 1204#endif 1205 error = copyout(sa, (caddr_t)uap->asa, (u_int)len); 1206 if (error) 1207 goto bad; 1208gotnothing: 1209 error = copyout((caddr_t)&len, (caddr_t)uap->alen, sizeof (len)); 1210bad: 1211 if (sa) FREE(sa, M_SONAME); 1212 return (error); 1213} 1214 1215int 1216getpeername(p, uap) 1217 struct proc *p; 1218 struct getpeername_args *uap; 1219{ 1220 1221 return (getpeername1(p, uap, 0)); 1222} 1223 1224#ifdef COMPAT_OLDSOCK 1225int 1226ogetpeername(p, uap) 1227 struct proc *p; 1228 struct ogetpeername_args *uap; 1229{ 1230 1231 /* XXX uap should have type `getpeername_args *' to begin with. */ 1232 return (getpeername1(p, (struct getpeername_args *)uap, 1)); 1233} 1234#endif /* COMPAT_OLDSOCK */ 1235 1236int 1237sockargs(mp, buf, buflen, type) 1238 struct mbuf **mp; 1239 caddr_t buf; 1240 int buflen, type; 1241{ 1242 register struct sockaddr *sa; 1243 register struct mbuf *m; 1244 int error; 1245 1246 if ((u_int)buflen > MLEN) { 1247#ifdef COMPAT_OLDSOCK 1248 if (type == MT_SONAME && (u_int)buflen <= 112) 1249 buflen = MLEN; /* unix domain compat. hack */ 1250 else 1251#endif 1252 return (EINVAL); 1253 } 1254 m = m_get(M_WAIT, type); 1255 if (m == NULL) 1256 return (ENOBUFS); 1257 m->m_len = buflen; 1258 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1259 if (error) 1260 (void) m_free(m); 1261 else { 1262 *mp = m; 1263 if (type == MT_SONAME) { 1264 sa = mtod(m, struct sockaddr *); 1265 1266#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1267 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1268 sa->sa_family = sa->sa_len; 1269#endif 1270 sa->sa_len = buflen; 1271 } 1272 } 1273 return (error); 1274} 1275 1276int 1277getsockaddr(namp, uaddr, len) 1278 struct sockaddr **namp; 1279 caddr_t uaddr; 1280 size_t len; 1281{ 1282 struct sockaddr *sa; 1283 int error; 1284 1285 if (len > SOCK_MAXADDRLEN) 1286 return ENAMETOOLONG; 1287 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK); 1288 error = copyin(uaddr, sa, len); 1289 if (error) { 1290 FREE(sa, M_SONAME); 1291 } else { 1292#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1293 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1294 sa->sa_family = sa->sa_len; 1295#endif 1296 sa->sa_len = len; 1297 *namp = sa; 1298 } 1299 return error; 1300} 1301 1302int 1303getsock(fdp, fdes, fpp) 1304 struct filedesc *fdp; 1305 int fdes; 1306 struct file **fpp; 1307{ 1308 register struct file *fp; 1309 1310 if ((unsigned)fdes >= fdp->fd_nfiles || 1311 (fp = fdp->fd_ofiles[fdes]) == NULL) 1312 return (EBADF); 1313 if (fp->f_type != DTYPE_SOCKET) 1314 return (ENOTSOCK); 1315 *fpp = fp; 1316 return (0); 1317} 1318 1319/* 1320 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-)) 1321 * XXX - The sf_buf functions are currently private to sendfile(2), so have 1322 * been made static, but may be useful in the future for doing zero-copy in 1323 * other parts of the networking code. 1324 */ 1325static void 1326sf_buf_init(void *arg) 1327{ 1328 int i; 1329 1330 SLIST_INIT(&sf_freelist); 1331 sf_base = kmem_alloc_pageable(kernel_map, nsfbufs * PAGE_SIZE); 1332 sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP, M_NOWAIT); 1333 bzero(sf_bufs, nsfbufs * sizeof(struct sf_buf)); 1334 for (i = 0; i < nsfbufs; i++) { 1335 sf_bufs[i].kva = sf_base + i * PAGE_SIZE; 1336 SLIST_INSERT_HEAD(&sf_freelist, &sf_bufs[i], free_list); 1337 } 1338} 1339 1340/* 1341 * Get an sf_buf from the freelist. Will block if none are available. 1342 */ 1343static struct sf_buf * 1344sf_buf_alloc() 1345{ 1346 struct sf_buf *sf; 1347 int s; 1348 1349 s = splimp(); 1350 while ((sf = SLIST_FIRST(&sf_freelist)) == NULL) { 1351 sf_buf_alloc_want = 1; 1352 tsleep(&sf_freelist, PVM, "sfbufa", 0); 1353 } 1354 SLIST_REMOVE_HEAD(&sf_freelist, free_list); 1355 splx(s); 1356 return (sf); 1357} 1358 1359#define dtosf(x) (&sf_bufs[((uintptr_t)(x) - (uintptr_t)sf_base) >> PAGE_SHIFT]) 1360 1361/* 1362 * 1363 * Detatch mapped page and release resources back to the system. 1364 * 1365 * Must be called at splimp. 1366 */ 1367static void 1368sf_buf_free(caddr_t addr, void *args) 1369{ 1370 struct sf_buf *sf; 1371 struct vm_page *m; 1372 int s; 1373 1374 sf = dtosf(addr); 1375 pmap_qremove((vm_offset_t)addr, 1); 1376 m = sf->m; 1377 s = splvm(); 1378 vm_page_unwire(m, 0); 1379 /* 1380 * Check for the object going away on us. This can 1381 * happen since we don't hold a reference to it. 1382 * If so, we're responsible for freeing the page. 1383 */ 1384 if (m->wire_count == 0 && m->object == NULL) 1385 vm_page_free(m); 1386 splx(s); 1387 sf->m = NULL; 1388 SLIST_INSERT_HEAD(&sf_freelist, sf, free_list); 1389 if (sf_buf_alloc_want) { 1390 sf_buf_alloc_want = 0; 1391 wakeup(&sf_freelist); 1392 } 1393} 1394 1395/* 1396 * sendfile(2). 1397 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1398 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1399 * 1400 * Send a file specified by 'fd' and starting at 'offset' to a socket 1401 * specified by 's'. Send only 'nbytes' of the file or until EOF if 1402 * nbytes == 0. Optionally add a header and/or trailer to the socket 1403 * output. If specified, write the total number of bytes sent into *sbytes. 1404 */ 1405int 1406sendfile(struct proc *p, struct sendfile_args *uap) 1407{ 1408 struct file *fp; 1409 struct filedesc *fdp = p->p_fd; 1410 struct vnode *vp; 1411 struct vm_object *obj; 1412 struct socket *so; 1413 struct mbuf *m; 1414 struct sf_buf *sf; 1415 struct vm_page *pg; 1416 struct writev_args nuap; 1417 struct sf_hdtr hdtr; 1418 off_t off, xfsize, sbytes = 0; 1419 int error = 0, s; 1420 1421 vp = NULL; 1422 /* 1423 * Do argument checking. Must be a regular file in, stream 1424 * type and connected socket out, positive offset. 1425 */ 1426 fp = getfp(fdp, uap->fd, FREAD); 1427 if (fp == NULL) { 1428 error = EBADF; 1429 goto done; 1430 } 1431 if (fp->f_type != DTYPE_VNODE) { 1432 error = EINVAL; 1433 goto done; 1434 } 1435 vp = (struct vnode *)fp->f_data; 1436 vref(vp); 1437 if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) { 1438 error = EINVAL; 1439 goto done; 1440 } 1441 error = getsock(p->p_fd, uap->s, &fp); 1442 if (error) 1443 goto done; 1444 so = (struct socket *)fp->f_data; 1445 if (so->so_type != SOCK_STREAM) { 1446 error = EINVAL; 1447 goto done; 1448 } 1449 if ((so->so_state & SS_ISCONNECTED) == 0) { 1450 error = ENOTCONN; 1451 goto done; 1452 } 1453 if (uap->offset < 0) { 1454 error = EINVAL; 1455 goto done; 1456 } 1457 1458 /* 1459 * If specified, get the pointer to the sf_hdtr struct for 1460 * any headers/trailers. 1461 */ 1462 if (uap->hdtr != NULL) { 1463 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1464 if (error) 1465 goto done; 1466 /* 1467 * Send any headers. Wimp out and use writev(2). 1468 */ 1469 if (hdtr.headers != NULL) { 1470 nuap.fd = uap->s; 1471 nuap.iovp = hdtr.headers; 1472 nuap.iovcnt = hdtr.hdr_cnt; 1473 error = writev(p, &nuap); 1474 if (error) 1475 goto done; 1476 sbytes += p->p_retval[0]; 1477 } 1478 } 1479 1480 /* 1481 * Protect against multiple writers to the socket. 1482 */ 1483 (void) sblock(&so->so_snd, M_WAITOK); 1484 1485 /* 1486 * Loop through the pages in the file, starting with the requested 1487 * offset. Get a file page (do I/O if necessary), map the file page 1488 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1489 * it on the socket. 1490 */ 1491 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) { 1492 vm_pindex_t pindex; 1493 vm_offset_t pgoff; 1494 1495 pindex = OFF_TO_IDX(off); 1496retry_lookup: 1497 /* 1498 * Calculate the amount to transfer. Not to exceed a page, 1499 * the EOF, or the passed in nbytes. 1500 */ 1501 xfsize = obj->un_pager.vnp.vnp_size - off; 1502 if (xfsize > PAGE_SIZE) 1503 xfsize = PAGE_SIZE; 1504 pgoff = (vm_offset_t)(off & PAGE_MASK); 1505 if (PAGE_SIZE - pgoff < xfsize) 1506 xfsize = PAGE_SIZE - pgoff; 1507 if (uap->nbytes && xfsize > (uap->nbytes - sbytes)) 1508 xfsize = uap->nbytes - sbytes; 1509 if (xfsize <= 0) 1510 break; 1511 /* 1512 * Optimize the non-blocking case by looking at the socket space 1513 * before going to the extra work of constituting the sf_buf. 1514 */ 1515 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) { 1516 if (so->so_state & SS_CANTSENDMORE) 1517 error = EPIPE; 1518 else 1519 error = EAGAIN; 1520 sbunlock(&so->so_snd); 1521 goto done; 1522 } 1523 /* 1524 * Attempt to look up the page. 1525 * 1526 * Allocate if not found 1527 * 1528 * Wait and loop if busy. 1529 */ 1530 pg = vm_page_lookup(obj, pindex); 1531 1532 if (pg == NULL) { 1533 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL); 1534 if (pg == NULL) { 1535 VM_WAIT; 1536 goto retry_lookup; 1537 } 1538 vm_page_wakeup(pg); 1539 } else if (vm_page_sleep_busy(pg, TRUE, "sfpbsy")) { 1540 goto retry_lookup; 1541 } 1542 1543 /* 1544 * Wire the page so it does not get ripped out from under 1545 * us. 1546 */ 1547 1548 vm_page_wire(pg); 1549 1550 /* 1551 * If page is not valid for what we need, initiate I/O 1552 */ 1553 1554 if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) { 1555 struct uio auio; 1556 struct iovec aiov; 1557 int bsize; 1558 1559 /* 1560 * Ensure that our page is still around when the I/O 1561 * completes. 1562 */ 1563 vm_page_io_start(pg); 1564 1565 /* 1566 * Get the page from backing store. 1567 */ 1568 bsize = vp->v_mount->mnt_stat.f_iosize; 1569 auio.uio_iov = &aiov; 1570 auio.uio_iovcnt = 1; 1571 aiov.iov_base = 0; 1572 aiov.iov_len = MAXBSIZE; 1573 auio.uio_resid = MAXBSIZE; 1574 auio.uio_offset = trunc_page(off); 1575 auio.uio_segflg = UIO_NOCOPY; 1576 auio.uio_rw = UIO_READ; 1577 auio.uio_procp = p; 1578 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, p); 1579 error = VOP_READ(vp, &auio, IO_VMIO | ((MAXBSIZE / bsize) << 16), 1580 p->p_ucred); 1581 VOP_UNLOCK(vp, 0, p); 1582 vm_page_flag_clear(pg, PG_ZERO); 1583 vm_page_io_finish(pg); 1584 if (error) { 1585 vm_page_unwire(pg, 0); 1586 /* 1587 * See if anyone else might know about this page. 1588 * If not and it is not valid, then free it. 1589 */ 1590 if (pg->wire_count == 0 && pg->valid == 0 && 1591 pg->busy == 0 && !(pg->flags & PG_BUSY) && 1592 pg->hold_count == 0) 1593 vm_page_free(pg); 1594 sbunlock(&so->so_snd); 1595 goto done; 1596 } 1597 } 1598 1599 /* 1600 * Allocate a kernel virtual page and insert the physical page 1601 * into it. 1602 */ 1603 1604 sf = sf_buf_alloc(); 1605 sf->m = pg; 1606 pmap_qenter(sf->kva, &pg, 1); 1607 /* 1608 * Get an mbuf header and set it up as having external storage. 1609 */ 1610 MGETHDR(m, M_WAIT, MT_DATA); 1611 if (m == NULL) { 1612 error = ENOBUFS; 1613 goto done; 1614 } 1615 /* 1616 * Setup external storage for mbuf. 1617 */ 1618 MEXTADD(m, sf->kva, PAGE_SIZE, sf_buf_free, NULL); 1619 m->m_data = (char *) sf->kva + pgoff; 1620 m->m_pkthdr.len = m->m_len = xfsize; 1621 /* 1622 * Add the buffer to the socket buffer chain. 1623 */ 1624 s = splnet(); 1625retry_space: 1626 /* 1627 * Make sure that the socket is still able to take more data. 1628 * CANTSENDMORE being true usually means that the connection 1629 * was closed. so_error is true when an error was sensed after 1630 * a previous send. 1631 * The state is checked after the page mapping and buffer 1632 * allocation above since those operations may block and make 1633 * any socket checks stale. From this point forward, nothing 1634 * blocks before the pru_send (or more accurately, any blocking 1635 * results in a loop back to here to re-check). 1636 */ 1637 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) { 1638 if (so->so_state & SS_CANTSENDMORE) { 1639 error = EPIPE; 1640 } else { 1641 error = so->so_error; 1642 so->so_error = 0; 1643 } 1644 m_freem(m); 1645 sbunlock(&so->so_snd); 1646 splx(s); 1647 goto done; 1648 } 1649 /* 1650 * Wait for socket space to become available. We do this just 1651 * after checking the connection state above in order to avoid 1652 * a race condition with sbwait(). 1653 */ 1654 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) { 1655 if (so->so_state & SS_NBIO) { 1656 m_freem(m); 1657 sbunlock(&so->so_snd); 1658 splx(s); 1659 error = EAGAIN; 1660 goto done; 1661 } 1662 error = sbwait(&so->so_snd); 1663 /* 1664 * An error from sbwait usually indicates that we've 1665 * been interrupted by a signal. If we've sent anything 1666 * then return bytes sent, otherwise return the error. 1667 */ 1668 if (error) { 1669 m_freem(m); 1670 sbunlock(&so->so_snd); 1671 splx(s); 1672 goto done; 1673 } 1674 goto retry_space; 1675 } 1676 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, p); 1677 splx(s); 1678 if (error) { 1679 sbunlock(&so->so_snd); 1680 goto done; 1681 } 1682 } 1683 sbunlock(&so->so_snd); 1684 1685 /* 1686 * Send trailers. Wimp out and use writev(2). 1687 */ 1688 if (uap->hdtr != NULL && hdtr.trailers != NULL) { 1689 nuap.fd = uap->s; 1690 nuap.iovp = hdtr.trailers; 1691 nuap.iovcnt = hdtr.trl_cnt; 1692 error = writev(p, &nuap); 1693 if (error) 1694 goto done; 1695 sbytes += p->p_retval[0]; 1696 } 1697 1698done: 1699 if (uap->sbytes != NULL) { 1700 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 1701 } 1702 if (vp) 1703 vrele(vp); 1704 return (error); 1705} 1706