Deleted Added
full compact
sctp_syscalls.c (62378) sctp_syscalls.c (64837)
1/*
2 * Copyright (c) 1982, 1986, 1989, 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * sendfile(2) and related extensions:
6 * Copyright (c) 1998, David Greenman. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
1/*
2 * Copyright (c) 1982, 1986, 1989, 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * sendfile(2) and related extensions:
6 * Copyright (c) 1998, David Greenman. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
37 * $FreeBSD: head/sys/kern/uipc_syscalls.c 62378 2000-07-02 08:08:09Z green $
37 * $FreeBSD: head/sys/kern/uipc_syscalls.c 64837 2000-08-19 08:32:59Z dwmalone $
38 */
39
40#include "opt_compat.h"
41#include "opt_ktrace.h"
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/kernel.h>
46#include <sys/sysproto.h>
47#include <sys/malloc.h>
48#include <sys/filedesc.h>
49#include <sys/event.h>
50#include <sys/proc.h>
51#include <sys/fcntl.h>
52#include <sys/file.h>
53#include <sys/mbuf.h>
54#include <sys/protosw.h>
55#include <sys/socket.h>
56#include <sys/socketvar.h>
57#include <sys/signalvar.h>
58#include <sys/uio.h>
59#include <sys/vnode.h>
60#include <sys/lock.h>
61#include <sys/mount.h>
62#ifdef KTRACE
63#include <sys/ktrace.h>
64#endif
65#include <vm/vm.h>
66#include <vm/vm_object.h>
67#include <vm/vm_page.h>
68#include <vm/vm_pageout.h>
69#include <vm/vm_kern.h>
70#include <vm/vm_extern.h>
71
72static void sf_buf_init(void *arg);
73SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
74static struct sf_buf *sf_buf_alloc(void);
38 */
39
40#include "opt_compat.h"
41#include "opt_ktrace.h"
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/kernel.h>
46#include <sys/sysproto.h>
47#include <sys/malloc.h>
48#include <sys/filedesc.h>
49#include <sys/event.h>
50#include <sys/proc.h>
51#include <sys/fcntl.h>
52#include <sys/file.h>
53#include <sys/mbuf.h>
54#include <sys/protosw.h>
55#include <sys/socket.h>
56#include <sys/socketvar.h>
57#include <sys/signalvar.h>
58#include <sys/uio.h>
59#include <sys/vnode.h>
60#include <sys/lock.h>
61#include <sys/mount.h>
62#ifdef KTRACE
63#include <sys/ktrace.h>
64#endif
65#include <vm/vm.h>
66#include <vm/vm_object.h>
67#include <vm/vm_page.h>
68#include <vm/vm_pageout.h>
69#include <vm/vm_kern.h>
70#include <vm/vm_extern.h>
71
72static void sf_buf_init(void *arg);
73SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
74static struct sf_buf *sf_buf_alloc(void);
75static void sf_buf_ref(caddr_t addr, u_int size);
76static void sf_buf_free(caddr_t addr, u_int size);
75static void sf_buf_free(caddr_t addr, void *args);
77
78static int sendit __P((struct proc *p, int s, struct msghdr *mp, int flags));
79static int recvit __P((struct proc *p, int s, struct msghdr *mp,
80 caddr_t namelenp));
81
82static int accept1 __P((struct proc *p, struct accept_args *uap, int compat));
83static int getsockname1 __P((struct proc *p, struct getsockname_args *uap,
84 int compat));
85static int getpeername1 __P((struct proc *p, struct getpeername_args *uap,
86 int compat));
87
88static SLIST_HEAD(, sf_buf) sf_freelist;
89static vm_offset_t sf_base;
90static struct sf_buf *sf_bufs;
91static int sf_buf_alloc_want;
92
93/*
94 * System call interface to the socket abstraction.
95 */
96#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
97#define COMPAT_OLDSOCK
98#endif
99
100extern struct fileops socketops;
101
102int
103socket(p, uap)
104 struct proc *p;
105 register struct socket_args /* {
106 int domain;
107 int type;
108 int protocol;
109 } */ *uap;
110{
111 struct filedesc *fdp = p->p_fd;
112 struct socket *so;
113 struct file *fp;
114 int fd, error;
115
116 error = falloc(p, &fp, &fd);
117 if (error)
118 return (error);
119 error = socreate(uap->domain, &so, uap->type, uap->protocol, p);
120 if (error) {
121 fdp->fd_ofiles[fd] = 0;
122 ffree(fp);
123 } else {
124 fp->f_data = (caddr_t)so;
125 fp->f_flag = FREAD|FWRITE;
126 fp->f_ops = &socketops;
127 fp->f_type = DTYPE_SOCKET;
128 p->p_retval[0] = fd;
129 }
130 return (error);
131}
132
133/* ARGSUSED */
134int
135bind(p, uap)
136 struct proc *p;
137 register struct bind_args /* {
138 int s;
139 caddr_t name;
140 int namelen;
141 } */ *uap;
142{
143 struct file *fp;
144 struct sockaddr *sa;
145 int error;
146
147 error = getsock(p->p_fd, uap->s, &fp);
148 if (error)
149 return (error);
150 error = getsockaddr(&sa, uap->name, uap->namelen);
151 if (error)
152 return (error);
153 error = sobind((struct socket *)fp->f_data, sa, p);
154 FREE(sa, M_SONAME);
155 return (error);
156}
157
158/* ARGSUSED */
159int
160listen(p, uap)
161 struct proc *p;
162 register struct listen_args /* {
163 int s;
164 int backlog;
165 } */ *uap;
166{
167 struct file *fp;
168 int error;
169
170 error = getsock(p->p_fd, uap->s, &fp);
171 if (error)
172 return (error);
173 return (solisten((struct socket *)fp->f_data, uap->backlog, p));
174}
175
176static int
177accept1(p, uap, compat)
178 struct proc *p;
179 register struct accept_args /* {
180 int s;
181 caddr_t name;
182 int *anamelen;
183 } */ *uap;
184 int compat;
185{
186 struct filedesc *fdp = p->p_fd;
187 struct file *fp;
188 struct sockaddr *sa;
189 int namelen, error, s;
190 struct socket *head, *so;
191 int fd;
192 short fflag; /* type must match fp->f_flag */
193
194 if (uap->name) {
195 error = copyin((caddr_t)uap->anamelen, (caddr_t)&namelen,
196 sizeof (namelen));
197 if(error)
198 return (error);
199 }
200 error = getsock(fdp, uap->s, &fp);
201 if (error)
202 return (error);
203 s = splnet();
204 head = (struct socket *)fp->f_data;
205 if ((head->so_options & SO_ACCEPTCONN) == 0) {
206 splx(s);
207 return (EINVAL);
208 }
209 if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
210 splx(s);
211 return (EWOULDBLOCK);
212 }
213 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
214 if (head->so_state & SS_CANTRCVMORE) {
215 head->so_error = ECONNABORTED;
216 break;
217 }
218 error = tsleep((caddr_t)&head->so_timeo, PSOCK | PCATCH,
219 "accept", 0);
220 if (error) {
221 splx(s);
222 return (error);
223 }
224 }
225 if (head->so_error) {
226 error = head->so_error;
227 head->so_error = 0;
228 splx(s);
229 return (error);
230 }
231
232 /*
233 * At this point we know that there is at least one connection
234 * ready to be accepted. Remove it from the queue prior to
235 * allocating the file descriptor for it since falloc() may
236 * block allowing another process to accept the connection
237 * instead.
238 */
239 so = TAILQ_FIRST(&head->so_comp);
240 TAILQ_REMOVE(&head->so_comp, so, so_list);
241 head->so_qlen--;
242
243 fflag = fp->f_flag;
244 error = falloc(p, &fp, &fd);
245 if (error) {
246 /*
247 * Probably ran out of file descriptors. Put the
248 * unaccepted connection back onto the queue and
249 * do another wakeup so some other process might
250 * have a chance at it.
251 */
252 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list);
253 head->so_qlen++;
254 wakeup_one(&head->so_timeo);
255 splx(s);
256 return (error);
257 } else
258 p->p_retval[0] = fd;
259
260 /* connection has been removed from the listen queue */
261 KNOTE(&head->so_rcv.sb_sel.si_note, 0);
262
263 so->so_state &= ~SS_COMP;
264 so->so_head = NULL;
265 if (head->so_sigio != NULL)
266 fsetown(fgetown(head->so_sigio), &so->so_sigio);
267
268 fp->f_data = (caddr_t)so;
269 fp->f_flag = fflag;
270 fp->f_ops = &socketops;
271 fp->f_type = DTYPE_SOCKET;
272 sa = 0;
273 (void) soaccept(so, &sa);
274 if (sa == 0) {
275 namelen = 0;
276 if (uap->name)
277 goto gotnoname;
278 splx(s);
279 return 0;
280 }
281 if (uap->name) {
282 /* check sa_len before it is destroyed */
283 if (namelen > sa->sa_len)
284 namelen = sa->sa_len;
285#ifdef COMPAT_OLDSOCK
286 if (compat)
287 ((struct osockaddr *)sa)->sa_family =
288 sa->sa_family;
289#endif
290 error = copyout(sa, (caddr_t)uap->name, (u_int)namelen);
291 if (!error)
292gotnoname:
293 error = copyout((caddr_t)&namelen,
294 (caddr_t)uap->anamelen, sizeof (*uap->anamelen));
295 }
296 if (sa)
297 FREE(sa, M_SONAME);
298 if (error) {
299 fdp->fd_ofiles[fd] = 0;
300 ffree(fp);
301 }
302 splx(s);
303 return (error);
304}
305
/*
 * accept(2) entry point: accept1() with the compat flag clear.
 */
int
accept(p, uap)
	struct proc *p;
	struct accept_args *uap;
{
	int error;

	error = accept1(p, uap, 0);
	return (error);
}
314
#ifdef COMPAT_OLDSOCK
/*
 * Old-socket-API accept entry point: accept1() with the compat flag set.
 */
int
oaccept(p, uap)
	struct proc *p;
	struct accept_args *uap;
{
	int error;

	error = accept1(p, uap, 1);
	return (error);
}
#endif /* COMPAT_OLDSOCK */
325
326/* ARGSUSED */
327int
328connect(p, uap)
329 struct proc *p;
330 register struct connect_args /* {
331 int s;
332 caddr_t name;
333 int namelen;
334 } */ *uap;
335{
336 struct file *fp;
337 register struct socket *so;
338 struct sockaddr *sa;
339 int error, s;
340
341 error = getsock(p->p_fd, uap->s, &fp);
342 if (error)
343 return (error);
344 so = (struct socket *)fp->f_data;
345 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING))
346 return (EALREADY);
347 error = getsockaddr(&sa, uap->name, uap->namelen);
348 if (error)
349 return (error);
350 error = soconnect(so, sa, p);
351 if (error)
352 goto bad;
353 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
354 FREE(sa, M_SONAME);
355 return (EINPROGRESS);
356 }
357 s = splnet();
358 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
359 error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH,
360 "connec", 0);
361 if (error)
362 break;
363 }
364 if (error == 0) {
365 error = so->so_error;
366 so->so_error = 0;
367 }
368 splx(s);
369bad:
370 so->so_state &= ~SS_ISCONNECTING;
371 FREE(sa, M_SONAME);
372 if (error == ERESTART)
373 error = EINTR;
374 return (error);
375}
376
377int
378socketpair(p, uap)
379 struct proc *p;
380 register struct socketpair_args /* {
381 int domain;
382 int type;
383 int protocol;
384 int *rsv;
385 } */ *uap;
386{
387 register struct filedesc *fdp = p->p_fd;
388 struct file *fp1, *fp2;
389 struct socket *so1, *so2;
390 int fd, error, sv[2];
391
392 error = socreate(uap->domain, &so1, uap->type, uap->protocol, p);
393 if (error)
394 return (error);
395 error = socreate(uap->domain, &so2, uap->type, uap->protocol, p);
396 if (error)
397 goto free1;
398 error = falloc(p, &fp1, &fd);
399 if (error)
400 goto free2;
401 sv[0] = fd;
402 fp1->f_data = (caddr_t)so1;
403 error = falloc(p, &fp2, &fd);
404 if (error)
405 goto free3;
406 fp2->f_data = (caddr_t)so2;
407 sv[1] = fd;
408 error = soconnect2(so1, so2);
409 if (error)
410 goto free4;
411 if (uap->type == SOCK_DGRAM) {
412 /*
413 * Datagram socket connection is asymmetric.
414 */
415 error = soconnect2(so2, so1);
416 if (error)
417 goto free4;
418 }
419 fp1->f_flag = fp2->f_flag = FREAD|FWRITE;
420 fp1->f_ops = fp2->f_ops = &socketops;
421 fp1->f_type = fp2->f_type = DTYPE_SOCKET;
422 error = copyout((caddr_t)sv, (caddr_t)uap->rsv, 2 * sizeof (int));
423 return (error);
424free4:
425 fdp->fd_ofiles[sv[1]] = 0;
426 ffree(fp2);
427free3:
428 fdp->fd_ofiles[sv[0]] = 0;
429 ffree(fp1);
430free2:
431 (void)soclose(so2);
432free1:
433 (void)soclose(so1);
434 return (error);
435}
436
437static int
438sendit(p, s, mp, flags)
439 register struct proc *p;
440 int s;
441 register struct msghdr *mp;
442 int flags;
443{
444 struct file *fp;
445 struct uio auio;
446 register struct iovec *iov;
447 register int i;
448 struct mbuf *control;
449 struct sockaddr *to;
450 int len, error;
451 struct socket *so;
452#ifdef KTRACE
453 struct iovec *ktriov = NULL;
454 struct uio ktruio;
455#endif
456
457 error = getsock(p->p_fd, s, &fp);
458 if (error)
459 return (error);
460 auio.uio_iov = mp->msg_iov;
461 auio.uio_iovcnt = mp->msg_iovlen;
462 auio.uio_segflg = UIO_USERSPACE;
463 auio.uio_rw = UIO_WRITE;
464 auio.uio_procp = p;
465 auio.uio_offset = 0; /* XXX */
466 auio.uio_resid = 0;
467 iov = mp->msg_iov;
468 for (i = 0; i < mp->msg_iovlen; i++, iov++) {
469 if ((auio.uio_resid += iov->iov_len) < 0)
470 return (EINVAL);
471 }
472 if (mp->msg_name) {
473 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
474 if (error)
475 return (error);
476 } else
477 to = 0;
478 if (mp->msg_control) {
479 if (mp->msg_controllen < sizeof(struct cmsghdr)
480#ifdef COMPAT_OLDSOCK
481 && mp->msg_flags != MSG_COMPAT
482#endif
483 ) {
484 error = EINVAL;
485 goto bad;
486 }
487 error = sockargs(&control, mp->msg_control,
488 mp->msg_controllen, MT_CONTROL);
489 if (error)
490 goto bad;
491#ifdef COMPAT_OLDSOCK
492 if (mp->msg_flags == MSG_COMPAT) {
493 register struct cmsghdr *cm;
494
495 M_PREPEND(control, sizeof(*cm), M_WAIT);
496 if (control == 0) {
497 error = ENOBUFS;
498 goto bad;
499 } else {
500 cm = mtod(control, struct cmsghdr *);
501 cm->cmsg_len = control->m_len;
502 cm->cmsg_level = SOL_SOCKET;
503 cm->cmsg_type = SCM_RIGHTS;
504 }
505 }
506#endif
507 } else
508 control = 0;
509#ifdef KTRACE
510 if (KTRPOINT(p, KTR_GENIO)) {
511 int iovlen = auio.uio_iovcnt * sizeof (struct iovec);
512
513 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
514 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
515 ktruio = auio;
516 }
517#endif
518 len = auio.uio_resid;
519 so = (struct socket *)fp->f_data;
520 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, &auio, 0, control,
521 flags, p);
522 if (error) {
523 if (auio.uio_resid != len && (error == ERESTART ||
524 error == EINTR || error == EWOULDBLOCK))
525 error = 0;
526 if (error == EPIPE)
527 psignal(p, SIGPIPE);
528 }
529 if (error == 0)
530 p->p_retval[0] = len - auio.uio_resid;
531#ifdef KTRACE
532 if (ktriov != NULL) {
533 if (error == 0) {
534 ktruio.uio_iov = ktriov;
535 ktruio.uio_resid = p->p_retval[0];
536 ktrgenio(p->p_tracep, s, UIO_WRITE, &ktruio, error);
537 }
538 FREE(ktriov, M_TEMP);
539 }
540#endif
541bad:
542 if (to)
543 FREE(to, M_SONAME);
544 return (error);
545}
546
547int
548sendto(p, uap)
549 struct proc *p;
550 register struct sendto_args /* {
551 int s;
552 caddr_t buf;
553 size_t len;
554 int flags;
555 caddr_t to;
556 int tolen;
557 } */ *uap;
558{
559 struct msghdr msg;
560 struct iovec aiov;
561
562 msg.msg_name = uap->to;
563 msg.msg_namelen = uap->tolen;
564 msg.msg_iov = &aiov;
565 msg.msg_iovlen = 1;
566 msg.msg_control = 0;
567#ifdef COMPAT_OLDSOCK
568 msg.msg_flags = 0;
569#endif
570 aiov.iov_base = uap->buf;
571 aiov.iov_len = uap->len;
572 return (sendit(p, uap->s, &msg, uap->flags));
573}
574
575#ifdef COMPAT_OLDSOCK
576int
577osend(p, uap)
578 struct proc *p;
579 register struct osend_args /* {
580 int s;
581 caddr_t buf;
582 int len;
583 int flags;
584 } */ *uap;
585{
586 struct msghdr msg;
587 struct iovec aiov;
588
589 msg.msg_name = 0;
590 msg.msg_namelen = 0;
591 msg.msg_iov = &aiov;
592 msg.msg_iovlen = 1;
593 aiov.iov_base = uap->buf;
594 aiov.iov_len = uap->len;
595 msg.msg_control = 0;
596 msg.msg_flags = 0;
597 return (sendit(p, uap->s, &msg, uap->flags));
598}
599
600int
601osendmsg(p, uap)
602 struct proc *p;
603 register struct osendmsg_args /* {
604 int s;
605 caddr_t msg;
606 int flags;
607 } */ *uap;
608{
609 struct msghdr msg;
610 struct iovec aiov[UIO_SMALLIOV], *iov;
611 int error;
612
613 error = copyin(uap->msg, (caddr_t)&msg, sizeof (struct omsghdr));
614 if (error)
615 return (error);
616 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
617 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
618 return (EMSGSIZE);
619 MALLOC(iov, struct iovec *,
620 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
621 M_WAITOK);
622 } else
623 iov = aiov;
624 error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
625 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
626 if (error)
627 goto done;
628 msg.msg_flags = MSG_COMPAT;
629 msg.msg_iov = iov;
630 error = sendit(p, uap->s, &msg, uap->flags);
631done:
632 if (iov != aiov)
633 FREE(iov, M_IOV);
634 return (error);
635}
636#endif
637
638int
639sendmsg(p, uap)
640 struct proc *p;
641 register struct sendmsg_args /* {
642 int s;
643 caddr_t msg;
644 int flags;
645 } */ *uap;
646{
647 struct msghdr msg;
648 struct iovec aiov[UIO_SMALLIOV], *iov;
649 int error;
650
651 error = copyin(uap->msg, (caddr_t)&msg, sizeof (msg));
652 if (error)
653 return (error);
654 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
655 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
656 return (EMSGSIZE);
657 MALLOC(iov, struct iovec *,
658 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
659 M_WAITOK);
660 } else
661 iov = aiov;
662 if (msg.msg_iovlen &&
663 (error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
664 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)))))
665 goto done;
666 msg.msg_iov = iov;
667#ifdef COMPAT_OLDSOCK
668 msg.msg_flags = 0;
669#endif
670 error = sendit(p, uap->s, &msg, uap->flags);
671done:
672 if (iov != aiov)
673 FREE(iov, M_IOV);
674 return (error);
675}
676
677static int
678recvit(p, s, mp, namelenp)
679 register struct proc *p;
680 int s;
681 register struct msghdr *mp;
682 caddr_t namelenp;
683{
684 struct file *fp;
685 struct uio auio;
686 register struct iovec *iov;
687 register int i;
688 int len, error;
689 struct mbuf *m, *control = 0;
690 caddr_t ctlbuf;
691 struct socket *so;
692 struct sockaddr *fromsa = 0;
693#ifdef KTRACE
694 struct iovec *ktriov = NULL;
695 struct uio ktruio;
696#endif
697
698 error = getsock(p->p_fd, s, &fp);
699 if (error)
700 return (error);
701 auio.uio_iov = mp->msg_iov;
702 auio.uio_iovcnt = mp->msg_iovlen;
703 auio.uio_segflg = UIO_USERSPACE;
704 auio.uio_rw = UIO_READ;
705 auio.uio_procp = p;
706 auio.uio_offset = 0; /* XXX */
707 auio.uio_resid = 0;
708 iov = mp->msg_iov;
709 for (i = 0; i < mp->msg_iovlen; i++, iov++) {
710 if ((auio.uio_resid += iov->iov_len) < 0)
711 return (EINVAL);
712 }
713#ifdef KTRACE
714 if (KTRPOINT(p, KTR_GENIO)) {
715 int iovlen = auio.uio_iovcnt * sizeof (struct iovec);
716
717 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
718 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
719 ktruio = auio;
720 }
721#endif
722 len = auio.uio_resid;
723 so = (struct socket *)fp->f_data;
724 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
725 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
726 &mp->msg_flags);
727 if (error) {
728 if (auio.uio_resid != len && (error == ERESTART ||
729 error == EINTR || error == EWOULDBLOCK))
730 error = 0;
731 }
732#ifdef KTRACE
733 if (ktriov != NULL) {
734 if (error == 0) {
735 ktruio.uio_iov = ktriov;
736 ktruio.uio_resid = len - auio.uio_resid;
737 ktrgenio(p->p_tracep, s, UIO_READ, &ktruio, error);
738 }
739 FREE(ktriov, M_TEMP);
740 }
741#endif
742 if (error)
743 goto out;
744 p->p_retval[0] = len - auio.uio_resid;
745 if (mp->msg_name) {
746 len = mp->msg_namelen;
747 if (len <= 0 || fromsa == 0)
748 len = 0;
749 else {
750#ifndef MIN
751#define MIN(a,b) ((a)>(b)?(b):(a))
752#endif
753 /* save sa_len before it is destroyed by MSG_COMPAT */
754 len = MIN(len, fromsa->sa_len);
755#ifdef COMPAT_OLDSOCK
756 if (mp->msg_flags & MSG_COMPAT)
757 ((struct osockaddr *)fromsa)->sa_family =
758 fromsa->sa_family;
759#endif
760 error = copyout(fromsa,
761 (caddr_t)mp->msg_name, (unsigned)len);
762 if (error)
763 goto out;
764 }
765 mp->msg_namelen = len;
766 if (namelenp &&
767 (error = copyout((caddr_t)&len, namelenp, sizeof (int)))) {
768#ifdef COMPAT_OLDSOCK
769 if (mp->msg_flags & MSG_COMPAT)
770 error = 0; /* old recvfrom didn't check */
771 else
772#endif
773 goto out;
774 }
775 }
776 if (mp->msg_control) {
777#ifdef COMPAT_OLDSOCK
778 /*
779 * We assume that old recvmsg calls won't receive access
780 * rights and other control info, esp. as control info
781 * is always optional and those options didn't exist in 4.3.
782 * If we receive rights, trim the cmsghdr; anything else
783 * is tossed.
784 */
785 if (control && mp->msg_flags & MSG_COMPAT) {
786 if (mtod(control, struct cmsghdr *)->cmsg_level !=
787 SOL_SOCKET ||
788 mtod(control, struct cmsghdr *)->cmsg_type !=
789 SCM_RIGHTS) {
790 mp->msg_controllen = 0;
791 goto out;
792 }
793 control->m_len -= sizeof (struct cmsghdr);
794 control->m_data += sizeof (struct cmsghdr);
795 }
796#endif
797 len = mp->msg_controllen;
798 m = control;
799 mp->msg_controllen = 0;
800 ctlbuf = (caddr_t) mp->msg_control;
801
802 while (m && len > 0) {
803 unsigned int tocopy;
804
805 if (len >= m->m_len)
806 tocopy = m->m_len;
807 else {
808 mp->msg_flags |= MSG_CTRUNC;
809 tocopy = len;
810 }
811
812 if ((error = copyout((caddr_t)mtod(m, caddr_t),
813 ctlbuf, tocopy)) != 0)
814 goto out;
815
816 ctlbuf += tocopy;
817 len -= tocopy;
818 m = m->m_next;
819 }
820 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
821 }
822out:
823 if (fromsa)
824 FREE(fromsa, M_SONAME);
825 if (control)
826 m_freem(control);
827 return (error);
828}
829
830int
831recvfrom(p, uap)
832 struct proc *p;
833 register struct recvfrom_args /* {
834 int s;
835 caddr_t buf;
836 size_t len;
837 int flags;
838 caddr_t from;
839 int *fromlenaddr;
840 } */ *uap;
841{
842 struct msghdr msg;
843 struct iovec aiov;
844 int error;
845
846 if (uap->fromlenaddr) {
847 error = copyin((caddr_t)uap->fromlenaddr,
848 (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen));
849 if (error)
850 return (error);
851 } else
852 msg.msg_namelen = 0;
853 msg.msg_name = uap->from;
854 msg.msg_iov = &aiov;
855 msg.msg_iovlen = 1;
856 aiov.iov_base = uap->buf;
857 aiov.iov_len = uap->len;
858 msg.msg_control = 0;
859 msg.msg_flags = uap->flags;
860 return (recvit(p, uap->s, &msg, (caddr_t)uap->fromlenaddr));
861}
862
#ifdef COMPAT_OLDSOCK
/*
 * Old recvfrom(): set MSG_COMPAT in the caller's flags and reuse
 * recvfrom().
 */
int
orecvfrom(p, uap)
	struct proc *p;
	struct recvfrom_args *uap;
{
	int error;

	uap->flags |= MSG_COMPAT;
	error = recvfrom(p, uap);
	return (error);
}
#endif
874
875
876#ifdef COMPAT_OLDSOCK
877int
878orecv(p, uap)
879 struct proc *p;
880 register struct orecv_args /* {
881 int s;
882 caddr_t buf;
883 int len;
884 int flags;
885 } */ *uap;
886{
887 struct msghdr msg;
888 struct iovec aiov;
889
890 msg.msg_name = 0;
891 msg.msg_namelen = 0;
892 msg.msg_iov = &aiov;
893 msg.msg_iovlen = 1;
894 aiov.iov_base = uap->buf;
895 aiov.iov_len = uap->len;
896 msg.msg_control = 0;
897 msg.msg_flags = uap->flags;
898 return (recvit(p, uap->s, &msg, (caddr_t)0));
899}
900
901/*
902 * Old recvmsg. This code takes advantage of the fact that the old msghdr
903 * overlays the new one, missing only the flags, and with the (old) access
904 * rights where the control fields are now.
905 */
906int
907orecvmsg(p, uap)
908 struct proc *p;
909 register struct orecvmsg_args /* {
910 int s;
911 struct omsghdr *msg;
912 int flags;
913 } */ *uap;
914{
915 struct msghdr msg;
916 struct iovec aiov[UIO_SMALLIOV], *iov;
917 int error;
918
919 error = copyin((caddr_t)uap->msg, (caddr_t)&msg,
920 sizeof (struct omsghdr));
921 if (error)
922 return (error);
923 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
924 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
925 return (EMSGSIZE);
926 MALLOC(iov, struct iovec *,
927 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
928 M_WAITOK);
929 } else
930 iov = aiov;
931 msg.msg_flags = uap->flags | MSG_COMPAT;
932 error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
933 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
934 if (error)
935 goto done;
936 msg.msg_iov = iov;
937 error = recvit(p, uap->s, &msg, (caddr_t)&uap->msg->msg_namelen);
938
939 if (msg.msg_controllen && error == 0)
940 error = copyout((caddr_t)&msg.msg_controllen,
941 (caddr_t)&uap->msg->msg_accrightslen, sizeof (int));
942done:
943 if (iov != aiov)
944 FREE(iov, M_IOV);
945 return (error);
946}
947#endif
948
949int
950recvmsg(p, uap)
951 struct proc *p;
952 register struct recvmsg_args /* {
953 int s;
954 struct msghdr *msg;
955 int flags;
956 } */ *uap;
957{
958 struct msghdr msg;
959 struct iovec aiov[UIO_SMALLIOV], *uiov, *iov;
960 register int error;
961
962 error = copyin((caddr_t)uap->msg, (caddr_t)&msg, sizeof (msg));
963 if (error)
964 return (error);
965 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
966 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
967 return (EMSGSIZE);
968 MALLOC(iov, struct iovec *,
969 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
970 M_WAITOK);
971 } else
972 iov = aiov;
973#ifdef COMPAT_OLDSOCK
974 msg.msg_flags = uap->flags &~ MSG_COMPAT;
975#else
976 msg.msg_flags = uap->flags;
977#endif
978 uiov = msg.msg_iov;
979 msg.msg_iov = iov;
980 error = copyin((caddr_t)uiov, (caddr_t)iov,
981 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
982 if (error)
983 goto done;
984 error = recvit(p, uap->s, &msg, (caddr_t)0);
985 if (!error) {
986 msg.msg_iov = uiov;
987 error = copyout((caddr_t)&msg, (caddr_t)uap->msg, sizeof(msg));
988 }
989done:
990 if (iov != aiov)
991 FREE(iov, M_IOV);
992 return (error);
993}
994
995/* ARGSUSED */
996int
997shutdown(p, uap)
998 struct proc *p;
999 register struct shutdown_args /* {
1000 int s;
1001 int how;
1002 } */ *uap;
1003{
1004 struct file *fp;
1005 int error;
1006
1007 error = getsock(p->p_fd, uap->s, &fp);
1008 if (error)
1009 return (error);
1010 return (soshutdown((struct socket *)fp->f_data, uap->how));
1011}
1012
1013/* ARGSUSED */
1014int
1015setsockopt(p, uap)
1016 struct proc *p;
1017 register struct setsockopt_args /* {
1018 int s;
1019 int level;
1020 int name;
1021 caddr_t val;
1022 int valsize;
1023 } */ *uap;
1024{
1025 struct file *fp;
1026 struct sockopt sopt;
1027 int error;
1028
1029 if (uap->val == 0 && uap->valsize != 0)
1030 return (EFAULT);
1031 if (uap->valsize < 0)
1032 return (EINVAL);
1033
1034 error = getsock(p->p_fd, uap->s, &fp);
1035 if (error)
1036 return (error);
1037
1038 sopt.sopt_dir = SOPT_SET;
1039 sopt.sopt_level = uap->level;
1040 sopt.sopt_name = uap->name;
1041 sopt.sopt_val = uap->val;
1042 sopt.sopt_valsize = uap->valsize;
1043 sopt.sopt_p = p;
1044
1045 return (sosetopt((struct socket *)fp->f_data, &sopt));
1046}
1047
1048/* ARGSUSED */
1049int
1050getsockopt(p, uap)
1051 struct proc *p;
1052 register struct getsockopt_args /* {
1053 int s;
1054 int level;
1055 int name;
1056 caddr_t val;
1057 int *avalsize;
1058 } */ *uap;
1059{
1060 int valsize, error;
1061 struct file *fp;
1062 struct sockopt sopt;
1063
1064 error = getsock(p->p_fd, uap->s, &fp);
1065 if (error)
1066 return (error);
1067 if (uap->val) {
1068 error = copyin((caddr_t)uap->avalsize, (caddr_t)&valsize,
1069 sizeof (valsize));
1070 if (error)
1071 return (error);
1072 if (valsize < 0)
1073 return (EINVAL);
1074 } else
1075 valsize = 0;
1076
1077 sopt.sopt_dir = SOPT_GET;
1078 sopt.sopt_level = uap->level;
1079 sopt.sopt_name = uap->name;
1080 sopt.sopt_val = uap->val;
1081 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
1082 sopt.sopt_p = p;
1083
1084 error = sogetopt((struct socket *)fp->f_data, &sopt);
1085 if (error == 0) {
1086 valsize = sopt.sopt_valsize;
1087 error = copyout((caddr_t)&valsize,
1088 (caddr_t)uap->avalsize, sizeof (valsize));
1089 }
1090 return (error);
1091}
1092
1093/*
1094 * Get socket name.
1095 */
1096/* ARGSUSED */
1097static int
1098getsockname1(p, uap, compat)
1099 struct proc *p;
1100 register struct getsockname_args /* {
1101 int fdes;
1102 caddr_t asa;
1103 int *alen;
1104 } */ *uap;
1105 int compat;
1106{
1107 struct file *fp;
1108 register struct socket *so;
1109 struct sockaddr *sa;
1110 int len, error;
1111
1112 error = getsock(p->p_fd, uap->fdes, &fp);
1113 if (error)
1114 return (error);
1115 error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len));
1116 if (error)
1117 return (error);
1118 so = (struct socket *)fp->f_data;
1119 sa = 0;
1120 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1121 if (error)
1122 goto bad;
1123 if (sa == 0) {
1124 len = 0;
1125 goto gotnothing;
1126 }
1127
1128 len = MIN(len, sa->sa_len);
1129#ifdef COMPAT_OLDSOCK
1130 if (compat)
1131 ((struct osockaddr *)sa)->sa_family = sa->sa_family;
1132#endif
1133 error = copyout(sa, (caddr_t)uap->asa, (u_int)len);
1134 if (error == 0)
1135gotnothing:
1136 error = copyout((caddr_t)&len, (caddr_t)uap->alen,
1137 sizeof (len));
1138bad:
1139 if (sa)
1140 FREE(sa, M_SONAME);
1141 return (error);
1142}
1143
/*
 * getsockname(2) entry point: getsockname1() with the compat flag clear.
 */
int
getsockname(p, uap)
	struct proc *p;
	struct getsockname_args *uap;
{
	int error;

	error = getsockname1(p, uap, 0);
	return (error);
}
1152
#ifdef COMPAT_OLDSOCK
/*
 * Old-socket-API getsockname entry point: getsockname1() with the
 * compat flag set.
 */
int
ogetsockname(p, uap)
	struct proc *p;
	struct getsockname_args *uap;
{
	int error;

	error = getsockname1(p, uap, 1);
	return (error);
}
#endif /* COMPAT_OLDSOCK */
1163
1164/*
1165 * Get name of peer for connected socket.
1166 */
1167/* ARGSUSED */
1168static int
1169getpeername1(p, uap, compat)
1170 struct proc *p;
1171 register struct getpeername_args /* {
1172 int fdes;
1173 caddr_t asa;
1174 int *alen;
1175 } */ *uap;
1176 int compat;
1177{
1178 struct file *fp;
1179 register struct socket *so;
1180 struct sockaddr *sa;
1181 int len, error;
1182
1183 error = getsock(p->p_fd, uap->fdes, &fp);
1184 if (error)
1185 return (error);
1186 so = (struct socket *)fp->f_data;
1187 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0)
1188 return (ENOTCONN);
1189 error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len));
1190 if (error)
1191 return (error);
1192 sa = 0;
1193 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1194 if (error)
1195 goto bad;
1196 if (sa == 0) {
1197 len = 0;
1198 goto gotnothing;
1199 }
1200 len = MIN(len, sa->sa_len);
1201#ifdef COMPAT_OLDSOCK
1202 if (compat)
1203 ((struct osockaddr *)sa)->sa_family =
1204 sa->sa_family;
1205#endif
1206 error = copyout(sa, (caddr_t)uap->asa, (u_int)len);
1207 if (error)
1208 goto bad;
1209gotnothing:
1210 error = copyout((caddr_t)&len, (caddr_t)uap->alen, sizeof (len));
1211bad:
1212 if (sa) FREE(sa, M_SONAME);
1213 return (error);
1214}
1215
/*
 * getpeername system call: current sockaddr layout, so the shared back
 * end is invoked with the compat flag cleared.
 */
int
getpeername(p, uap)
	struct proc *p;
	struct getpeername_args *uap;
{
	int error;

	error = getpeername1(p, uap, 0);
	return (error);
}
1224
#ifdef COMPAT_OLDSOCK
/*
 * Old-format getpeername: same back end, with the compat flag set.
 */
int
ogetpeername(p, uap)
	struct proc *p;
	struct ogetpeername_args *uap;
{
	struct getpeername_args *args;

	/* XXX uap should have type `getpeername_args *' to begin with. */
	args = (struct getpeername_args *)uap;
	return (getpeername1(p, args, 1));
}
#endif /* COMPAT_OLDSOCK */
1236
1237int
1238sockargs(mp, buf, buflen, type)
1239 struct mbuf **mp;
1240 caddr_t buf;
1241 int buflen, type;
1242{
1243 register struct sockaddr *sa;
1244 register struct mbuf *m;
1245 int error;
1246
1247 if ((u_int)buflen > MLEN) {
1248#ifdef COMPAT_OLDSOCK
1249 if (type == MT_SONAME && (u_int)buflen <= 112)
1250 buflen = MLEN; /* unix domain compat. hack */
1251 else
1252#endif
1253 return (EINVAL);
1254 }
1255 m = m_get(M_WAIT, type);
1256 if (m == NULL)
1257 return (ENOBUFS);
1258 m->m_len = buflen;
1259 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1260 if (error)
1261 (void) m_free(m);
1262 else {
1263 *mp = m;
1264 if (type == MT_SONAME) {
1265 sa = mtod(m, struct sockaddr *);
1266
1267#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1268 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1269 sa->sa_family = sa->sa_len;
1270#endif
1271 sa->sa_len = buflen;
1272 }
1273 }
1274 return (error);
1275}
1276
1277int
1278getsockaddr(namp, uaddr, len)
1279 struct sockaddr **namp;
1280 caddr_t uaddr;
1281 size_t len;
1282{
1283 struct sockaddr *sa;
1284 int error;
1285
1286 if (len > SOCK_MAXADDRLEN)
1287 return ENAMETOOLONG;
1288 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1289 error = copyin(uaddr, sa, len);
1290 if (error) {
1291 FREE(sa, M_SONAME);
1292 } else {
1293#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1294 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1295 sa->sa_family = sa->sa_len;
1296#endif
1297 sa->sa_len = len;
1298 *namp = sa;
1299 }
1300 return error;
1301}
1302
1303int
1304getsock(fdp, fdes, fpp)
1305 struct filedesc *fdp;
1306 int fdes;
1307 struct file **fpp;
1308{
1309 register struct file *fp;
1310
1311 if ((unsigned)fdes >= fdp->fd_nfiles ||
1312 (fp = fdp->fd_ofiles[fdes]) == NULL)
1313 return (EBADF);
1314 if (fp->f_type != DTYPE_SOCKET)
1315 return (ENOTSOCK);
1316 *fpp = fp;
1317 return (0);
1318}
1319
1320/*
1321 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
1322 * XXX - The sf_buf functions are currently private to sendfile(2), so have
1323 * been made static, but may be useful in the future for doing zero-copy in
1324 * other parts of the networking code.
1325 */
1326static void
1327sf_buf_init(void *arg)
1328{
1329 int i;
1330
1331 SLIST_INIT(&sf_freelist);
1332 sf_base = kmem_alloc_pageable(kernel_map, nsfbufs * PAGE_SIZE);
1333 sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP, M_NOWAIT);
1334 bzero(sf_bufs, nsfbufs * sizeof(struct sf_buf));
1335 for (i = 0; i < nsfbufs; i++) {
1336 sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
1337 SLIST_INSERT_HEAD(&sf_freelist, &sf_bufs[i], free_list);
1338 }
1339}
1340
1341/*
1342 * Get an sf_buf from the freelist. Will block if none are available.
1343 */
1344static struct sf_buf *
1345sf_buf_alloc()
1346{
1347 struct sf_buf *sf;
1348 int s;
1349
1350 s = splimp();
1351 while ((sf = SLIST_FIRST(&sf_freelist)) == NULL) {
1352 sf_buf_alloc_want = 1;
1353 tsleep(&sf_freelist, PVM, "sfbufa", 0);
1354 }
1355 SLIST_REMOVE_HEAD(&sf_freelist, free_list);
1356 splx(s);
76
77static int sendit __P((struct proc *p, int s, struct msghdr *mp, int flags));
78static int recvit __P((struct proc *p, int s, struct msghdr *mp,
79 caddr_t namelenp));
80
81static int accept1 __P((struct proc *p, struct accept_args *uap, int compat));
82static int getsockname1 __P((struct proc *p, struct getsockname_args *uap,
83 int compat));
84static int getpeername1 __P((struct proc *p, struct getpeername_args *uap,
85 int compat));
86
87static SLIST_HEAD(, sf_buf) sf_freelist;
88static vm_offset_t sf_base;
89static struct sf_buf *sf_bufs;
90static int sf_buf_alloc_want;
91
92/*
93 * System call interface to the socket abstraction.
94 */
95#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
96#define COMPAT_OLDSOCK
97#endif
98
99extern struct fileops socketops;
100
101int
102socket(p, uap)
103 struct proc *p;
104 register struct socket_args /* {
105 int domain;
106 int type;
107 int protocol;
108 } */ *uap;
109{
110 struct filedesc *fdp = p->p_fd;
111 struct socket *so;
112 struct file *fp;
113 int fd, error;
114
115 error = falloc(p, &fp, &fd);
116 if (error)
117 return (error);
118 error = socreate(uap->domain, &so, uap->type, uap->protocol, p);
119 if (error) {
120 fdp->fd_ofiles[fd] = 0;
121 ffree(fp);
122 } else {
123 fp->f_data = (caddr_t)so;
124 fp->f_flag = FREAD|FWRITE;
125 fp->f_ops = &socketops;
126 fp->f_type = DTYPE_SOCKET;
127 p->p_retval[0] = fd;
128 }
129 return (error);
130}
131
132/* ARGSUSED */
133int
134bind(p, uap)
135 struct proc *p;
136 register struct bind_args /* {
137 int s;
138 caddr_t name;
139 int namelen;
140 } */ *uap;
141{
142 struct file *fp;
143 struct sockaddr *sa;
144 int error;
145
146 error = getsock(p->p_fd, uap->s, &fp);
147 if (error)
148 return (error);
149 error = getsockaddr(&sa, uap->name, uap->namelen);
150 if (error)
151 return (error);
152 error = sobind((struct socket *)fp->f_data, sa, p);
153 FREE(sa, M_SONAME);
154 return (error);
155}
156
157/* ARGSUSED */
158int
159listen(p, uap)
160 struct proc *p;
161 register struct listen_args /* {
162 int s;
163 int backlog;
164 } */ *uap;
165{
166 struct file *fp;
167 int error;
168
169 error = getsock(p->p_fd, uap->s, &fp);
170 if (error)
171 return (error);
172 return (solisten((struct socket *)fp->f_data, uap->backlog, p));
173}
174
175static int
176accept1(p, uap, compat)
177 struct proc *p;
178 register struct accept_args /* {
179 int s;
180 caddr_t name;
181 int *anamelen;
182 } */ *uap;
183 int compat;
184{
185 struct filedesc *fdp = p->p_fd;
186 struct file *fp;
187 struct sockaddr *sa;
188 int namelen, error, s;
189 struct socket *head, *so;
190 int fd;
191 short fflag; /* type must match fp->f_flag */
192
193 if (uap->name) {
194 error = copyin((caddr_t)uap->anamelen, (caddr_t)&namelen,
195 sizeof (namelen));
196 if(error)
197 return (error);
198 }
199 error = getsock(fdp, uap->s, &fp);
200 if (error)
201 return (error);
202 s = splnet();
203 head = (struct socket *)fp->f_data;
204 if ((head->so_options & SO_ACCEPTCONN) == 0) {
205 splx(s);
206 return (EINVAL);
207 }
208 if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
209 splx(s);
210 return (EWOULDBLOCK);
211 }
212 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
213 if (head->so_state & SS_CANTRCVMORE) {
214 head->so_error = ECONNABORTED;
215 break;
216 }
217 error = tsleep((caddr_t)&head->so_timeo, PSOCK | PCATCH,
218 "accept", 0);
219 if (error) {
220 splx(s);
221 return (error);
222 }
223 }
224 if (head->so_error) {
225 error = head->so_error;
226 head->so_error = 0;
227 splx(s);
228 return (error);
229 }
230
231 /*
232 * At this point we know that there is at least one connection
233 * ready to be accepted. Remove it from the queue prior to
234 * allocating the file descriptor for it since falloc() may
235 * block allowing another process to accept the connection
236 * instead.
237 */
238 so = TAILQ_FIRST(&head->so_comp);
239 TAILQ_REMOVE(&head->so_comp, so, so_list);
240 head->so_qlen--;
241
242 fflag = fp->f_flag;
243 error = falloc(p, &fp, &fd);
244 if (error) {
245 /*
246 * Probably ran out of file descriptors. Put the
247 * unaccepted connection back onto the queue and
248 * do another wakeup so some other process might
249 * have a chance at it.
250 */
251 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list);
252 head->so_qlen++;
253 wakeup_one(&head->so_timeo);
254 splx(s);
255 return (error);
256 } else
257 p->p_retval[0] = fd;
258
259 /* connection has been removed from the listen queue */
260 KNOTE(&head->so_rcv.sb_sel.si_note, 0);
261
262 so->so_state &= ~SS_COMP;
263 so->so_head = NULL;
264 if (head->so_sigio != NULL)
265 fsetown(fgetown(head->so_sigio), &so->so_sigio);
266
267 fp->f_data = (caddr_t)so;
268 fp->f_flag = fflag;
269 fp->f_ops = &socketops;
270 fp->f_type = DTYPE_SOCKET;
271 sa = 0;
272 (void) soaccept(so, &sa);
273 if (sa == 0) {
274 namelen = 0;
275 if (uap->name)
276 goto gotnoname;
277 splx(s);
278 return 0;
279 }
280 if (uap->name) {
281 /* check sa_len before it is destroyed */
282 if (namelen > sa->sa_len)
283 namelen = sa->sa_len;
284#ifdef COMPAT_OLDSOCK
285 if (compat)
286 ((struct osockaddr *)sa)->sa_family =
287 sa->sa_family;
288#endif
289 error = copyout(sa, (caddr_t)uap->name, (u_int)namelen);
290 if (!error)
291gotnoname:
292 error = copyout((caddr_t)&namelen,
293 (caddr_t)uap->anamelen, sizeof (*uap->anamelen));
294 }
295 if (sa)
296 FREE(sa, M_SONAME);
297 if (error) {
298 fdp->fd_ofiles[fd] = 0;
299 ffree(fp);
300 }
301 splx(s);
302 return (error);
303}
304
/*
 * accept system call: current sockaddr layout, so the shared back end
 * is invoked with the compat flag cleared.
 */
int
accept(p, uap)
	struct proc *p;
	struct accept_args *uap;
{
	int error;

	error = accept1(p, uap, 0);
	return (error);
}
313
#ifdef COMPAT_OLDSOCK
/*
 * Old-format accept: same back end, with the compat flag set.
 */
int
oaccept(p, uap)
	struct proc *p;
	struct accept_args *uap;
{
	int error;

	error = accept1(p, uap, 1);
	return (error);
}
#endif /* COMPAT_OLDSOCK */
324
325/* ARGSUSED */
326int
327connect(p, uap)
328 struct proc *p;
329 register struct connect_args /* {
330 int s;
331 caddr_t name;
332 int namelen;
333 } */ *uap;
334{
335 struct file *fp;
336 register struct socket *so;
337 struct sockaddr *sa;
338 int error, s;
339
340 error = getsock(p->p_fd, uap->s, &fp);
341 if (error)
342 return (error);
343 so = (struct socket *)fp->f_data;
344 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING))
345 return (EALREADY);
346 error = getsockaddr(&sa, uap->name, uap->namelen);
347 if (error)
348 return (error);
349 error = soconnect(so, sa, p);
350 if (error)
351 goto bad;
352 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
353 FREE(sa, M_SONAME);
354 return (EINPROGRESS);
355 }
356 s = splnet();
357 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
358 error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH,
359 "connec", 0);
360 if (error)
361 break;
362 }
363 if (error == 0) {
364 error = so->so_error;
365 so->so_error = 0;
366 }
367 splx(s);
368bad:
369 so->so_state &= ~SS_ISCONNECTING;
370 FREE(sa, M_SONAME);
371 if (error == ERESTART)
372 error = EINTR;
373 return (error);
374}
375
376int
377socketpair(p, uap)
378 struct proc *p;
379 register struct socketpair_args /* {
380 int domain;
381 int type;
382 int protocol;
383 int *rsv;
384 } */ *uap;
385{
386 register struct filedesc *fdp = p->p_fd;
387 struct file *fp1, *fp2;
388 struct socket *so1, *so2;
389 int fd, error, sv[2];
390
391 error = socreate(uap->domain, &so1, uap->type, uap->protocol, p);
392 if (error)
393 return (error);
394 error = socreate(uap->domain, &so2, uap->type, uap->protocol, p);
395 if (error)
396 goto free1;
397 error = falloc(p, &fp1, &fd);
398 if (error)
399 goto free2;
400 sv[0] = fd;
401 fp1->f_data = (caddr_t)so1;
402 error = falloc(p, &fp2, &fd);
403 if (error)
404 goto free3;
405 fp2->f_data = (caddr_t)so2;
406 sv[1] = fd;
407 error = soconnect2(so1, so2);
408 if (error)
409 goto free4;
410 if (uap->type == SOCK_DGRAM) {
411 /*
412 * Datagram socket connection is asymmetric.
413 */
414 error = soconnect2(so2, so1);
415 if (error)
416 goto free4;
417 }
418 fp1->f_flag = fp2->f_flag = FREAD|FWRITE;
419 fp1->f_ops = fp2->f_ops = &socketops;
420 fp1->f_type = fp2->f_type = DTYPE_SOCKET;
421 error = copyout((caddr_t)sv, (caddr_t)uap->rsv, 2 * sizeof (int));
422 return (error);
423free4:
424 fdp->fd_ofiles[sv[1]] = 0;
425 ffree(fp2);
426free3:
427 fdp->fd_ofiles[sv[0]] = 0;
428 ffree(fp1);
429free2:
430 (void)soclose(so2);
431free1:
432 (void)soclose(so1);
433 return (error);
434}
435
436static int
437sendit(p, s, mp, flags)
438 register struct proc *p;
439 int s;
440 register struct msghdr *mp;
441 int flags;
442{
443 struct file *fp;
444 struct uio auio;
445 register struct iovec *iov;
446 register int i;
447 struct mbuf *control;
448 struct sockaddr *to;
449 int len, error;
450 struct socket *so;
451#ifdef KTRACE
452 struct iovec *ktriov = NULL;
453 struct uio ktruio;
454#endif
455
456 error = getsock(p->p_fd, s, &fp);
457 if (error)
458 return (error);
459 auio.uio_iov = mp->msg_iov;
460 auio.uio_iovcnt = mp->msg_iovlen;
461 auio.uio_segflg = UIO_USERSPACE;
462 auio.uio_rw = UIO_WRITE;
463 auio.uio_procp = p;
464 auio.uio_offset = 0; /* XXX */
465 auio.uio_resid = 0;
466 iov = mp->msg_iov;
467 for (i = 0; i < mp->msg_iovlen; i++, iov++) {
468 if ((auio.uio_resid += iov->iov_len) < 0)
469 return (EINVAL);
470 }
471 if (mp->msg_name) {
472 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
473 if (error)
474 return (error);
475 } else
476 to = 0;
477 if (mp->msg_control) {
478 if (mp->msg_controllen < sizeof(struct cmsghdr)
479#ifdef COMPAT_OLDSOCK
480 && mp->msg_flags != MSG_COMPAT
481#endif
482 ) {
483 error = EINVAL;
484 goto bad;
485 }
486 error = sockargs(&control, mp->msg_control,
487 mp->msg_controllen, MT_CONTROL);
488 if (error)
489 goto bad;
490#ifdef COMPAT_OLDSOCK
491 if (mp->msg_flags == MSG_COMPAT) {
492 register struct cmsghdr *cm;
493
494 M_PREPEND(control, sizeof(*cm), M_WAIT);
495 if (control == 0) {
496 error = ENOBUFS;
497 goto bad;
498 } else {
499 cm = mtod(control, struct cmsghdr *);
500 cm->cmsg_len = control->m_len;
501 cm->cmsg_level = SOL_SOCKET;
502 cm->cmsg_type = SCM_RIGHTS;
503 }
504 }
505#endif
506 } else
507 control = 0;
508#ifdef KTRACE
509 if (KTRPOINT(p, KTR_GENIO)) {
510 int iovlen = auio.uio_iovcnt * sizeof (struct iovec);
511
512 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
513 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
514 ktruio = auio;
515 }
516#endif
517 len = auio.uio_resid;
518 so = (struct socket *)fp->f_data;
519 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, &auio, 0, control,
520 flags, p);
521 if (error) {
522 if (auio.uio_resid != len && (error == ERESTART ||
523 error == EINTR || error == EWOULDBLOCK))
524 error = 0;
525 if (error == EPIPE)
526 psignal(p, SIGPIPE);
527 }
528 if (error == 0)
529 p->p_retval[0] = len - auio.uio_resid;
530#ifdef KTRACE
531 if (ktriov != NULL) {
532 if (error == 0) {
533 ktruio.uio_iov = ktriov;
534 ktruio.uio_resid = p->p_retval[0];
535 ktrgenio(p->p_tracep, s, UIO_WRITE, &ktruio, error);
536 }
537 FREE(ktriov, M_TEMP);
538 }
539#endif
540bad:
541 if (to)
542 FREE(to, M_SONAME);
543 return (error);
544}
545
546int
547sendto(p, uap)
548 struct proc *p;
549 register struct sendto_args /* {
550 int s;
551 caddr_t buf;
552 size_t len;
553 int flags;
554 caddr_t to;
555 int tolen;
556 } */ *uap;
557{
558 struct msghdr msg;
559 struct iovec aiov;
560
561 msg.msg_name = uap->to;
562 msg.msg_namelen = uap->tolen;
563 msg.msg_iov = &aiov;
564 msg.msg_iovlen = 1;
565 msg.msg_control = 0;
566#ifdef COMPAT_OLDSOCK
567 msg.msg_flags = 0;
568#endif
569 aiov.iov_base = uap->buf;
570 aiov.iov_len = uap->len;
571 return (sendit(p, uap->s, &msg, uap->flags));
572}
573
574#ifdef COMPAT_OLDSOCK
575int
576osend(p, uap)
577 struct proc *p;
578 register struct osend_args /* {
579 int s;
580 caddr_t buf;
581 int len;
582 int flags;
583 } */ *uap;
584{
585 struct msghdr msg;
586 struct iovec aiov;
587
588 msg.msg_name = 0;
589 msg.msg_namelen = 0;
590 msg.msg_iov = &aiov;
591 msg.msg_iovlen = 1;
592 aiov.iov_base = uap->buf;
593 aiov.iov_len = uap->len;
594 msg.msg_control = 0;
595 msg.msg_flags = 0;
596 return (sendit(p, uap->s, &msg, uap->flags));
597}
598
599int
600osendmsg(p, uap)
601 struct proc *p;
602 register struct osendmsg_args /* {
603 int s;
604 caddr_t msg;
605 int flags;
606 } */ *uap;
607{
608 struct msghdr msg;
609 struct iovec aiov[UIO_SMALLIOV], *iov;
610 int error;
611
612 error = copyin(uap->msg, (caddr_t)&msg, sizeof (struct omsghdr));
613 if (error)
614 return (error);
615 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
616 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
617 return (EMSGSIZE);
618 MALLOC(iov, struct iovec *,
619 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
620 M_WAITOK);
621 } else
622 iov = aiov;
623 error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
624 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
625 if (error)
626 goto done;
627 msg.msg_flags = MSG_COMPAT;
628 msg.msg_iov = iov;
629 error = sendit(p, uap->s, &msg, uap->flags);
630done:
631 if (iov != aiov)
632 FREE(iov, M_IOV);
633 return (error);
634}
635#endif
636
637int
638sendmsg(p, uap)
639 struct proc *p;
640 register struct sendmsg_args /* {
641 int s;
642 caddr_t msg;
643 int flags;
644 } */ *uap;
645{
646 struct msghdr msg;
647 struct iovec aiov[UIO_SMALLIOV], *iov;
648 int error;
649
650 error = copyin(uap->msg, (caddr_t)&msg, sizeof (msg));
651 if (error)
652 return (error);
653 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
654 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
655 return (EMSGSIZE);
656 MALLOC(iov, struct iovec *,
657 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
658 M_WAITOK);
659 } else
660 iov = aiov;
661 if (msg.msg_iovlen &&
662 (error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
663 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)))))
664 goto done;
665 msg.msg_iov = iov;
666#ifdef COMPAT_OLDSOCK
667 msg.msg_flags = 0;
668#endif
669 error = sendit(p, uap->s, &msg, uap->flags);
670done:
671 if (iov != aiov)
672 FREE(iov, M_IOV);
673 return (error);
674}
675
676static int
677recvit(p, s, mp, namelenp)
678 register struct proc *p;
679 int s;
680 register struct msghdr *mp;
681 caddr_t namelenp;
682{
683 struct file *fp;
684 struct uio auio;
685 register struct iovec *iov;
686 register int i;
687 int len, error;
688 struct mbuf *m, *control = 0;
689 caddr_t ctlbuf;
690 struct socket *so;
691 struct sockaddr *fromsa = 0;
692#ifdef KTRACE
693 struct iovec *ktriov = NULL;
694 struct uio ktruio;
695#endif
696
697 error = getsock(p->p_fd, s, &fp);
698 if (error)
699 return (error);
700 auio.uio_iov = mp->msg_iov;
701 auio.uio_iovcnt = mp->msg_iovlen;
702 auio.uio_segflg = UIO_USERSPACE;
703 auio.uio_rw = UIO_READ;
704 auio.uio_procp = p;
705 auio.uio_offset = 0; /* XXX */
706 auio.uio_resid = 0;
707 iov = mp->msg_iov;
708 for (i = 0; i < mp->msg_iovlen; i++, iov++) {
709 if ((auio.uio_resid += iov->iov_len) < 0)
710 return (EINVAL);
711 }
712#ifdef KTRACE
713 if (KTRPOINT(p, KTR_GENIO)) {
714 int iovlen = auio.uio_iovcnt * sizeof (struct iovec);
715
716 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
717 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
718 ktruio = auio;
719 }
720#endif
721 len = auio.uio_resid;
722 so = (struct socket *)fp->f_data;
723 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
724 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
725 &mp->msg_flags);
726 if (error) {
727 if (auio.uio_resid != len && (error == ERESTART ||
728 error == EINTR || error == EWOULDBLOCK))
729 error = 0;
730 }
731#ifdef KTRACE
732 if (ktriov != NULL) {
733 if (error == 0) {
734 ktruio.uio_iov = ktriov;
735 ktruio.uio_resid = len - auio.uio_resid;
736 ktrgenio(p->p_tracep, s, UIO_READ, &ktruio, error);
737 }
738 FREE(ktriov, M_TEMP);
739 }
740#endif
741 if (error)
742 goto out;
743 p->p_retval[0] = len - auio.uio_resid;
744 if (mp->msg_name) {
745 len = mp->msg_namelen;
746 if (len <= 0 || fromsa == 0)
747 len = 0;
748 else {
749#ifndef MIN
750#define MIN(a,b) ((a)>(b)?(b):(a))
751#endif
752 /* save sa_len before it is destroyed by MSG_COMPAT */
753 len = MIN(len, fromsa->sa_len);
754#ifdef COMPAT_OLDSOCK
755 if (mp->msg_flags & MSG_COMPAT)
756 ((struct osockaddr *)fromsa)->sa_family =
757 fromsa->sa_family;
758#endif
759 error = copyout(fromsa,
760 (caddr_t)mp->msg_name, (unsigned)len);
761 if (error)
762 goto out;
763 }
764 mp->msg_namelen = len;
765 if (namelenp &&
766 (error = copyout((caddr_t)&len, namelenp, sizeof (int)))) {
767#ifdef COMPAT_OLDSOCK
768 if (mp->msg_flags & MSG_COMPAT)
769 error = 0; /* old recvfrom didn't check */
770 else
771#endif
772 goto out;
773 }
774 }
775 if (mp->msg_control) {
776#ifdef COMPAT_OLDSOCK
777 /*
778 * We assume that old recvmsg calls won't receive access
779 * rights and other control info, esp. as control info
780 * is always optional and those options didn't exist in 4.3.
781 * If we receive rights, trim the cmsghdr; anything else
782 * is tossed.
783 */
784 if (control && mp->msg_flags & MSG_COMPAT) {
785 if (mtod(control, struct cmsghdr *)->cmsg_level !=
786 SOL_SOCKET ||
787 mtod(control, struct cmsghdr *)->cmsg_type !=
788 SCM_RIGHTS) {
789 mp->msg_controllen = 0;
790 goto out;
791 }
792 control->m_len -= sizeof (struct cmsghdr);
793 control->m_data += sizeof (struct cmsghdr);
794 }
795#endif
796 len = mp->msg_controllen;
797 m = control;
798 mp->msg_controllen = 0;
799 ctlbuf = (caddr_t) mp->msg_control;
800
801 while (m && len > 0) {
802 unsigned int tocopy;
803
804 if (len >= m->m_len)
805 tocopy = m->m_len;
806 else {
807 mp->msg_flags |= MSG_CTRUNC;
808 tocopy = len;
809 }
810
811 if ((error = copyout((caddr_t)mtod(m, caddr_t),
812 ctlbuf, tocopy)) != 0)
813 goto out;
814
815 ctlbuf += tocopy;
816 len -= tocopy;
817 m = m->m_next;
818 }
819 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
820 }
821out:
822 if (fromsa)
823 FREE(fromsa, M_SONAME);
824 if (control)
825 m_freem(control);
826 return (error);
827}
828
829int
830recvfrom(p, uap)
831 struct proc *p;
832 register struct recvfrom_args /* {
833 int s;
834 caddr_t buf;
835 size_t len;
836 int flags;
837 caddr_t from;
838 int *fromlenaddr;
839 } */ *uap;
840{
841 struct msghdr msg;
842 struct iovec aiov;
843 int error;
844
845 if (uap->fromlenaddr) {
846 error = copyin((caddr_t)uap->fromlenaddr,
847 (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen));
848 if (error)
849 return (error);
850 } else
851 msg.msg_namelen = 0;
852 msg.msg_name = uap->from;
853 msg.msg_iov = &aiov;
854 msg.msg_iovlen = 1;
855 aiov.iov_base = uap->buf;
856 aiov.iov_len = uap->len;
857 msg.msg_control = 0;
858 msg.msg_flags = uap->flags;
859 return (recvit(p, uap->s, &msg, (caddr_t)uap->fromlenaddr));
860}
861
#ifdef COMPAT_OLDSOCK
/*
 * Old recvfrom: set MSG_COMPAT in the caller's flags and reuse
 * recvfrom().
 */
int
orecvfrom(p, uap)
	struct proc *p;
	struct recvfrom_args *uap;
{
	int error;

	uap->flags |= MSG_COMPAT;
	error = recvfrom(p, uap);
	return (error);
}
#endif
873
874
875#ifdef COMPAT_OLDSOCK
876int
877orecv(p, uap)
878 struct proc *p;
879 register struct orecv_args /* {
880 int s;
881 caddr_t buf;
882 int len;
883 int flags;
884 } */ *uap;
885{
886 struct msghdr msg;
887 struct iovec aiov;
888
889 msg.msg_name = 0;
890 msg.msg_namelen = 0;
891 msg.msg_iov = &aiov;
892 msg.msg_iovlen = 1;
893 aiov.iov_base = uap->buf;
894 aiov.iov_len = uap->len;
895 msg.msg_control = 0;
896 msg.msg_flags = uap->flags;
897 return (recvit(p, uap->s, &msg, (caddr_t)0));
898}
899
900/*
901 * Old recvmsg. This code takes advantage of the fact that the old msghdr
902 * overlays the new one, missing only the flags, and with the (old) access
903 * rights where the control fields are now.
904 */
905int
906orecvmsg(p, uap)
907 struct proc *p;
908 register struct orecvmsg_args /* {
909 int s;
910 struct omsghdr *msg;
911 int flags;
912 } */ *uap;
913{
914 struct msghdr msg;
915 struct iovec aiov[UIO_SMALLIOV], *iov;
916 int error;
917
918 error = copyin((caddr_t)uap->msg, (caddr_t)&msg,
919 sizeof (struct omsghdr));
920 if (error)
921 return (error);
922 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
923 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
924 return (EMSGSIZE);
925 MALLOC(iov, struct iovec *,
926 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
927 M_WAITOK);
928 } else
929 iov = aiov;
930 msg.msg_flags = uap->flags | MSG_COMPAT;
931 error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
932 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
933 if (error)
934 goto done;
935 msg.msg_iov = iov;
936 error = recvit(p, uap->s, &msg, (caddr_t)&uap->msg->msg_namelen);
937
938 if (msg.msg_controllen && error == 0)
939 error = copyout((caddr_t)&msg.msg_controllen,
940 (caddr_t)&uap->msg->msg_accrightslen, sizeof (int));
941done:
942 if (iov != aiov)
943 FREE(iov, M_IOV);
944 return (error);
945}
946#endif
947
948int
949recvmsg(p, uap)
950 struct proc *p;
951 register struct recvmsg_args /* {
952 int s;
953 struct msghdr *msg;
954 int flags;
955 } */ *uap;
956{
957 struct msghdr msg;
958 struct iovec aiov[UIO_SMALLIOV], *uiov, *iov;
959 register int error;
960
961 error = copyin((caddr_t)uap->msg, (caddr_t)&msg, sizeof (msg));
962 if (error)
963 return (error);
964 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
965 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
966 return (EMSGSIZE);
967 MALLOC(iov, struct iovec *,
968 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
969 M_WAITOK);
970 } else
971 iov = aiov;
972#ifdef COMPAT_OLDSOCK
973 msg.msg_flags = uap->flags &~ MSG_COMPAT;
974#else
975 msg.msg_flags = uap->flags;
976#endif
977 uiov = msg.msg_iov;
978 msg.msg_iov = iov;
979 error = copyin((caddr_t)uiov, (caddr_t)iov,
980 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
981 if (error)
982 goto done;
983 error = recvit(p, uap->s, &msg, (caddr_t)0);
984 if (!error) {
985 msg.msg_iov = uiov;
986 error = copyout((caddr_t)&msg, (caddr_t)uap->msg, sizeof(msg));
987 }
988done:
989 if (iov != aiov)
990 FREE(iov, M_IOV);
991 return (error);
992}
993
994/* ARGSUSED */
995int
996shutdown(p, uap)
997 struct proc *p;
998 register struct shutdown_args /* {
999 int s;
1000 int how;
1001 } */ *uap;
1002{
1003 struct file *fp;
1004 int error;
1005
1006 error = getsock(p->p_fd, uap->s, &fp);
1007 if (error)
1008 return (error);
1009 return (soshutdown((struct socket *)fp->f_data, uap->how));
1010}
1011
1012/* ARGSUSED */
1013int
1014setsockopt(p, uap)
1015 struct proc *p;
1016 register struct setsockopt_args /* {
1017 int s;
1018 int level;
1019 int name;
1020 caddr_t val;
1021 int valsize;
1022 } */ *uap;
1023{
1024 struct file *fp;
1025 struct sockopt sopt;
1026 int error;
1027
1028 if (uap->val == 0 && uap->valsize != 0)
1029 return (EFAULT);
1030 if (uap->valsize < 0)
1031 return (EINVAL);
1032
1033 error = getsock(p->p_fd, uap->s, &fp);
1034 if (error)
1035 return (error);
1036
1037 sopt.sopt_dir = SOPT_SET;
1038 sopt.sopt_level = uap->level;
1039 sopt.sopt_name = uap->name;
1040 sopt.sopt_val = uap->val;
1041 sopt.sopt_valsize = uap->valsize;
1042 sopt.sopt_p = p;
1043
1044 return (sosetopt((struct socket *)fp->f_data, &sopt));
1045}
1046
1047/* ARGSUSED */
1048int
1049getsockopt(p, uap)
1050 struct proc *p;
1051 register struct getsockopt_args /* {
1052 int s;
1053 int level;
1054 int name;
1055 caddr_t val;
1056 int *avalsize;
1057 } */ *uap;
1058{
1059 int valsize, error;
1060 struct file *fp;
1061 struct sockopt sopt;
1062
1063 error = getsock(p->p_fd, uap->s, &fp);
1064 if (error)
1065 return (error);
1066 if (uap->val) {
1067 error = copyin((caddr_t)uap->avalsize, (caddr_t)&valsize,
1068 sizeof (valsize));
1069 if (error)
1070 return (error);
1071 if (valsize < 0)
1072 return (EINVAL);
1073 } else
1074 valsize = 0;
1075
1076 sopt.sopt_dir = SOPT_GET;
1077 sopt.sopt_level = uap->level;
1078 sopt.sopt_name = uap->name;
1079 sopt.sopt_val = uap->val;
1080 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
1081 sopt.sopt_p = p;
1082
1083 error = sogetopt((struct socket *)fp->f_data, &sopt);
1084 if (error == 0) {
1085 valsize = sopt.sopt_valsize;
1086 error = copyout((caddr_t)&valsize,
1087 (caddr_t)uap->avalsize, sizeof (valsize));
1088 }
1089 return (error);
1090}
1091
1092/*
1093 * Get socket name.
1094 */
1095/* ARGSUSED */
1096static int
1097getsockname1(p, uap, compat)
1098 struct proc *p;
1099 register struct getsockname_args /* {
1100 int fdes;
1101 caddr_t asa;
1102 int *alen;
1103 } */ *uap;
1104 int compat;
1105{
1106 struct file *fp;
1107 register struct socket *so;
1108 struct sockaddr *sa;
1109 int len, error;
1110
1111 error = getsock(p->p_fd, uap->fdes, &fp);
1112 if (error)
1113 return (error);
1114 error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len));
1115 if (error)
1116 return (error);
1117 so = (struct socket *)fp->f_data;
1118 sa = 0;
1119 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1120 if (error)
1121 goto bad;
1122 if (sa == 0) {
1123 len = 0;
1124 goto gotnothing;
1125 }
1126
1127 len = MIN(len, sa->sa_len);
1128#ifdef COMPAT_OLDSOCK
1129 if (compat)
1130 ((struct osockaddr *)sa)->sa_family = sa->sa_family;
1131#endif
1132 error = copyout(sa, (caddr_t)uap->asa, (u_int)len);
1133 if (error == 0)
1134gotnothing:
1135 error = copyout((caddr_t)&len, (caddr_t)uap->alen,
1136 sizeof (len));
1137bad:
1138 if (sa)
1139 FREE(sa, M_SONAME);
1140 return (error);
1141}
1142
/*
 * getsockname system call: current sockaddr layout, so the shared back
 * end is invoked with the compat flag cleared.
 */
int
getsockname(p, uap)
	struct proc *p;
	struct getsockname_args *uap;
{
	int error;

	error = getsockname1(p, uap, 0);
	return (error);
}
1151
#ifdef COMPAT_OLDSOCK
/*
 * Old-format getsockname: same back end, with the compat flag set so the
 * address is returned in the osockaddr layout.
 */
int
ogetsockname(p, uap)
	struct proc *p;
	struct getsockname_args *uap;
{
	int error;

	error = getsockname1(p, uap, 1);
	return (error);
}
#endif /* COMPAT_OLDSOCK */
1162
1163/*
1164 * Get name of peer for connected socket.
1165 */
1166/* ARGSUSED */
1167static int
1168getpeername1(p, uap, compat)
1169 struct proc *p;
1170 register struct getpeername_args /* {
1171 int fdes;
1172 caddr_t asa;
1173 int *alen;
1174 } */ *uap;
1175 int compat;
1176{
1177 struct file *fp;
1178 register struct socket *so;
1179 struct sockaddr *sa;
1180 int len, error;
1181
1182 error = getsock(p->p_fd, uap->fdes, &fp);
1183 if (error)
1184 return (error);
1185 so = (struct socket *)fp->f_data;
1186 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0)
1187 return (ENOTCONN);
1188 error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len));
1189 if (error)
1190 return (error);
1191 sa = 0;
1192 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1193 if (error)
1194 goto bad;
1195 if (sa == 0) {
1196 len = 0;
1197 goto gotnothing;
1198 }
1199 len = MIN(len, sa->sa_len);
1200#ifdef COMPAT_OLDSOCK
1201 if (compat)
1202 ((struct osockaddr *)sa)->sa_family =
1203 sa->sa_family;
1204#endif
1205 error = copyout(sa, (caddr_t)uap->asa, (u_int)len);
1206 if (error)
1207 goto bad;
1208gotnothing:
1209 error = copyout((caddr_t)&len, (caddr_t)uap->alen, sizeof (len));
1210bad:
1211 if (sa) FREE(sa, M_SONAME);
1212 return (error);
1213}
1214
/*
 * getpeername system call: current sockaddr layout, so the shared back
 * end is invoked with the compat flag cleared.
 */
int
getpeername(p, uap)
	struct proc *p;
	struct getpeername_args *uap;
{
	int error;

	error = getpeername1(p, uap, 0);
	return (error);
}
1223
#ifdef COMPAT_OLDSOCK
/*
 * Old-format getpeername: same back end, with the compat flag set.
 */
int
ogetpeername(p, uap)
	struct proc *p;
	struct ogetpeername_args *uap;
{
	struct getpeername_args *args;

	/* XXX uap should have type `getpeername_args *' to begin with. */
	args = (struct getpeername_args *)uap;
	return (getpeername1(p, args, 1));
}
#endif /* COMPAT_OLDSOCK */
1235
1236int
1237sockargs(mp, buf, buflen, type)
1238 struct mbuf **mp;
1239 caddr_t buf;
1240 int buflen, type;
1241{
1242 register struct sockaddr *sa;
1243 register struct mbuf *m;
1244 int error;
1245
1246 if ((u_int)buflen > MLEN) {
1247#ifdef COMPAT_OLDSOCK
1248 if (type == MT_SONAME && (u_int)buflen <= 112)
1249 buflen = MLEN; /* unix domain compat. hack */
1250 else
1251#endif
1252 return (EINVAL);
1253 }
1254 m = m_get(M_WAIT, type);
1255 if (m == NULL)
1256 return (ENOBUFS);
1257 m->m_len = buflen;
1258 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1259 if (error)
1260 (void) m_free(m);
1261 else {
1262 *mp = m;
1263 if (type == MT_SONAME) {
1264 sa = mtod(m, struct sockaddr *);
1265
1266#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1267 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1268 sa->sa_family = sa->sa_len;
1269#endif
1270 sa->sa_len = buflen;
1271 }
1272 }
1273 return (error);
1274}
1275
1276int
1277getsockaddr(namp, uaddr, len)
1278 struct sockaddr **namp;
1279 caddr_t uaddr;
1280 size_t len;
1281{
1282 struct sockaddr *sa;
1283 int error;
1284
1285 if (len > SOCK_MAXADDRLEN)
1286 return ENAMETOOLONG;
1287 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1288 error = copyin(uaddr, sa, len);
1289 if (error) {
1290 FREE(sa, M_SONAME);
1291 } else {
1292#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1293 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1294 sa->sa_family = sa->sa_len;
1295#endif
1296 sa->sa_len = len;
1297 *namp = sa;
1298 }
1299 return error;
1300}
1301
1302int
1303getsock(fdp, fdes, fpp)
1304 struct filedesc *fdp;
1305 int fdes;
1306 struct file **fpp;
1307{
1308 register struct file *fp;
1309
1310 if ((unsigned)fdes >= fdp->fd_nfiles ||
1311 (fp = fdp->fd_ofiles[fdes]) == NULL)
1312 return (EBADF);
1313 if (fp->f_type != DTYPE_SOCKET)
1314 return (ENOTSOCK);
1315 *fpp = fp;
1316 return (0);
1317}
1318
1319/*
1320 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
1321 * XXX - The sf_buf functions are currently private to sendfile(2), so have
1322 * been made static, but may be useful in the future for doing zero-copy in
1323 * other parts of the networking code.
1324 */
1325static void
1326sf_buf_init(void *arg)
1327{
1328 int i;
1329
1330 SLIST_INIT(&sf_freelist);
1331 sf_base = kmem_alloc_pageable(kernel_map, nsfbufs * PAGE_SIZE);
1332 sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP, M_NOWAIT);
1333 bzero(sf_bufs, nsfbufs * sizeof(struct sf_buf));
1334 for (i = 0; i < nsfbufs; i++) {
1335 sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
1336 SLIST_INSERT_HEAD(&sf_freelist, &sf_bufs[i], free_list);
1337 }
1338}
1339
1340/*
1341 * Get an sf_buf from the freelist. Will block if none are available.
1342 */
1343static struct sf_buf *
1344sf_buf_alloc()
1345{
1346 struct sf_buf *sf;
1347 int s;
1348
1349 s = splimp();
1350 while ((sf = SLIST_FIRST(&sf_freelist)) == NULL) {
1351 sf_buf_alloc_want = 1;
1352 tsleep(&sf_freelist, PVM, "sfbufa", 0);
1353 }
1354 SLIST_REMOVE_HEAD(&sf_freelist, free_list);
1355 splx(s);
1357 sf->refcnt = 1;
1358 return (sf);
1359}
1360
1361#define dtosf(x) (&sf_bufs[((uintptr_t)(x) - (uintptr_t)sf_base) >> PAGE_SHIFT])
1356 return (sf);
1357}
1358
1359#define dtosf(x) (&sf_bufs[((uintptr_t)(x) - (uintptr_t)sf_base) >> PAGE_SHIFT])
1362static void
1363sf_buf_ref(caddr_t addr, u_int size)
1364{
1365 struct sf_buf *sf;
1366
1360
1367 sf = dtosf(addr);
1368 if (sf->refcnt == 0)
1369 panic("sf_buf_ref: referencing a free sf_buf");
1370 sf->refcnt++;
1371}
1372
1373/*
1361/*
1374 * Lose a reference to an sf_buf. When none left, detach mapped page
1375 * and release resources back to the system.
1376 *
1362 *
1363 * Detach mapped page and release resources back to the system.
1364 *
1377 * Must be called at splimp.
1378 */
1379static void
1365 * Must be called at splimp.
1366 */
1367static void
1380sf_buf_free(caddr_t addr, u_int size)
1368sf_buf_free(caddr_t addr, void *args)
1381{
1382 struct sf_buf *sf;
1383 struct vm_page *m;
1384 int s;
1385
1386 sf = dtosf(addr);
1369{
1370 struct sf_buf *sf;
1371 struct vm_page *m;
1372 int s;
1373
1374 sf = dtosf(addr);
1387 if (sf->refcnt == 0)
1388 panic("sf_buf_free: freeing free sf_buf");
1389 sf->refcnt--;
1390 if (sf->refcnt == 0) {
1391 pmap_qremove((vm_offset_t)addr, 1);
1392 m = sf->m;
1393 s = splvm();
1394 vm_page_unwire(m, 0);
1395 /*
1396 * Check for the object going away on us. This can
1397 * happen since we don't hold a reference to it.
1398 * If so, we're responsible for freeing the page.
1399 */
1400 if (m->wire_count == 0 && m->object == NULL)
1401 vm_page_free(m);
1402 splx(s);
1403 sf->m = NULL;
1404 SLIST_INSERT_HEAD(&sf_freelist, sf, free_list);
1405 if (sf_buf_alloc_want) {
1406 sf_buf_alloc_want = 0;
1407 wakeup(&sf_freelist);
1408 }
1375 pmap_qremove((vm_offset_t)addr, 1);
1376 m = sf->m;
1377 s = splvm();
1378 vm_page_unwire(m, 0);
1379 /*
1380 * Check for the object going away on us. This can
1381 * happen since we don't hold a reference to it.
1382 * If so, we're responsible for freeing the page.
1383 */
1384 if (m->wire_count == 0 && m->object == NULL)
1385 vm_page_free(m);
1386 splx(s);
1387 sf->m = NULL;
1388 SLIST_INSERT_HEAD(&sf_freelist, sf, free_list);
1389 if (sf_buf_alloc_want) {
1390 sf_buf_alloc_want = 0;
1391 wakeup(&sf_freelist);
1409 }
1410}
1411
1412/*
1413 * sendfile(2).
1414 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
1415 * struct sf_hdtr *hdtr, off_t *sbytes, int flags)
1416 *
1417 * Send a file specified by 'fd' and starting at 'offset' to a socket
1418 * specified by 's'. Send only 'nbytes' of the file or until EOF if
1419 * nbytes == 0. Optionally add a header and/or trailer to the socket
1420 * output. If specified, write the total number of bytes sent into *sbytes.
1421 */
1422int
1423sendfile(struct proc *p, struct sendfile_args *uap)
1424{
1425 struct file *fp;
1426 struct filedesc *fdp = p->p_fd;
1427 struct vnode *vp;
1428 struct vm_object *obj;
1429 struct socket *so;
1430 struct mbuf *m;
1431 struct sf_buf *sf;
1432 struct vm_page *pg;
1433 struct writev_args nuap;
1434 struct sf_hdtr hdtr;
1435 off_t off, xfsize, sbytes = 0;
1436 int error = 0, s;
1437
1438 vp = NULL;
1439 /*
1440 * Do argument checking. Must be a regular file in, stream
1441 * type and connected socket out, positive offset.
1442 */
1443 fp = getfp(fdp, uap->fd, FREAD);
1444 if (fp == NULL) {
1445 error = EBADF;
1446 goto done;
1447 }
1448 if (fp->f_type != DTYPE_VNODE) {
1449 error = EINVAL;
1450 goto done;
1451 }
1452 vp = (struct vnode *)fp->f_data;
1453 vref(vp);
1454 obj = vp->v_object;
1455 if (vp->v_type != VREG || obj == NULL) {
1456 error = EINVAL;
1457 goto done;
1458 }
1459 error = getsock(p->p_fd, uap->s, &fp);
1460 if (error)
1461 goto done;
1462 so = (struct socket *)fp->f_data;
1463 if (so->so_type != SOCK_STREAM) {
1464 error = EINVAL;
1465 goto done;
1466 }
1467 if ((so->so_state & SS_ISCONNECTED) == 0) {
1468 error = ENOTCONN;
1469 goto done;
1470 }
1471 if (uap->offset < 0) {
1472 error = EINVAL;
1473 goto done;
1474 }
1475
1476 /*
1477 * If specified, get the pointer to the sf_hdtr struct for
1478 * any headers/trailers.
1479 */
1480 if (uap->hdtr != NULL) {
1481 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1482 if (error)
1483 goto done;
1484 /*
1485 * Send any headers. Wimp out and use writev(2).
1486 */
1487 if (hdtr.headers != NULL) {
1488 nuap.fd = uap->s;
1489 nuap.iovp = hdtr.headers;
1490 nuap.iovcnt = hdtr.hdr_cnt;
1491 error = writev(p, &nuap);
1492 if (error)
1493 goto done;
1494 sbytes += p->p_retval[0];
1495 }
1496 }
1497
1498 /*
1499 * Protect against multiple writers to the socket.
1500 */
1501 (void) sblock(&so->so_snd, M_WAITOK);
1502
1503 /*
1504 * Loop through the pages in the file, starting with the requested
1505 * offset. Get a file page (do I/O if necessary), map the file page
1506 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1507 * it on the socket.
1508 */
1509 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
1510 vm_pindex_t pindex;
1511 vm_offset_t pgoff;
1512
1513 pindex = OFF_TO_IDX(off);
1514retry_lookup:
1515 /*
1516 * Calculate the amount to transfer. Not to exceed a page,
1517 * the EOF, or the passed in nbytes.
1518 */
1519 xfsize = obj->un_pager.vnp.vnp_size - off;
1520 if (xfsize > PAGE_SIZE)
1521 xfsize = PAGE_SIZE;
1522 pgoff = (vm_offset_t)(off & PAGE_MASK);
1523 if (PAGE_SIZE - pgoff < xfsize)
1524 xfsize = PAGE_SIZE - pgoff;
1525 if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
1526 xfsize = uap->nbytes - sbytes;
1527 if (xfsize <= 0)
1528 break;
1529 /*
1530 * Optimize the non-blocking case by looking at the socket space
1531 * before going to the extra work of constituting the sf_buf.
1532 */
1533 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
1534 if (so->so_state & SS_CANTSENDMORE)
1535 error = EPIPE;
1536 else
1537 error = EAGAIN;
1538 sbunlock(&so->so_snd);
1539 goto done;
1540 }
1541 /*
1542 * Attempt to look up the page.
1543 *
1544 * Allocate if not found
1545 *
1546 * Wait and loop if busy.
1547 */
1548 pg = vm_page_lookup(obj, pindex);
1549
1550 if (pg == NULL) {
1551 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL);
1552 if (pg == NULL) {
1553 VM_WAIT;
1554 goto retry_lookup;
1555 }
1556 vm_page_wakeup(pg);
1557 } else if (vm_page_sleep_busy(pg, TRUE, "sfpbsy")) {
1558 goto retry_lookup;
1559 }
1560
1561 /*
1562 * Wire the page so it does not get ripped out from under
1563 * us.
1564 */
1565
1566 vm_page_wire(pg);
1567
1568 /*
1569 * If page is not valid for what we need, initiate I/O
1570 */
1571
1572 if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) {
1573 struct uio auio;
1574 struct iovec aiov;
1575 int bsize;
1576
1577 /*
1578 * Ensure that our page is still around when the I/O
1579 * completes.
1580 */
1581 vm_page_io_start(pg);
1582
1583 /*
1584 * Get the page from backing store.
1585 */
1586 bsize = vp->v_mount->mnt_stat.f_iosize;
1587 auio.uio_iov = &aiov;
1588 auio.uio_iovcnt = 1;
1589 aiov.iov_base = 0;
1590 aiov.iov_len = MAXBSIZE;
1591 auio.uio_resid = MAXBSIZE;
1592 auio.uio_offset = trunc_page(off);
1593 auio.uio_segflg = UIO_NOCOPY;
1594 auio.uio_rw = UIO_READ;
1595 auio.uio_procp = p;
1596 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, p);
1597 error = VOP_READ(vp, &auio, IO_VMIO | ((MAXBSIZE / bsize) << 16),
1598 p->p_ucred);
1599 VOP_UNLOCK(vp, 0, p);
1600 vm_page_flag_clear(pg, PG_ZERO);
1601 vm_page_io_finish(pg);
1602 if (error) {
1603 vm_page_unwire(pg, 0);
1604 /*
1605 * See if anyone else might know about this page.
1606 * If not and it is not valid, then free it.
1607 */
1608 if (pg->wire_count == 0 && pg->valid == 0 &&
1609 pg->busy == 0 && !(pg->flags & PG_BUSY) &&
1610 pg->hold_count == 0)
1611 vm_page_free(pg);
1612 sbunlock(&so->so_snd);
1613 goto done;
1614 }
1615 }
1616
1617 /*
1618 * Allocate a kernel virtual page and insert the physical page
1619 * into it.
1620 */
1621
1622 sf = sf_buf_alloc();
1623 sf->m = pg;
1624 pmap_qenter(sf->kva, &pg, 1);
1625 /*
1626 * Get an mbuf header and set it up as having external storage.
1627 */
1628 MGETHDR(m, M_WAIT, MT_DATA);
1629 if (m == NULL) {
1630 error = ENOBUFS;
1631 goto done;
1632 }
1392 }
1393}
1394
1395/*
1396 * sendfile(2).
1397 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
1398 * struct sf_hdtr *hdtr, off_t *sbytes, int flags)
1399 *
1400 * Send a file specified by 'fd' and starting at 'offset' to a socket
1401 * specified by 's'. Send only 'nbytes' of the file or until EOF if
1402 * nbytes == 0. Optionally add a header and/or trailer to the socket
1403 * output. If specified, write the total number of bytes sent into *sbytes.
1404 */
1405int
1406sendfile(struct proc *p, struct sendfile_args *uap)
1407{
1408 struct file *fp;
1409 struct filedesc *fdp = p->p_fd;
1410 struct vnode *vp;
1411 struct vm_object *obj;
1412 struct socket *so;
1413 struct mbuf *m;
1414 struct sf_buf *sf;
1415 struct vm_page *pg;
1416 struct writev_args nuap;
1417 struct sf_hdtr hdtr;
1418 off_t off, xfsize, sbytes = 0;
1419 int error = 0, s;
1420
1421 vp = NULL;
1422 /*
1423 * Do argument checking. Must be a regular file in, stream
1424 * type and connected socket out, positive offset.
1425 */
1426 fp = getfp(fdp, uap->fd, FREAD);
1427 if (fp == NULL) {
1428 error = EBADF;
1429 goto done;
1430 }
1431 if (fp->f_type != DTYPE_VNODE) {
1432 error = EINVAL;
1433 goto done;
1434 }
1435 vp = (struct vnode *)fp->f_data;
1436 vref(vp);
1437 obj = vp->v_object;
1438 if (vp->v_type != VREG || obj == NULL) {
1439 error = EINVAL;
1440 goto done;
1441 }
1442 error = getsock(p->p_fd, uap->s, &fp);
1443 if (error)
1444 goto done;
1445 so = (struct socket *)fp->f_data;
1446 if (so->so_type != SOCK_STREAM) {
1447 error = EINVAL;
1448 goto done;
1449 }
1450 if ((so->so_state & SS_ISCONNECTED) == 0) {
1451 error = ENOTCONN;
1452 goto done;
1453 }
1454 if (uap->offset < 0) {
1455 error = EINVAL;
1456 goto done;
1457 }
1458
1459 /*
1460 * If specified, get the pointer to the sf_hdtr struct for
1461 * any headers/trailers.
1462 */
1463 if (uap->hdtr != NULL) {
1464 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1465 if (error)
1466 goto done;
1467 /*
1468 * Send any headers. Wimp out and use writev(2).
1469 */
1470 if (hdtr.headers != NULL) {
1471 nuap.fd = uap->s;
1472 nuap.iovp = hdtr.headers;
1473 nuap.iovcnt = hdtr.hdr_cnt;
1474 error = writev(p, &nuap);
1475 if (error)
1476 goto done;
1477 sbytes += p->p_retval[0];
1478 }
1479 }
1480
1481 /*
1482 * Protect against multiple writers to the socket.
1483 */
1484 (void) sblock(&so->so_snd, M_WAITOK);
1485
1486 /*
1487 * Loop through the pages in the file, starting with the requested
1488 * offset. Get a file page (do I/O if necessary), map the file page
1489 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1490 * it on the socket.
1491 */
1492 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
1493 vm_pindex_t pindex;
1494 vm_offset_t pgoff;
1495
1496 pindex = OFF_TO_IDX(off);
1497retry_lookup:
1498 /*
1499 * Calculate the amount to transfer. Not to exceed a page,
1500 * the EOF, or the passed in nbytes.
1501 */
1502 xfsize = obj->un_pager.vnp.vnp_size - off;
1503 if (xfsize > PAGE_SIZE)
1504 xfsize = PAGE_SIZE;
1505 pgoff = (vm_offset_t)(off & PAGE_MASK);
1506 if (PAGE_SIZE - pgoff < xfsize)
1507 xfsize = PAGE_SIZE - pgoff;
1508 if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
1509 xfsize = uap->nbytes - sbytes;
1510 if (xfsize <= 0)
1511 break;
1512 /*
1513 * Optimize the non-blocking case by looking at the socket space
1514 * before going to the extra work of constituting the sf_buf.
1515 */
1516 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
1517 if (so->so_state & SS_CANTSENDMORE)
1518 error = EPIPE;
1519 else
1520 error = EAGAIN;
1521 sbunlock(&so->so_snd);
1522 goto done;
1523 }
1524 /*
1525 * Attempt to look up the page.
1526 *
1527 * Allocate if not found
1528 *
1529 * Wait and loop if busy.
1530 */
1531 pg = vm_page_lookup(obj, pindex);
1532
1533 if (pg == NULL) {
1534 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL);
1535 if (pg == NULL) {
1536 VM_WAIT;
1537 goto retry_lookup;
1538 }
1539 vm_page_wakeup(pg);
1540 } else if (vm_page_sleep_busy(pg, TRUE, "sfpbsy")) {
1541 goto retry_lookup;
1542 }
1543
1544 /*
1545 * Wire the page so it does not get ripped out from under
1546 * us.
1547 */
1548
1549 vm_page_wire(pg);
1550
1551 /*
1552 * If page is not valid for what we need, initiate I/O
1553 */
1554
1555 if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) {
1556 struct uio auio;
1557 struct iovec aiov;
1558 int bsize;
1559
1560 /*
1561 * Ensure that our page is still around when the I/O
1562 * completes.
1563 */
1564 vm_page_io_start(pg);
1565
1566 /*
1567 * Get the page from backing store.
1568 */
1569 bsize = vp->v_mount->mnt_stat.f_iosize;
1570 auio.uio_iov = &aiov;
1571 auio.uio_iovcnt = 1;
1572 aiov.iov_base = 0;
1573 aiov.iov_len = MAXBSIZE;
1574 auio.uio_resid = MAXBSIZE;
1575 auio.uio_offset = trunc_page(off);
1576 auio.uio_segflg = UIO_NOCOPY;
1577 auio.uio_rw = UIO_READ;
1578 auio.uio_procp = p;
1579 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, p);
1580 error = VOP_READ(vp, &auio, IO_VMIO | ((MAXBSIZE / bsize) << 16),
1581 p->p_ucred);
1582 VOP_UNLOCK(vp, 0, p);
1583 vm_page_flag_clear(pg, PG_ZERO);
1584 vm_page_io_finish(pg);
1585 if (error) {
1586 vm_page_unwire(pg, 0);
1587 /*
1588 * See if anyone else might know about this page.
1589 * If not and it is not valid, then free it.
1590 */
1591 if (pg->wire_count == 0 && pg->valid == 0 &&
1592 pg->busy == 0 && !(pg->flags & PG_BUSY) &&
1593 pg->hold_count == 0)
1594 vm_page_free(pg);
1595 sbunlock(&so->so_snd);
1596 goto done;
1597 }
1598 }
1599
1600 /*
1601 * Allocate a kernel virtual page and insert the physical page
1602 * into it.
1603 */
1604
1605 sf = sf_buf_alloc();
1606 sf->m = pg;
1607 pmap_qenter(sf->kva, &pg, 1);
1608 /*
1609 * Get an mbuf header and set it up as having external storage.
1610 */
1611 MGETHDR(m, M_WAIT, MT_DATA);
1612 if (m == NULL) {
1613 error = ENOBUFS;
1614 goto done;
1615 }
1633 m->m_ext.ext_free = sf_buf_free;
1634 m->m_ext.ext_ref = sf_buf_ref;
1635 m->m_ext.ext_buf = (void *)sf->kva;
1636 m->m_ext.ext_size = PAGE_SIZE;
1616 /*
1617 * Setup external storage for mbuf.
1618 */
1619 MEXTADD(m, sf->kva, PAGE_SIZE, sf_buf_free, NULL);
1637 m->m_data = (char *) sf->kva + pgoff;
1620 m->m_data = (char *) sf->kva + pgoff;
1638 m->m_flags |= M_EXT;
1639 m->m_pkthdr.len = m->m_len = xfsize;
1640 /*
1641 * Add the buffer to the socket buffer chain.
1642 */
1643 s = splnet();
1644retry_space:
1645 /*
1646 * Make sure that the socket is still able to take more data.
1647 * CANTSENDMORE being true usually means that the connection
1648 * was closed. so_error is true when an error was sensed after
1649 * a previous send.
1650 * The state is checked after the page mapping and buffer
1651 * allocation above since those operations may block and make
1652 * any socket checks stale. From this point forward, nothing
1653 * blocks before the pru_send (or more accurately, any blocking
1654 * results in a loop back to here to re-check).
1655 */
1656 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
1657 if (so->so_state & SS_CANTSENDMORE) {
1658 error = EPIPE;
1659 } else {
1660 error = so->so_error;
1661 so->so_error = 0;
1662 }
1663 m_freem(m);
1664 sbunlock(&so->so_snd);
1665 splx(s);
1666 goto done;
1667 }
1668 /*
1669 * Wait for socket space to become available. We do this just
1670 * after checking the connection state above in order to avoid
1671 * a race condition with sbwait().
1672 */
1673 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
1674 if (so->so_state & SS_NBIO) {
1675 m_freem(m);
1676 sbunlock(&so->so_snd);
1677 splx(s);
1678 error = EAGAIN;
1679 goto done;
1680 }
1681 error = sbwait(&so->so_snd);
1682 /*
1683 * An error from sbwait usually indicates that we've
1684 * been interrupted by a signal. If we've sent anything
1685 * then return bytes sent, otherwise return the error.
1686 */
1687 if (error) {
1688 m_freem(m);
1689 sbunlock(&so->so_snd);
1690 splx(s);
1691 goto done;
1692 }
1693 goto retry_space;
1694 }
1695 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, p);
1696 splx(s);
1697 if (error) {
1698 sbunlock(&so->so_snd);
1699 goto done;
1700 }
1701 }
1702 sbunlock(&so->so_snd);
1703
1704 /*
1705 * Send trailers. Wimp out and use writev(2).
1706 */
1707 if (uap->hdtr != NULL && hdtr.trailers != NULL) {
1708 nuap.fd = uap->s;
1709 nuap.iovp = hdtr.trailers;
1710 nuap.iovcnt = hdtr.trl_cnt;
1711 error = writev(p, &nuap);
1712 if (error)
1713 goto done;
1714 sbytes += p->p_retval[0];
1715 }
1716
1717done:
1718 if (uap->sbytes != NULL) {
1719 copyout(&sbytes, uap->sbytes, sizeof(off_t));
1720 }
1721 if (vp)
1722 vrele(vp);
1723 return (error);
1724}
1621 m->m_pkthdr.len = m->m_len = xfsize;
1622 /*
1623 * Add the buffer to the socket buffer chain.
1624 */
1625 s = splnet();
1626retry_space:
1627 /*
1628 * Make sure that the socket is still able to take more data.
1629 * CANTSENDMORE being true usually means that the connection
1630 * was closed. so_error is true when an error was sensed after
1631 * a previous send.
1632 * The state is checked after the page mapping and buffer
1633 * allocation above since those operations may block and make
1634 * any socket checks stale. From this point forward, nothing
1635 * blocks before the pru_send (or more accurately, any blocking
1636 * results in a loop back to here to re-check).
1637 */
1638 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
1639 if (so->so_state & SS_CANTSENDMORE) {
1640 error = EPIPE;
1641 } else {
1642 error = so->so_error;
1643 so->so_error = 0;
1644 }
1645 m_freem(m);
1646 sbunlock(&so->so_snd);
1647 splx(s);
1648 goto done;
1649 }
1650 /*
1651 * Wait for socket space to become available. We do this just
1652 * after checking the connection state above in order to avoid
1653 * a race condition with sbwait().
1654 */
1655 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
1656 if (so->so_state & SS_NBIO) {
1657 m_freem(m);
1658 sbunlock(&so->so_snd);
1659 splx(s);
1660 error = EAGAIN;
1661 goto done;
1662 }
1663 error = sbwait(&so->so_snd);
1664 /*
1665 * An error from sbwait usually indicates that we've
1666 * been interrupted by a signal. If we've sent anything
1667 * then return bytes sent, otherwise return the error.
1668 */
1669 if (error) {
1670 m_freem(m);
1671 sbunlock(&so->so_snd);
1672 splx(s);
1673 goto done;
1674 }
1675 goto retry_space;
1676 }
1677 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, p);
1678 splx(s);
1679 if (error) {
1680 sbunlock(&so->so_snd);
1681 goto done;
1682 }
1683 }
1684 sbunlock(&so->so_snd);
1685
1686 /*
1687 * Send trailers. Wimp out and use writev(2).
1688 */
1689 if (uap->hdtr != NULL && hdtr.trailers != NULL) {
1690 nuap.fd = uap->s;
1691 nuap.iovp = hdtr.trailers;
1692 nuap.iovcnt = hdtr.trl_cnt;
1693 error = writev(p, &nuap);
1694 if (error)
1695 goto done;
1696 sbytes += p->p_retval[0];
1697 }
1698
1699done:
1700 if (uap->sbytes != NULL) {
1701 copyout(&sbytes, uap->sbytes, sizeof(off_t));
1702 }
1703 if (vp)
1704 vrele(vp);
1705 return (error);
1706}