Deleted Added
full compact
uipc_syscalls.c (176498) uipc_syscalls.c (177599)
1/*-
2 * Copyright (c) 1982, 1986, 1989, 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * sendfile(2) and related extensions:
6 * Copyright (c) 1998, David Greenman. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
33 */
34
35#include <sys/cdefs.h>
1/*-
2 * Copyright (c) 1982, 1986, 1989, 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * sendfile(2) and related extensions:
6 * Copyright (c) 1998, David Greenman. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: head/sys/kern/uipc_syscalls.c 176498 2008-02-24 00:07:00Z cperciva $");
36__FBSDID("$FreeBSD: head/sys/kern/uipc_syscalls.c 177599 2008-03-25 09:39:02Z ru $");
37
38#include "opt_sctp.h"
39#include "opt_compat.h"
40#include "opt_ktrace.h"
41#include "opt_mac.h"
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/kernel.h>
46#include <sys/lock.h>
47#include <sys/mutex.h>
48#include <sys/sysproto.h>
49#include <sys/malloc.h>
50#include <sys/filedesc.h>
51#include <sys/event.h>
52#include <sys/proc.h>
53#include <sys/fcntl.h>
54#include <sys/file.h>
55#include <sys/filio.h>
56#include <sys/mount.h>
57#include <sys/mbuf.h>
58#include <sys/protosw.h>
59#include <sys/sf_buf.h>
60#include <sys/socket.h>
61#include <sys/socketvar.h>
62#include <sys/signalvar.h>
63#include <sys/syscallsubr.h>
64#include <sys/sysctl.h>
65#include <sys/uio.h>
66#include <sys/vnode.h>
67#ifdef KTRACE
68#include <sys/ktrace.h>
69#endif
70
71#include <security/mac/mac_framework.h>
72
73#include <vm/vm.h>
74#include <vm/vm_object.h>
75#include <vm/vm_page.h>
76#include <vm/vm_pageout.h>
77#include <vm/vm_kern.h>
78#include <vm/vm_extern.h>
79
80#ifdef SCTP
81#include <netinet/sctp.h>
82#include <netinet/sctp_peeloff.h>
83#endif /* SCTP */
84
85static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
86static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);
87
88static int accept1(struct thread *td, struct accept_args *uap, int compat);
89static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
90static int getsockname1(struct thread *td, struct getsockname_args *uap,
91 int compat);
92static int getpeername1(struct thread *td, struct getpeername_args *uap,
93 int compat);
94
95/*
96 * NSFBUFS-related variables and associated sysctls
97 */
98int nsfbufs;
99int nsfbufspeak;
100int nsfbufsused;
101
102SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
103 "Maximum number of sendfile(2) sf_bufs available");
104SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
105 "Number of sendfile(2) sf_bufs at peak usage");
106SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
107 "Number of sendfile(2) sf_bufs in use");
108
109/*
110 * Convert a user file descriptor to a kernel file entry. A reference on the
111 * file entry is held upon returning. This is lighter weight than
112 * fgetsock(), which bumps the socket reference drops the file reference
113 * count instead, as this approach avoids several additional mutex operations
114 * associated with the additional reference count. If requested, return the
115 * open file flags.
116 */
117static int
118getsock(struct filedesc *fdp, int fd, struct file **fpp, u_int *fflagp)
119{
120 struct file *fp;
121 int error;
122
123 fp = NULL;
124 if (fdp == NULL)
125 error = EBADF;
126 else {
127 FILEDESC_SLOCK(fdp);
128 fp = fget_locked(fdp, fd);
129 if (fp == NULL)
130 error = EBADF;
131 else if (fp->f_type != DTYPE_SOCKET) {
132 fp = NULL;
133 error = ENOTSOCK;
134 } else {
135 fhold(fp);
136 if (fflagp != NULL)
137 *fflagp = fp->f_flag;
138 error = 0;
139 }
140 FILEDESC_SUNLOCK(fdp);
141 }
142 *fpp = fp;
143 return (error);
144}
145
146/*
147 * System call interface to the socket abstraction.
148 */
149#if defined(COMPAT_43)
150#define COMPAT_OLDSOCK
151#endif
152
153int
154socket(td, uap)
155 struct thread *td;
156 struct socket_args /* {
157 int domain;
158 int type;
159 int protocol;
160 } */ *uap;
161{
162 struct filedesc *fdp;
163 struct socket *so;
164 struct file *fp;
165 int fd, error;
166
167#ifdef MAC
168 error = mac_socket_check_create(td->td_ucred, uap->domain, uap->type,
169 uap->protocol);
170 if (error)
171 return (error);
172#endif
173 fdp = td->td_proc->p_fd;
174 error = falloc(td, &fp, &fd);
175 if (error)
176 return (error);
177 /* An extra reference on `fp' has been held for us by falloc(). */
178 error = socreate(uap->domain, &so, uap->type, uap->protocol,
179 td->td_ucred, td);
180 if (error) {
181 fdclose(fdp, fp, fd, td);
182 } else {
183 finit(fp, FREAD | FWRITE, DTYPE_SOCKET, so, &socketops);
184 td->td_retval[0] = fd;
185 }
186 fdrop(fp, td);
187 return (error);
188}
189
190/* ARGSUSED */
191int
192bind(td, uap)
193 struct thread *td;
194 struct bind_args /* {
195 int s;
196 caddr_t name;
197 int namelen;
198 } */ *uap;
199{
200 struct sockaddr *sa;
201 int error;
202
203 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
204 return (error);
205
206 error = kern_bind(td, uap->s, sa);
207 free(sa, M_SONAME);
208 return (error);
209}
210
211int
212kern_bind(td, fd, sa)
213 struct thread *td;
214 int fd;
215 struct sockaddr *sa;
216{
217 struct socket *so;
218 struct file *fp;
219 int error;
220
221 error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
222 if (error)
223 return (error);
224 so = fp->f_data;
225#ifdef KTRACE
226 if (KTRPOINT(td, KTR_STRUCT))
227 ktrsockaddr(sa);
228#endif
229#ifdef MAC
230 SOCK_LOCK(so);
231 error = mac_socket_check_bind(td->td_ucred, so, sa);
232 SOCK_UNLOCK(so);
233 if (error)
234 goto done;
235#endif
236 error = sobind(so, sa, td);
237#ifdef MAC
238done:
239#endif
240 fdrop(fp, td);
241 return (error);
242}
243
244/* ARGSUSED */
245int
246listen(td, uap)
247 struct thread *td;
248 struct listen_args /* {
249 int s;
250 int backlog;
251 } */ *uap;
252{
253 struct socket *so;
254 struct file *fp;
255 int error;
256
257 error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL);
258 if (error == 0) {
259 so = fp->f_data;
260#ifdef MAC
261 SOCK_LOCK(so);
262 error = mac_socket_check_listen(td->td_ucred, so);
263 SOCK_UNLOCK(so);
264 if (error)
265 goto done;
266#endif
267 error = solisten(so, uap->backlog, td);
268#ifdef MAC
269done:
270#endif
271 fdrop(fp, td);
272 }
273 return(error);
274}
275
276/*
277 * accept1()
278 */
279static int
280accept1(td, uap, compat)
281 struct thread *td;
282 struct accept_args /* {
283 int s;
284 struct sockaddr * __restrict name;
285 socklen_t * __restrict anamelen;
286 } */ *uap;
287 int compat;
288{
289 struct sockaddr *name;
290 socklen_t namelen;
291 struct file *fp;
292 int error;
293
294 if (uap->name == NULL)
295 return (kern_accept(td, uap->s, NULL, NULL, NULL));
296
297 error = copyin(uap->anamelen, &namelen, sizeof (namelen));
298 if (error)
299 return (error);
300
301 error = kern_accept(td, uap->s, &name, &namelen, &fp);
302
303 /*
304 * return a namelen of zero for older code which might
305 * ignore the return value from accept.
306 */
307 if (error) {
308 (void) copyout(&namelen,
309 uap->anamelen, sizeof(*uap->anamelen));
310 return (error);
311 }
312
313 if (error == 0 && name != NULL) {
314#ifdef COMPAT_OLDSOCK
315 if (compat)
316 ((struct osockaddr *)name)->sa_family =
317 name->sa_family;
318#endif
319 error = copyout(name, uap->name, namelen);
320 }
321 if (error == 0)
322 error = copyout(&namelen, uap->anamelen,
323 sizeof(namelen));
324 if (error)
325 fdclose(td->td_proc->p_fd, fp, td->td_retval[0], td);
326 fdrop(fp, td);
327 free(name, M_SONAME);
328 return (error);
329}
330
331int
332kern_accept(struct thread *td, int s, struct sockaddr **name,
333 socklen_t *namelen, struct file **fp)
334{
335 struct filedesc *fdp;
336 struct file *headfp, *nfp = NULL;
337 struct sockaddr *sa = NULL;
338 int error;
339 struct socket *head, *so;
340 int fd;
341 u_int fflag;
342 pid_t pgid;
343 int tmp;
344
345 if (name) {
346 *name = NULL;
347 if (*namelen < 0)
348 return (EINVAL);
349 }
350
351 fdp = td->td_proc->p_fd;
352 error = getsock(fdp, s, &headfp, &fflag);
353 if (error)
354 return (error);
355 head = headfp->f_data;
356 if ((head->so_options & SO_ACCEPTCONN) == 0) {
357 error = EINVAL;
358 goto done;
359 }
360#ifdef MAC
361 SOCK_LOCK(head);
362 error = mac_socket_check_accept(td->td_ucred, head);
363 SOCK_UNLOCK(head);
364 if (error != 0)
365 goto done;
366#endif
367 error = falloc(td, &nfp, &fd);
368 if (error)
369 goto done;
370 ACCEPT_LOCK();
371 if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
372 ACCEPT_UNLOCK();
373 error = EWOULDBLOCK;
374 goto noconnection;
375 }
376 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
377 if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
378 head->so_error = ECONNABORTED;
379 break;
380 }
381 error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
382 "accept", 0);
383 if (error) {
384 ACCEPT_UNLOCK();
385 goto noconnection;
386 }
387 }
388 if (head->so_error) {
389 error = head->so_error;
390 head->so_error = 0;
391 ACCEPT_UNLOCK();
392 goto noconnection;
393 }
394 so = TAILQ_FIRST(&head->so_comp);
395 KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
396 KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
397
398 /*
399 * Before changing the flags on the socket, we have to bump the
400 * reference count. Otherwise, if the protocol calls sofree(),
401 * the socket will be released due to a zero refcount.
402 */
403 SOCK_LOCK(so); /* soref() and so_state update */
404 soref(so); /* file descriptor reference */
405
406 TAILQ_REMOVE(&head->so_comp, so, so_list);
407 head->so_qlen--;
408 so->so_state |= (head->so_state & SS_NBIO);
409 so->so_qstate &= ~SQ_COMP;
410 so->so_head = NULL;
411
412 SOCK_UNLOCK(so);
413 ACCEPT_UNLOCK();
414
415 /* An extra reference on `nfp' has been held for us by falloc(). */
416 td->td_retval[0] = fd;
417
418 /* connection has been removed from the listen queue */
419 KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);
420
421 pgid = fgetown(&head->so_sigio);
422 if (pgid != 0)
423 fsetown(pgid, &so->so_sigio);
424
425 finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
426 /* Sync socket nonblocking/async state with file flags */
427 tmp = fflag & FNONBLOCK;
428 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
429 tmp = fflag & FASYNC;
430 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
431 sa = 0;
432 error = soaccept(so, &sa);
433 if (error) {
434 /*
435 * return a namelen of zero for older code which might
436 * ignore the return value from accept.
437 */
438 if (name)
439 *namelen = 0;
440 goto noconnection;
441 }
442 if (sa == NULL) {
443 if (name)
444 *namelen = 0;
445 goto done;
446 }
447 if (name) {
448 /* check sa_len before it is destroyed */
449 if (*namelen > sa->sa_len)
450 *namelen = sa->sa_len;
451#ifdef KTRACE
452 if (KTRPOINT(td, KTR_STRUCT))
453 ktrsockaddr(sa);
454#endif
455 *name = sa;
456 sa = NULL;
457 }
458noconnection:
459 if (sa)
460 FREE(sa, M_SONAME);
461
462 /*
463 * close the new descriptor, assuming someone hasn't ripped it
464 * out from under us.
465 */
466 if (error)
467 fdclose(fdp, nfp, fd, td);
468
469 /*
470 * Release explicitly held references before returning. We return
471 * a reference on nfp to the caller on success if they request it.
472 */
473done:
474 if (fp != NULL) {
475 if (error == 0) {
476 *fp = nfp;
477 nfp = NULL;
478 } else
479 *fp = NULL;
480 }
481 if (nfp != NULL)
482 fdrop(nfp, td);
483 fdrop(headfp, td);
484 return (error);
485}
486
/*
 * accept(2) entry point: accept1() with the compat flag clear.
 */
int
accept(struct thread *td, struct accept_args *uap)
{
	int error;

	error = accept1(td, uap, 0);
	return (error);
}
495
#ifdef COMPAT_OLDSOCK
/*
 * Old-socket flavour of accept: accept1() with the compat flag set.
 */
int
oaccept(struct thread *td, struct accept_args *uap)
{
	int error;

	error = accept1(td, uap, 1);
	return (error);
}
#endif /* COMPAT_OLDSOCK */
506
507/* ARGSUSED */
508int
509connect(td, uap)
510 struct thread *td;
511 struct connect_args /* {
512 int s;
513 caddr_t name;
514 int namelen;
515 } */ *uap;
516{
517 struct sockaddr *sa;
518 int error;
519
520 error = getsockaddr(&sa, uap->name, uap->namelen);
521 if (error)
522 return (error);
523
524 error = kern_connect(td, uap->s, sa);
525 free(sa, M_SONAME);
526 return (error);
527}
528
529
530int
531kern_connect(td, fd, sa)
532 struct thread *td;
533 int fd;
534 struct sockaddr *sa;
535{
536 struct socket *so;
537 struct file *fp;
538 int error;
539 int interrupted = 0;
540
541 error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
542 if (error)
543 return (error);
544 so = fp->f_data;
545 if (so->so_state & SS_ISCONNECTING) {
546 error = EALREADY;
547 goto done1;
548 }
549#ifdef KTRACE
550 if (KTRPOINT(td, KTR_STRUCT))
551 ktrsockaddr(sa);
552#endif
553#ifdef MAC
554 SOCK_LOCK(so);
555 error = mac_socket_check_connect(td->td_ucred, so, sa);
556 SOCK_UNLOCK(so);
557 if (error)
558 goto bad;
559#endif
560 error = soconnect(so, sa, td);
561 if (error)
562 goto bad;
563 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
564 error = EINPROGRESS;
565 goto done1;
566 }
567 SOCK_LOCK(so);
568 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
569 error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
570 "connec", 0);
571 if (error) {
572 if (error == EINTR || error == ERESTART)
573 interrupted = 1;
574 break;
575 }
576 }
577 if (error == 0) {
578 error = so->so_error;
579 so->so_error = 0;
580 }
581 SOCK_UNLOCK(so);
582bad:
583 if (!interrupted)
584 so->so_state &= ~SS_ISCONNECTING;
585 if (error == ERESTART)
586 error = EINTR;
587done1:
588 fdrop(fp, td);
589 return (error);
590}
591
592int
593socketpair(td, uap)
594 struct thread *td;
595 struct socketpair_args /* {
596 int domain;
597 int type;
598 int protocol;
599 int *rsv;
600 } */ *uap;
601{
602 struct filedesc *fdp = td->td_proc->p_fd;
603 struct file *fp1, *fp2;
604 struct socket *so1, *so2;
605 int fd, error, sv[2];
606
607#ifdef MAC
608 /* We might want to have a separate check for socket pairs. */
609 error = mac_socket_check_create(td->td_ucred, uap->domain, uap->type,
610 uap->protocol);
611 if (error)
612 return (error);
613#endif
614
615 error = socreate(uap->domain, &so1, uap->type, uap->protocol,
616 td->td_ucred, td);
617 if (error)
618 return (error);
619 error = socreate(uap->domain, &so2, uap->type, uap->protocol,
620 td->td_ucred, td);
621 if (error)
622 goto free1;
623 /* On success extra reference to `fp1' and 'fp2' is set by falloc. */
624 error = falloc(td, &fp1, &fd);
625 if (error)
626 goto free2;
627 sv[0] = fd;
628 fp1->f_data = so1; /* so1 already has ref count */
629 error = falloc(td, &fp2, &fd);
630 if (error)
631 goto free3;
632 fp2->f_data = so2; /* so2 already has ref count */
633 sv[1] = fd;
634 error = soconnect2(so1, so2);
635 if (error)
636 goto free4;
637 if (uap->type == SOCK_DGRAM) {
638 /*
639 * Datagram socket connection is asymmetric.
640 */
641 error = soconnect2(so2, so1);
642 if (error)
643 goto free4;
644 }
645 finit(fp1, FREAD | FWRITE, DTYPE_SOCKET, fp1->f_data, &socketops);
646 finit(fp2, FREAD | FWRITE, DTYPE_SOCKET, fp2->f_data, &socketops);
647 so1 = so2 = NULL;
648 error = copyout(sv, uap->rsv, 2 * sizeof (int));
649 if (error)
650 goto free4;
651 fdrop(fp1, td);
652 fdrop(fp2, td);
653 return (0);
654free4:
655 fdclose(fdp, fp2, sv[1], td);
656 fdrop(fp2, td);
657free3:
658 fdclose(fdp, fp1, sv[0], td);
659 fdrop(fp1, td);
660free2:
661 if (so2 != NULL)
662 (void)soclose(so2);
663free1:
664 if (so1 != NULL)
665 (void)soclose(so1);
666 return (error);
667}
668
669static int
670sendit(td, s, mp, flags)
671 struct thread *td;
672 int s;
673 struct msghdr *mp;
674 int flags;
675{
676 struct mbuf *control;
677 struct sockaddr *to;
678 int error;
679
680 if (mp->msg_name != NULL) {
681 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
682 if (error) {
683 to = NULL;
684 goto bad;
685 }
686 mp->msg_name = to;
687 } else {
688 to = NULL;
689 }
690
691 if (mp->msg_control) {
692 if (mp->msg_controllen < sizeof(struct cmsghdr)
693#ifdef COMPAT_OLDSOCK
694 && mp->msg_flags != MSG_COMPAT
695#endif
696 ) {
697 error = EINVAL;
698 goto bad;
699 }
700 error = sockargs(&control, mp->msg_control,
701 mp->msg_controllen, MT_CONTROL);
702 if (error)
703 goto bad;
704#ifdef COMPAT_OLDSOCK
705 if (mp->msg_flags == MSG_COMPAT) {
706 struct cmsghdr *cm;
707
37
38#include "opt_sctp.h"
39#include "opt_compat.h"
40#include "opt_ktrace.h"
41#include "opt_mac.h"
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/kernel.h>
46#include <sys/lock.h>
47#include <sys/mutex.h>
48#include <sys/sysproto.h>
49#include <sys/malloc.h>
50#include <sys/filedesc.h>
51#include <sys/event.h>
52#include <sys/proc.h>
53#include <sys/fcntl.h>
54#include <sys/file.h>
55#include <sys/filio.h>
56#include <sys/mount.h>
57#include <sys/mbuf.h>
58#include <sys/protosw.h>
59#include <sys/sf_buf.h>
60#include <sys/socket.h>
61#include <sys/socketvar.h>
62#include <sys/signalvar.h>
63#include <sys/syscallsubr.h>
64#include <sys/sysctl.h>
65#include <sys/uio.h>
66#include <sys/vnode.h>
67#ifdef KTRACE
68#include <sys/ktrace.h>
69#endif
70
71#include <security/mac/mac_framework.h>
72
73#include <vm/vm.h>
74#include <vm/vm_object.h>
75#include <vm/vm_page.h>
76#include <vm/vm_pageout.h>
77#include <vm/vm_kern.h>
78#include <vm/vm_extern.h>
79
80#ifdef SCTP
81#include <netinet/sctp.h>
82#include <netinet/sctp_peeloff.h>
83#endif /* SCTP */
84
85static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
86static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);
87
88static int accept1(struct thread *td, struct accept_args *uap, int compat);
89static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
90static int getsockname1(struct thread *td, struct getsockname_args *uap,
91 int compat);
92static int getpeername1(struct thread *td, struct getpeername_args *uap,
93 int compat);
94
95/*
96 * NSFBUFS-related variables and associated sysctls
97 */
98int nsfbufs;
99int nsfbufspeak;
100int nsfbufsused;
101
102SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
103 "Maximum number of sendfile(2) sf_bufs available");
104SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
105 "Number of sendfile(2) sf_bufs at peak usage");
106SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
107 "Number of sendfile(2) sf_bufs in use");
108
109/*
110 * Convert a user file descriptor to a kernel file entry. A reference on the
111 * file entry is held upon returning. This is lighter weight than
112 * fgetsock(), which bumps the socket reference drops the file reference
113 * count instead, as this approach avoids several additional mutex operations
114 * associated with the additional reference count. If requested, return the
115 * open file flags.
116 */
117static int
118getsock(struct filedesc *fdp, int fd, struct file **fpp, u_int *fflagp)
119{
120 struct file *fp;
121 int error;
122
123 fp = NULL;
124 if (fdp == NULL)
125 error = EBADF;
126 else {
127 FILEDESC_SLOCK(fdp);
128 fp = fget_locked(fdp, fd);
129 if (fp == NULL)
130 error = EBADF;
131 else if (fp->f_type != DTYPE_SOCKET) {
132 fp = NULL;
133 error = ENOTSOCK;
134 } else {
135 fhold(fp);
136 if (fflagp != NULL)
137 *fflagp = fp->f_flag;
138 error = 0;
139 }
140 FILEDESC_SUNLOCK(fdp);
141 }
142 *fpp = fp;
143 return (error);
144}
145
146/*
147 * System call interface to the socket abstraction.
148 */
149#if defined(COMPAT_43)
150#define COMPAT_OLDSOCK
151#endif
152
153int
154socket(td, uap)
155 struct thread *td;
156 struct socket_args /* {
157 int domain;
158 int type;
159 int protocol;
160 } */ *uap;
161{
162 struct filedesc *fdp;
163 struct socket *so;
164 struct file *fp;
165 int fd, error;
166
167#ifdef MAC
168 error = mac_socket_check_create(td->td_ucred, uap->domain, uap->type,
169 uap->protocol);
170 if (error)
171 return (error);
172#endif
173 fdp = td->td_proc->p_fd;
174 error = falloc(td, &fp, &fd);
175 if (error)
176 return (error);
177 /* An extra reference on `fp' has been held for us by falloc(). */
178 error = socreate(uap->domain, &so, uap->type, uap->protocol,
179 td->td_ucred, td);
180 if (error) {
181 fdclose(fdp, fp, fd, td);
182 } else {
183 finit(fp, FREAD | FWRITE, DTYPE_SOCKET, so, &socketops);
184 td->td_retval[0] = fd;
185 }
186 fdrop(fp, td);
187 return (error);
188}
189
190/* ARGSUSED */
191int
192bind(td, uap)
193 struct thread *td;
194 struct bind_args /* {
195 int s;
196 caddr_t name;
197 int namelen;
198 } */ *uap;
199{
200 struct sockaddr *sa;
201 int error;
202
203 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
204 return (error);
205
206 error = kern_bind(td, uap->s, sa);
207 free(sa, M_SONAME);
208 return (error);
209}
210
211int
212kern_bind(td, fd, sa)
213 struct thread *td;
214 int fd;
215 struct sockaddr *sa;
216{
217 struct socket *so;
218 struct file *fp;
219 int error;
220
221 error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
222 if (error)
223 return (error);
224 so = fp->f_data;
225#ifdef KTRACE
226 if (KTRPOINT(td, KTR_STRUCT))
227 ktrsockaddr(sa);
228#endif
229#ifdef MAC
230 SOCK_LOCK(so);
231 error = mac_socket_check_bind(td->td_ucred, so, sa);
232 SOCK_UNLOCK(so);
233 if (error)
234 goto done;
235#endif
236 error = sobind(so, sa, td);
237#ifdef MAC
238done:
239#endif
240 fdrop(fp, td);
241 return (error);
242}
243
244/* ARGSUSED */
245int
246listen(td, uap)
247 struct thread *td;
248 struct listen_args /* {
249 int s;
250 int backlog;
251 } */ *uap;
252{
253 struct socket *so;
254 struct file *fp;
255 int error;
256
257 error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL);
258 if (error == 0) {
259 so = fp->f_data;
260#ifdef MAC
261 SOCK_LOCK(so);
262 error = mac_socket_check_listen(td->td_ucred, so);
263 SOCK_UNLOCK(so);
264 if (error)
265 goto done;
266#endif
267 error = solisten(so, uap->backlog, td);
268#ifdef MAC
269done:
270#endif
271 fdrop(fp, td);
272 }
273 return(error);
274}
275
276/*
277 * accept1()
278 */
279static int
280accept1(td, uap, compat)
281 struct thread *td;
282 struct accept_args /* {
283 int s;
284 struct sockaddr * __restrict name;
285 socklen_t * __restrict anamelen;
286 } */ *uap;
287 int compat;
288{
289 struct sockaddr *name;
290 socklen_t namelen;
291 struct file *fp;
292 int error;
293
294 if (uap->name == NULL)
295 return (kern_accept(td, uap->s, NULL, NULL, NULL));
296
297 error = copyin(uap->anamelen, &namelen, sizeof (namelen));
298 if (error)
299 return (error);
300
301 error = kern_accept(td, uap->s, &name, &namelen, &fp);
302
303 /*
304 * return a namelen of zero for older code which might
305 * ignore the return value from accept.
306 */
307 if (error) {
308 (void) copyout(&namelen,
309 uap->anamelen, sizeof(*uap->anamelen));
310 return (error);
311 }
312
313 if (error == 0 && name != NULL) {
314#ifdef COMPAT_OLDSOCK
315 if (compat)
316 ((struct osockaddr *)name)->sa_family =
317 name->sa_family;
318#endif
319 error = copyout(name, uap->name, namelen);
320 }
321 if (error == 0)
322 error = copyout(&namelen, uap->anamelen,
323 sizeof(namelen));
324 if (error)
325 fdclose(td->td_proc->p_fd, fp, td->td_retval[0], td);
326 fdrop(fp, td);
327 free(name, M_SONAME);
328 return (error);
329}
330
331int
332kern_accept(struct thread *td, int s, struct sockaddr **name,
333 socklen_t *namelen, struct file **fp)
334{
335 struct filedesc *fdp;
336 struct file *headfp, *nfp = NULL;
337 struct sockaddr *sa = NULL;
338 int error;
339 struct socket *head, *so;
340 int fd;
341 u_int fflag;
342 pid_t pgid;
343 int tmp;
344
345 if (name) {
346 *name = NULL;
347 if (*namelen < 0)
348 return (EINVAL);
349 }
350
351 fdp = td->td_proc->p_fd;
352 error = getsock(fdp, s, &headfp, &fflag);
353 if (error)
354 return (error);
355 head = headfp->f_data;
356 if ((head->so_options & SO_ACCEPTCONN) == 0) {
357 error = EINVAL;
358 goto done;
359 }
360#ifdef MAC
361 SOCK_LOCK(head);
362 error = mac_socket_check_accept(td->td_ucred, head);
363 SOCK_UNLOCK(head);
364 if (error != 0)
365 goto done;
366#endif
367 error = falloc(td, &nfp, &fd);
368 if (error)
369 goto done;
370 ACCEPT_LOCK();
371 if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
372 ACCEPT_UNLOCK();
373 error = EWOULDBLOCK;
374 goto noconnection;
375 }
376 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
377 if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
378 head->so_error = ECONNABORTED;
379 break;
380 }
381 error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
382 "accept", 0);
383 if (error) {
384 ACCEPT_UNLOCK();
385 goto noconnection;
386 }
387 }
388 if (head->so_error) {
389 error = head->so_error;
390 head->so_error = 0;
391 ACCEPT_UNLOCK();
392 goto noconnection;
393 }
394 so = TAILQ_FIRST(&head->so_comp);
395 KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
396 KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
397
398 /*
399 * Before changing the flags on the socket, we have to bump the
400 * reference count. Otherwise, if the protocol calls sofree(),
401 * the socket will be released due to a zero refcount.
402 */
403 SOCK_LOCK(so); /* soref() and so_state update */
404 soref(so); /* file descriptor reference */
405
406 TAILQ_REMOVE(&head->so_comp, so, so_list);
407 head->so_qlen--;
408 so->so_state |= (head->so_state & SS_NBIO);
409 so->so_qstate &= ~SQ_COMP;
410 so->so_head = NULL;
411
412 SOCK_UNLOCK(so);
413 ACCEPT_UNLOCK();
414
415 /* An extra reference on `nfp' has been held for us by falloc(). */
416 td->td_retval[0] = fd;
417
418 /* connection has been removed from the listen queue */
419 KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);
420
421 pgid = fgetown(&head->so_sigio);
422 if (pgid != 0)
423 fsetown(pgid, &so->so_sigio);
424
425 finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
426 /* Sync socket nonblocking/async state with file flags */
427 tmp = fflag & FNONBLOCK;
428 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
429 tmp = fflag & FASYNC;
430 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
431 sa = 0;
432 error = soaccept(so, &sa);
433 if (error) {
434 /*
435 * return a namelen of zero for older code which might
436 * ignore the return value from accept.
437 */
438 if (name)
439 *namelen = 0;
440 goto noconnection;
441 }
442 if (sa == NULL) {
443 if (name)
444 *namelen = 0;
445 goto done;
446 }
447 if (name) {
448 /* check sa_len before it is destroyed */
449 if (*namelen > sa->sa_len)
450 *namelen = sa->sa_len;
451#ifdef KTRACE
452 if (KTRPOINT(td, KTR_STRUCT))
453 ktrsockaddr(sa);
454#endif
455 *name = sa;
456 sa = NULL;
457 }
458noconnection:
459 if (sa)
460 FREE(sa, M_SONAME);
461
462 /*
463 * close the new descriptor, assuming someone hasn't ripped it
464 * out from under us.
465 */
466 if (error)
467 fdclose(fdp, nfp, fd, td);
468
469 /*
470 * Release explicitly held references before returning. We return
471 * a reference on nfp to the caller on success if they request it.
472 */
473done:
474 if (fp != NULL) {
475 if (error == 0) {
476 *fp = nfp;
477 nfp = NULL;
478 } else
479 *fp = NULL;
480 }
481 if (nfp != NULL)
482 fdrop(nfp, td);
483 fdrop(headfp, td);
484 return (error);
485}
486
/*
 * accept(2) entry point: accept1() with the compat flag clear.
 */
int
accept(struct thread *td, struct accept_args *uap)
{
	int error;

	error = accept1(td, uap, 0);
	return (error);
}
495
#ifdef COMPAT_OLDSOCK
/*
 * Old-socket flavour of accept: accept1() with the compat flag set.
 */
int
oaccept(struct thread *td, struct accept_args *uap)
{
	int error;

	error = accept1(td, uap, 1);
	return (error);
}
#endif /* COMPAT_OLDSOCK */
506
507/* ARGSUSED */
508int
509connect(td, uap)
510 struct thread *td;
511 struct connect_args /* {
512 int s;
513 caddr_t name;
514 int namelen;
515 } */ *uap;
516{
517 struct sockaddr *sa;
518 int error;
519
520 error = getsockaddr(&sa, uap->name, uap->namelen);
521 if (error)
522 return (error);
523
524 error = kern_connect(td, uap->s, sa);
525 free(sa, M_SONAME);
526 return (error);
527}
528
529
530int
531kern_connect(td, fd, sa)
532 struct thread *td;
533 int fd;
534 struct sockaddr *sa;
535{
536 struct socket *so;
537 struct file *fp;
538 int error;
539 int interrupted = 0;
540
541 error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
542 if (error)
543 return (error);
544 so = fp->f_data;
545 if (so->so_state & SS_ISCONNECTING) {
546 error = EALREADY;
547 goto done1;
548 }
549#ifdef KTRACE
550 if (KTRPOINT(td, KTR_STRUCT))
551 ktrsockaddr(sa);
552#endif
553#ifdef MAC
554 SOCK_LOCK(so);
555 error = mac_socket_check_connect(td->td_ucred, so, sa);
556 SOCK_UNLOCK(so);
557 if (error)
558 goto bad;
559#endif
560 error = soconnect(so, sa, td);
561 if (error)
562 goto bad;
563 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
564 error = EINPROGRESS;
565 goto done1;
566 }
567 SOCK_LOCK(so);
568 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
569 error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
570 "connec", 0);
571 if (error) {
572 if (error == EINTR || error == ERESTART)
573 interrupted = 1;
574 break;
575 }
576 }
577 if (error == 0) {
578 error = so->so_error;
579 so->so_error = 0;
580 }
581 SOCK_UNLOCK(so);
582bad:
583 if (!interrupted)
584 so->so_state &= ~SS_ISCONNECTING;
585 if (error == ERESTART)
586 error = EINTR;
587done1:
588 fdrop(fp, td);
589 return (error);
590}
591
592int
593socketpair(td, uap)
594 struct thread *td;
595 struct socketpair_args /* {
596 int domain;
597 int type;
598 int protocol;
599 int *rsv;
600 } */ *uap;
601{
602 struct filedesc *fdp = td->td_proc->p_fd;
603 struct file *fp1, *fp2;
604 struct socket *so1, *so2;
605 int fd, error, sv[2];
606
607#ifdef MAC
608 /* We might want to have a separate check for socket pairs. */
609 error = mac_socket_check_create(td->td_ucred, uap->domain, uap->type,
610 uap->protocol);
611 if (error)
612 return (error);
613#endif
614
615 error = socreate(uap->domain, &so1, uap->type, uap->protocol,
616 td->td_ucred, td);
617 if (error)
618 return (error);
619 error = socreate(uap->domain, &so2, uap->type, uap->protocol,
620 td->td_ucred, td);
621 if (error)
622 goto free1;
623 /* On success extra reference to `fp1' and 'fp2' is set by falloc. */
624 error = falloc(td, &fp1, &fd);
625 if (error)
626 goto free2;
627 sv[0] = fd;
628 fp1->f_data = so1; /* so1 already has ref count */
629 error = falloc(td, &fp2, &fd);
630 if (error)
631 goto free3;
632 fp2->f_data = so2; /* so2 already has ref count */
633 sv[1] = fd;
634 error = soconnect2(so1, so2);
635 if (error)
636 goto free4;
637 if (uap->type == SOCK_DGRAM) {
638 /*
639 * Datagram socket connection is asymmetric.
640 */
641 error = soconnect2(so2, so1);
642 if (error)
643 goto free4;
644 }
645 finit(fp1, FREAD | FWRITE, DTYPE_SOCKET, fp1->f_data, &socketops);
646 finit(fp2, FREAD | FWRITE, DTYPE_SOCKET, fp2->f_data, &socketops);
647 so1 = so2 = NULL;
648 error = copyout(sv, uap->rsv, 2 * sizeof (int));
649 if (error)
650 goto free4;
651 fdrop(fp1, td);
652 fdrop(fp2, td);
653 return (0);
654free4:
655 fdclose(fdp, fp2, sv[1], td);
656 fdrop(fp2, td);
657free3:
658 fdclose(fdp, fp1, sv[0], td);
659 fdrop(fp1, td);
660free2:
661 if (so2 != NULL)
662 (void)soclose(so2);
663free1:
664 if (so1 != NULL)
665 (void)soclose(so1);
666 return (error);
667}
668
669static int
670sendit(td, s, mp, flags)
671 struct thread *td;
672 int s;
673 struct msghdr *mp;
674 int flags;
675{
676 struct mbuf *control;
677 struct sockaddr *to;
678 int error;
679
680 if (mp->msg_name != NULL) {
681 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
682 if (error) {
683 to = NULL;
684 goto bad;
685 }
686 mp->msg_name = to;
687 } else {
688 to = NULL;
689 }
690
691 if (mp->msg_control) {
692 if (mp->msg_controllen < sizeof(struct cmsghdr)
693#ifdef COMPAT_OLDSOCK
694 && mp->msg_flags != MSG_COMPAT
695#endif
696 ) {
697 error = EINVAL;
698 goto bad;
699 }
700 error = sockargs(&control, mp->msg_control,
701 mp->msg_controllen, MT_CONTROL);
702 if (error)
703 goto bad;
704#ifdef COMPAT_OLDSOCK
705 if (mp->msg_flags == MSG_COMPAT) {
706 struct cmsghdr *cm;
707
708 M_PREPEND(control, sizeof(*cm), M_TRYWAIT);
709 if (control == 0) {
710 error = ENOBUFS;
711 goto bad;
712 } else {
713 cm = mtod(control, struct cmsghdr *);
714 cm->cmsg_len = control->m_len;
715 cm->cmsg_level = SOL_SOCKET;
716 cm->cmsg_type = SCM_RIGHTS;
717 }
708 M_PREPEND(control, sizeof(*cm), M_WAIT);
709 cm = mtod(control, struct cmsghdr *);
710 cm->cmsg_len = control->m_len;
711 cm->cmsg_level = SOL_SOCKET;
712 cm->cmsg_type = SCM_RIGHTS;
718 }
719#endif
720 } else {
721 control = NULL;
722 }
723
724 error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE);
725
726bad:
727 if (to)
728 FREE(to, M_SONAME);
729 return (error);
730}
731
732int
733kern_sendit(td, s, mp, flags, control, segflg)
734 struct thread *td;
735 int s;
736 struct msghdr *mp;
737 int flags;
738 struct mbuf *control;
739 enum uio_seg segflg;
740{
741 struct file *fp;
742 struct uio auio;
743 struct iovec *iov;
744 struct socket *so;
745 int i;
746 int len, error;
747#ifdef KTRACE
748 struct uio *ktruio = NULL;
749#endif
750
751 error = getsock(td->td_proc->p_fd, s, &fp, NULL);
752 if (error)
753 return (error);
754 so = (struct socket *)fp->f_data;
755
756#ifdef MAC
757 SOCK_LOCK(so);
758 error = mac_socket_check_send(td->td_ucred, so);
759 SOCK_UNLOCK(so);
760 if (error)
761 goto bad;
762#endif
763
764 auio.uio_iov = mp->msg_iov;
765 auio.uio_iovcnt = mp->msg_iovlen;
766 auio.uio_segflg = segflg;
767 auio.uio_rw = UIO_WRITE;
768 auio.uio_td = td;
769 auio.uio_offset = 0; /* XXX */
770 auio.uio_resid = 0;
771 iov = mp->msg_iov;
772 for (i = 0; i < mp->msg_iovlen; i++, iov++) {
773 if ((auio.uio_resid += iov->iov_len) < 0) {
774 error = EINVAL;
775 goto bad;
776 }
777 }
778#ifdef KTRACE
779 if (KTRPOINT(td, KTR_GENIO))
780 ktruio = cloneuio(&auio);
781#endif
782 len = auio.uio_resid;
783 error = sosend(so, mp->msg_name, &auio, 0, control, flags, td);
784 if (error) {
785 if (auio.uio_resid != len && (error == ERESTART ||
786 error == EINTR || error == EWOULDBLOCK))
787 error = 0;
788 /* Generation of SIGPIPE can be controlled per socket */
789 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
790 !(flags & MSG_NOSIGNAL)) {
791 PROC_LOCK(td->td_proc);
792 psignal(td->td_proc, SIGPIPE);
793 PROC_UNLOCK(td->td_proc);
794 }
795 }
796 if (error == 0)
797 td->td_retval[0] = len - auio.uio_resid;
798#ifdef KTRACE
799 if (ktruio != NULL) {
800 ktruio->uio_resid = td->td_retval[0];
801 ktrgenio(s, UIO_WRITE, ktruio, error);
802 }
803#endif
804bad:
805 fdrop(fp, td);
806 return (error);
807}
808
809int
810sendto(td, uap)
811 struct thread *td;
812 struct sendto_args /* {
813 int s;
814 caddr_t buf;
815 size_t len;
816 int flags;
817 caddr_t to;
818 int tolen;
819 } */ *uap;
820{
821 struct msghdr msg;
822 struct iovec aiov;
823 int error;
824
825 msg.msg_name = uap->to;
826 msg.msg_namelen = uap->tolen;
827 msg.msg_iov = &aiov;
828 msg.msg_iovlen = 1;
829 msg.msg_control = 0;
830#ifdef COMPAT_OLDSOCK
831 msg.msg_flags = 0;
832#endif
833 aiov.iov_base = uap->buf;
834 aiov.iov_len = uap->len;
835 error = sendit(td, uap->s, &msg, uap->flags);
836 return (error);
837}
838
839#ifdef COMPAT_OLDSOCK
840int
841osend(td, uap)
842 struct thread *td;
843 struct osend_args /* {
844 int s;
845 caddr_t buf;
846 int len;
847 int flags;
848 } */ *uap;
849{
850 struct msghdr msg;
851 struct iovec aiov;
852 int error;
853
854 msg.msg_name = 0;
855 msg.msg_namelen = 0;
856 msg.msg_iov = &aiov;
857 msg.msg_iovlen = 1;
858 aiov.iov_base = uap->buf;
859 aiov.iov_len = uap->len;
860 msg.msg_control = 0;
861 msg.msg_flags = 0;
862 error = sendit(td, uap->s, &msg, uap->flags);
863 return (error);
864}
865
866int
867osendmsg(td, uap)
868 struct thread *td;
869 struct osendmsg_args /* {
870 int s;
871 caddr_t msg;
872 int flags;
873 } */ *uap;
874{
875 struct msghdr msg;
876 struct iovec *iov;
877 int error;
878
879 error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
880 if (error)
881 return (error);
882 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
883 if (error)
884 return (error);
885 msg.msg_iov = iov;
886 msg.msg_flags = MSG_COMPAT;
887 error = sendit(td, uap->s, &msg, uap->flags);
888 free(iov, M_IOV);
889 return (error);
890}
891#endif
892
893int
894sendmsg(td, uap)
895 struct thread *td;
896 struct sendmsg_args /* {
897 int s;
898 caddr_t msg;
899 int flags;
900 } */ *uap;
901{
902 struct msghdr msg;
903 struct iovec *iov;
904 int error;
905
906 error = copyin(uap->msg, &msg, sizeof (msg));
907 if (error)
908 return (error);
909 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
910 if (error)
911 return (error);
912 msg.msg_iov = iov;
913#ifdef COMPAT_OLDSOCK
914 msg.msg_flags = 0;
915#endif
916 error = sendit(td, uap->s, &msg, uap->flags);
917 free(iov, M_IOV);
918 return (error);
919}
920
921int
922kern_recvit(td, s, mp, fromseg, controlp)
923 struct thread *td;
924 int s;
925 struct msghdr *mp;
926 enum uio_seg fromseg;
927 struct mbuf **controlp;
928{
929 struct uio auio;
930 struct iovec *iov;
931 int i;
932 socklen_t len;
933 int error;
934 struct mbuf *m, *control = 0;
935 caddr_t ctlbuf;
936 struct file *fp;
937 struct socket *so;
938 struct sockaddr *fromsa = 0;
939#ifdef KTRACE
940 struct uio *ktruio = NULL;
941#endif
942
943 if(controlp != NULL)
944 *controlp = 0;
945
946 error = getsock(td->td_proc->p_fd, s, &fp, NULL);
947 if (error)
948 return (error);
949 so = fp->f_data;
950
951#ifdef MAC
952 SOCK_LOCK(so);
953 error = mac_socket_check_receive(td->td_ucred, so);
954 SOCK_UNLOCK(so);
955 if (error) {
956 fdrop(fp, td);
957 return (error);
958 }
959#endif
960
961 auio.uio_iov = mp->msg_iov;
962 auio.uio_iovcnt = mp->msg_iovlen;
963 auio.uio_segflg = UIO_USERSPACE;
964 auio.uio_rw = UIO_READ;
965 auio.uio_td = td;
966 auio.uio_offset = 0; /* XXX */
967 auio.uio_resid = 0;
968 iov = mp->msg_iov;
969 for (i = 0; i < mp->msg_iovlen; i++, iov++) {
970 if ((auio.uio_resid += iov->iov_len) < 0) {
971 fdrop(fp, td);
972 return (EINVAL);
973 }
974 }
975#ifdef KTRACE
976 if (KTRPOINT(td, KTR_GENIO))
977 ktruio = cloneuio(&auio);
978#endif
979 len = auio.uio_resid;
980 error = soreceive(so, &fromsa, &auio, (struct mbuf **)0,
981 (mp->msg_control || controlp) ? &control : (struct mbuf **)0,
982 &mp->msg_flags);
983 if (error) {
984 if (auio.uio_resid != (int)len && (error == ERESTART ||
985 error == EINTR || error == EWOULDBLOCK))
986 error = 0;
987 }
988#ifdef KTRACE
989 if (ktruio != NULL) {
990 ktruio->uio_resid = (int)len - auio.uio_resid;
991 ktrgenio(s, UIO_READ, ktruio, error);
992 }
993#endif
994 if (error)
995 goto out;
996 td->td_retval[0] = (int)len - auio.uio_resid;
997 if (mp->msg_name) {
998 len = mp->msg_namelen;
999 if (len <= 0 || fromsa == 0)
1000 len = 0;
1001 else {
1002 /* save sa_len before it is destroyed by MSG_COMPAT */
1003 len = MIN(len, fromsa->sa_len);
1004#ifdef COMPAT_OLDSOCK
1005 if (mp->msg_flags & MSG_COMPAT)
1006 ((struct osockaddr *)fromsa)->sa_family =
1007 fromsa->sa_family;
1008#endif
1009 if (fromseg == UIO_USERSPACE) {
1010 error = copyout(fromsa, mp->msg_name,
1011 (unsigned)len);
1012 if (error)
1013 goto out;
1014 } else
1015 bcopy(fromsa, mp->msg_name, len);
1016 }
1017 mp->msg_namelen = len;
1018 }
1019 if (mp->msg_control && controlp == NULL) {
1020#ifdef COMPAT_OLDSOCK
1021 /*
1022 * We assume that old recvmsg calls won't receive access
1023 * rights and other control info, esp. as control info
1024 * is always optional and those options didn't exist in 4.3.
1025 * If we receive rights, trim the cmsghdr; anything else
1026 * is tossed.
1027 */
1028 if (control && mp->msg_flags & MSG_COMPAT) {
1029 if (mtod(control, struct cmsghdr *)->cmsg_level !=
1030 SOL_SOCKET ||
1031 mtod(control, struct cmsghdr *)->cmsg_type !=
1032 SCM_RIGHTS) {
1033 mp->msg_controllen = 0;
1034 goto out;
1035 }
1036 control->m_len -= sizeof (struct cmsghdr);
1037 control->m_data += sizeof (struct cmsghdr);
1038 }
1039#endif
1040 len = mp->msg_controllen;
1041 m = control;
1042 mp->msg_controllen = 0;
1043 ctlbuf = mp->msg_control;
1044
1045 while (m && len > 0) {
1046 unsigned int tocopy;
1047
1048 if (len >= m->m_len)
1049 tocopy = m->m_len;
1050 else {
1051 mp->msg_flags |= MSG_CTRUNC;
1052 tocopy = len;
1053 }
1054
1055 if ((error = copyout(mtod(m, caddr_t),
1056 ctlbuf, tocopy)) != 0)
1057 goto out;
1058
1059 ctlbuf += tocopy;
1060 len -= tocopy;
1061 m = m->m_next;
1062 }
1063 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
1064 }
1065out:
1066 fdrop(fp, td);
1067#ifdef KTRACE
1068 if (fromsa && KTRPOINT(td, KTR_STRUCT))
1069 ktrsockaddr(fromsa);
1070#endif
1071 if (fromsa)
1072 FREE(fromsa, M_SONAME);
1073
1074 if (error == 0 && controlp != NULL)
1075 *controlp = control;
1076 else if (control)
1077 m_freem(control);
1078
1079 return (error);
1080}
1081
1082static int
1083recvit(td, s, mp, namelenp)
1084 struct thread *td;
1085 int s;
1086 struct msghdr *mp;
1087 void *namelenp;
1088{
1089 int error;
1090
1091 error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL);
1092 if (error)
1093 return (error);
1094 if (namelenp) {
1095 error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t));
1096#ifdef COMPAT_OLDSOCK
1097 if (mp->msg_flags & MSG_COMPAT)
1098 error = 0; /* old recvfrom didn't check */
1099#endif
1100 }
1101 return (error);
1102}
1103
1104int
1105recvfrom(td, uap)
1106 struct thread *td;
1107 struct recvfrom_args /* {
1108 int s;
1109 caddr_t buf;
1110 size_t len;
1111 int flags;
1112 struct sockaddr * __restrict from;
1113 socklen_t * __restrict fromlenaddr;
1114 } */ *uap;
1115{
1116 struct msghdr msg;
1117 struct iovec aiov;
1118 int error;
1119
1120 if (uap->fromlenaddr) {
1121 error = copyin(uap->fromlenaddr,
1122 &msg.msg_namelen, sizeof (msg.msg_namelen));
1123 if (error)
1124 goto done2;
1125 } else {
1126 msg.msg_namelen = 0;
1127 }
1128 msg.msg_name = uap->from;
1129 msg.msg_iov = &aiov;
1130 msg.msg_iovlen = 1;
1131 aiov.iov_base = uap->buf;
1132 aiov.iov_len = uap->len;
1133 msg.msg_control = 0;
1134 msg.msg_flags = uap->flags;
1135 error = recvit(td, uap->s, &msg, uap->fromlenaddr);
1136done2:
1137 return(error);
1138}
1139
1140#ifdef COMPAT_OLDSOCK
1141int
1142orecvfrom(td, uap)
1143 struct thread *td;
1144 struct recvfrom_args *uap;
1145{
1146
1147 uap->flags |= MSG_COMPAT;
1148 return (recvfrom(td, uap));
1149}
1150#endif
1151
1152#ifdef COMPAT_OLDSOCK
1153int
1154orecv(td, uap)
1155 struct thread *td;
1156 struct orecv_args /* {
1157 int s;
1158 caddr_t buf;
1159 int len;
1160 int flags;
1161 } */ *uap;
1162{
1163 struct msghdr msg;
1164 struct iovec aiov;
1165 int error;
1166
1167 msg.msg_name = 0;
1168 msg.msg_namelen = 0;
1169 msg.msg_iov = &aiov;
1170 msg.msg_iovlen = 1;
1171 aiov.iov_base = uap->buf;
1172 aiov.iov_len = uap->len;
1173 msg.msg_control = 0;
1174 msg.msg_flags = uap->flags;
1175 error = recvit(td, uap->s, &msg, NULL);
1176 return (error);
1177}
1178
1179/*
1180 * Old recvmsg. This code takes advantage of the fact that the old msghdr
1181 * overlays the new one, missing only the flags, and with the (old) access
1182 * rights where the control fields are now.
1183 */
1184int
1185orecvmsg(td, uap)
1186 struct thread *td;
1187 struct orecvmsg_args /* {
1188 int s;
1189 struct omsghdr *msg;
1190 int flags;
1191 } */ *uap;
1192{
1193 struct msghdr msg;
1194 struct iovec *iov;
1195 int error;
1196
1197 error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
1198 if (error)
1199 return (error);
1200 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1201 if (error)
1202 return (error);
1203 msg.msg_flags = uap->flags | MSG_COMPAT;
1204 msg.msg_iov = iov;
1205 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
1206 if (msg.msg_controllen && error == 0)
1207 error = copyout(&msg.msg_controllen,
1208 &uap->msg->msg_accrightslen, sizeof (int));
1209 free(iov, M_IOV);
1210 return (error);
1211}
1212#endif
1213
1214int
1215recvmsg(td, uap)
1216 struct thread *td;
1217 struct recvmsg_args /* {
1218 int s;
1219 struct msghdr *msg;
1220 int flags;
1221 } */ *uap;
1222{
1223 struct msghdr msg;
1224 struct iovec *uiov, *iov;
1225 int error;
1226
1227 error = copyin(uap->msg, &msg, sizeof (msg));
1228 if (error)
1229 return (error);
1230 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1231 if (error)
1232 return (error);
1233 msg.msg_flags = uap->flags;
1234#ifdef COMPAT_OLDSOCK
1235 msg.msg_flags &= ~MSG_COMPAT;
1236#endif
1237 uiov = msg.msg_iov;
1238 msg.msg_iov = iov;
1239 error = recvit(td, uap->s, &msg, NULL);
1240 if (error == 0) {
1241 msg.msg_iov = uiov;
1242 error = copyout(&msg, uap->msg, sizeof(msg));
1243 }
1244 free(iov, M_IOV);
1245 return (error);
1246}
1247
1248/* ARGSUSED */
1249int
1250shutdown(td, uap)
1251 struct thread *td;
1252 struct shutdown_args /* {
1253 int s;
1254 int how;
1255 } */ *uap;
1256{
1257 struct socket *so;
1258 struct file *fp;
1259 int error;
1260
1261 error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL);
1262 if (error == 0) {
1263 so = fp->f_data;
1264 error = soshutdown(so, uap->how);
1265 fdrop(fp, td);
1266 }
1267 return (error);
1268}
1269
1270/* ARGSUSED */
1271int
1272setsockopt(td, uap)
1273 struct thread *td;
1274 struct setsockopt_args /* {
1275 int s;
1276 int level;
1277 int name;
1278 caddr_t val;
1279 int valsize;
1280 } */ *uap;
1281{
1282
1283 return (kern_setsockopt(td, uap->s, uap->level, uap->name,
1284 uap->val, UIO_USERSPACE, uap->valsize));
1285}
1286
1287int
1288kern_setsockopt(td, s, level, name, val, valseg, valsize)
1289 struct thread *td;
1290 int s;
1291 int level;
1292 int name;
1293 void *val;
1294 enum uio_seg valseg;
1295 socklen_t valsize;
1296{
1297 int error;
1298 struct socket *so;
1299 struct file *fp;
1300 struct sockopt sopt;
1301
1302 if (val == NULL && valsize != 0)
1303 return (EFAULT);
1304 if ((int)valsize < 0)
1305 return (EINVAL);
1306
1307 sopt.sopt_dir = SOPT_SET;
1308 sopt.sopt_level = level;
1309 sopt.sopt_name = name;
1310 sopt.sopt_val = val;
1311 sopt.sopt_valsize = valsize;
1312 switch (valseg) {
1313 case UIO_USERSPACE:
1314 sopt.sopt_td = td;
1315 break;
1316 case UIO_SYSSPACE:
1317 sopt.sopt_td = NULL;
1318 break;
1319 default:
1320 panic("kern_setsockopt called with bad valseg");
1321 }
1322
1323 error = getsock(td->td_proc->p_fd, s, &fp, NULL);
1324 if (error == 0) {
1325 so = fp->f_data;
1326 error = sosetopt(so, &sopt);
1327 fdrop(fp, td);
1328 }
1329 return(error);
1330}
1331
1332/* ARGSUSED */
1333int
1334getsockopt(td, uap)
1335 struct thread *td;
1336 struct getsockopt_args /* {
1337 int s;
1338 int level;
1339 int name;
1340 void * __restrict val;
1341 socklen_t * __restrict avalsize;
1342 } */ *uap;
1343{
1344 socklen_t valsize;
1345 int error;
1346
1347 if (uap->val) {
1348 error = copyin(uap->avalsize, &valsize, sizeof (valsize));
1349 if (error)
1350 return (error);
1351 }
1352
1353 error = kern_getsockopt(td, uap->s, uap->level, uap->name,
1354 uap->val, UIO_USERSPACE, &valsize);
1355
1356 if (error == 0)
1357 error = copyout(&valsize, uap->avalsize, sizeof (valsize));
1358 return (error);
1359}
1360
1361/*
1362 * Kernel version of getsockopt.
1363 * optval can be a userland or userspace. optlen is always a kernel pointer.
1364 */
1365int
1366kern_getsockopt(td, s, level, name, val, valseg, valsize)
1367 struct thread *td;
1368 int s;
1369 int level;
1370 int name;
1371 void *val;
1372 enum uio_seg valseg;
1373 socklen_t *valsize;
1374{
1375 int error;
1376 struct socket *so;
1377 struct file *fp;
1378 struct sockopt sopt;
1379
1380 if (val == NULL)
1381 *valsize = 0;
1382 if ((int)*valsize < 0)
1383 return (EINVAL);
1384
1385 sopt.sopt_dir = SOPT_GET;
1386 sopt.sopt_level = level;
1387 sopt.sopt_name = name;
1388 sopt.sopt_val = val;
1389 sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */
1390 switch (valseg) {
1391 case UIO_USERSPACE:
1392 sopt.sopt_td = td;
1393 break;
1394 case UIO_SYSSPACE:
1395 sopt.sopt_td = NULL;
1396 break;
1397 default:
1398 panic("kern_getsockopt called with bad valseg");
1399 }
1400
1401 error = getsock(td->td_proc->p_fd, s, &fp, NULL);
1402 if (error == 0) {
1403 so = fp->f_data;
1404 error = sogetopt(so, &sopt);
1405 *valsize = sopt.sopt_valsize;
1406 fdrop(fp, td);
1407 }
1408 return (error);
1409}
1410
1411/*
1412 * getsockname1() - Get socket name.
1413 */
1414/* ARGSUSED */
1415static int
1416getsockname1(td, uap, compat)
1417 struct thread *td;
1418 struct getsockname_args /* {
1419 int fdes;
1420 struct sockaddr * __restrict asa;
1421 socklen_t * __restrict alen;
1422 } */ *uap;
1423 int compat;
1424{
1425 struct sockaddr *sa;
1426 socklen_t len;
1427 int error;
1428
1429 error = copyin(uap->alen, &len, sizeof(len));
1430 if (error)
1431 return (error);
1432
1433 error = kern_getsockname(td, uap->fdes, &sa, &len);
1434 if (error)
1435 return (error);
1436
1437 if (len != 0) {
1438#ifdef COMPAT_OLDSOCK
1439 if (compat)
1440 ((struct osockaddr *)sa)->sa_family = sa->sa_family;
1441#endif
1442 error = copyout(sa, uap->asa, (u_int)len);
1443 }
1444 free(sa, M_SONAME);
1445 if (error == 0)
1446 error = copyout(&len, uap->alen, sizeof(len));
1447 return (error);
1448}
1449
1450int
1451kern_getsockname(struct thread *td, int fd, struct sockaddr **sa,
1452 socklen_t *alen)
1453{
1454 struct socket *so;
1455 struct file *fp;
1456 socklen_t len;
1457 int error;
1458
1459 if (*alen < 0)
1460 return (EINVAL);
1461
1462 error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
1463 if (error)
1464 return (error);
1465 so = fp->f_data;
1466 *sa = NULL;
1467 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa);
1468 if (error)
1469 goto bad;
1470 if (*sa == NULL)
1471 len = 0;
1472 else
1473 len = MIN(*alen, (*sa)->sa_len);
1474 *alen = len;
1475#ifdef KTRACE
1476 if (KTRPOINT(td, KTR_STRUCT))
1477 ktrsockaddr(*sa);
1478#endif
1479bad:
1480 fdrop(fp, td);
1481 if (error && *sa) {
1482 free(*sa, M_SONAME);
1483 *sa = NULL;
1484 }
1485 return (error);
1486}
1487
/*
 * getsockname system call entry point.  Forwards to getsockname1() with
 * the compat flag cleared.
 */
int
getsockname(struct thread *td, struct getsockname_args *uap)
{
	int error;

	error = getsockname1(td, uap, 0);
	return (error);
}
1496
1497#ifdef COMPAT_OLDSOCK
/*
 * Old getsockname entry point.  Forwards to getsockname1() with the compat
 * flag set.
 */
int
ogetsockname(struct thread *td, struct getsockname_args *uap)
{
	int error;

	error = getsockname1(td, uap, 1);
	return (error);
}
1506#endif /* COMPAT_OLDSOCK */
1507
1508/*
1509 * getpeername1() - Get name of peer for connected socket.
1510 */
1511/* ARGSUSED */
1512static int
1513getpeername1(td, uap, compat)
1514 struct thread *td;
1515 struct getpeername_args /* {
1516 int fdes;
1517 struct sockaddr * __restrict asa;
1518 socklen_t * __restrict alen;
1519 } */ *uap;
1520 int compat;
1521{
1522 struct sockaddr *sa;
1523 socklen_t len;
1524 int error;
1525
1526 error = copyin(uap->alen, &len, sizeof (len));
1527 if (error)
1528 return (error);
1529
1530 error = kern_getpeername(td, uap->fdes, &sa, &len);
1531 if (error)
1532 return (error);
1533
1534 if (len != 0) {
1535#ifdef COMPAT_OLDSOCK
1536 if (compat)
1537 ((struct osockaddr *)sa)->sa_family = sa->sa_family;
1538#endif
1539 error = copyout(sa, uap->asa, (u_int)len);
1540 }
1541 free(sa, M_SONAME);
1542 if (error == 0)
1543 error = copyout(&len, uap->alen, sizeof(len));
1544 return (error);
1545}
1546
1547int
1548kern_getpeername(struct thread *td, int fd, struct sockaddr **sa,
1549 socklen_t *alen)
1550{
1551 struct socket *so;
1552 struct file *fp;
1553 socklen_t len;
1554 int error;
1555
1556 if (*alen < 0)
1557 return (EINVAL);
1558
1559 error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
1560 if (error)
1561 return (error);
1562 so = fp->f_data;
1563 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1564 error = ENOTCONN;
1565 goto done;
1566 }
1567 *sa = NULL;
1568 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa);
1569 if (error)
1570 goto bad;
1571 if (*sa == NULL)
1572 len = 0;
1573 else
1574 len = MIN(*alen, (*sa)->sa_len);
1575 *alen = len;
1576#ifdef KTRACE
1577 if (KTRPOINT(td, KTR_STRUCT))
1578 ktrsockaddr(*sa);
1579#endif
1580bad:
1581 if (error && *sa) {
1582 free(*sa, M_SONAME);
1583 *sa = NULL;
1584 }
1585done:
1586 fdrop(fp, td);
1587 return (error);
1588}
1589
/*
 * getpeername system call entry point.  Forwards to getpeername1() with
 * the compat flag cleared.
 */
int
getpeername(struct thread *td, struct getpeername_args *uap)
{
	int error;

	error = getpeername1(td, uap, 0);
	return (error);
}
1598
1599#ifdef COMPAT_OLDSOCK
/*
 * Old getpeername entry point.  Forwards to getpeername1() with the compat
 * flag set.
 */
int
ogetpeername(struct thread *td, struct ogetpeername_args *uap)
{
	struct getpeername_args *args;

	/* XXX uap should have type `getpeername_args *' to begin with. */
	args = (struct getpeername_args *)uap;
	return (getpeername1(td, args, 1));
}
1609#endif /* COMPAT_OLDSOCK */
1610
1611int
1612sockargs(mp, buf, buflen, type)
1613 struct mbuf **mp;
1614 caddr_t buf;
1615 int buflen, type;
1616{
1617 struct sockaddr *sa;
1618 struct mbuf *m;
1619 int error;
1620
1621 if ((u_int)buflen > MLEN) {
1622#ifdef COMPAT_OLDSOCK
1623 if (type == MT_SONAME && (u_int)buflen <= 112)
1624 buflen = MLEN; /* unix domain compat. hack */
1625 else
1626#endif
1627 if ((u_int)buflen > MCLBYTES)
1628 return (EINVAL);
1629 }
713 }
714#endif
715 } else {
716 control = NULL;
717 }
718
719 error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE);
720
721bad:
722 if (to)
723 FREE(to, M_SONAME);
724 return (error);
725}
726
727int
728kern_sendit(td, s, mp, flags, control, segflg)
729 struct thread *td;
730 int s;
731 struct msghdr *mp;
732 int flags;
733 struct mbuf *control;
734 enum uio_seg segflg;
735{
736 struct file *fp;
737 struct uio auio;
738 struct iovec *iov;
739 struct socket *so;
740 int i;
741 int len, error;
742#ifdef KTRACE
743 struct uio *ktruio = NULL;
744#endif
745
746 error = getsock(td->td_proc->p_fd, s, &fp, NULL);
747 if (error)
748 return (error);
749 so = (struct socket *)fp->f_data;
750
751#ifdef MAC
752 SOCK_LOCK(so);
753 error = mac_socket_check_send(td->td_ucred, so);
754 SOCK_UNLOCK(so);
755 if (error)
756 goto bad;
757#endif
758
759 auio.uio_iov = mp->msg_iov;
760 auio.uio_iovcnt = mp->msg_iovlen;
761 auio.uio_segflg = segflg;
762 auio.uio_rw = UIO_WRITE;
763 auio.uio_td = td;
764 auio.uio_offset = 0; /* XXX */
765 auio.uio_resid = 0;
766 iov = mp->msg_iov;
767 for (i = 0; i < mp->msg_iovlen; i++, iov++) {
768 if ((auio.uio_resid += iov->iov_len) < 0) {
769 error = EINVAL;
770 goto bad;
771 }
772 }
773#ifdef KTRACE
774 if (KTRPOINT(td, KTR_GENIO))
775 ktruio = cloneuio(&auio);
776#endif
777 len = auio.uio_resid;
778 error = sosend(so, mp->msg_name, &auio, 0, control, flags, td);
779 if (error) {
780 if (auio.uio_resid != len && (error == ERESTART ||
781 error == EINTR || error == EWOULDBLOCK))
782 error = 0;
783 /* Generation of SIGPIPE can be controlled per socket */
784 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
785 !(flags & MSG_NOSIGNAL)) {
786 PROC_LOCK(td->td_proc);
787 psignal(td->td_proc, SIGPIPE);
788 PROC_UNLOCK(td->td_proc);
789 }
790 }
791 if (error == 0)
792 td->td_retval[0] = len - auio.uio_resid;
793#ifdef KTRACE
794 if (ktruio != NULL) {
795 ktruio->uio_resid = td->td_retval[0];
796 ktrgenio(s, UIO_WRITE, ktruio, error);
797 }
798#endif
799bad:
800 fdrop(fp, td);
801 return (error);
802}
803
804int
805sendto(td, uap)
806 struct thread *td;
807 struct sendto_args /* {
808 int s;
809 caddr_t buf;
810 size_t len;
811 int flags;
812 caddr_t to;
813 int tolen;
814 } */ *uap;
815{
816 struct msghdr msg;
817 struct iovec aiov;
818 int error;
819
820 msg.msg_name = uap->to;
821 msg.msg_namelen = uap->tolen;
822 msg.msg_iov = &aiov;
823 msg.msg_iovlen = 1;
824 msg.msg_control = 0;
825#ifdef COMPAT_OLDSOCK
826 msg.msg_flags = 0;
827#endif
828 aiov.iov_base = uap->buf;
829 aiov.iov_len = uap->len;
830 error = sendit(td, uap->s, &msg, uap->flags);
831 return (error);
832}
833
834#ifdef COMPAT_OLDSOCK
835int
836osend(td, uap)
837 struct thread *td;
838 struct osend_args /* {
839 int s;
840 caddr_t buf;
841 int len;
842 int flags;
843 } */ *uap;
844{
845 struct msghdr msg;
846 struct iovec aiov;
847 int error;
848
849 msg.msg_name = 0;
850 msg.msg_namelen = 0;
851 msg.msg_iov = &aiov;
852 msg.msg_iovlen = 1;
853 aiov.iov_base = uap->buf;
854 aiov.iov_len = uap->len;
855 msg.msg_control = 0;
856 msg.msg_flags = 0;
857 error = sendit(td, uap->s, &msg, uap->flags);
858 return (error);
859}
860
861int
862osendmsg(td, uap)
863 struct thread *td;
864 struct osendmsg_args /* {
865 int s;
866 caddr_t msg;
867 int flags;
868 } */ *uap;
869{
870 struct msghdr msg;
871 struct iovec *iov;
872 int error;
873
874 error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
875 if (error)
876 return (error);
877 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
878 if (error)
879 return (error);
880 msg.msg_iov = iov;
881 msg.msg_flags = MSG_COMPAT;
882 error = sendit(td, uap->s, &msg, uap->flags);
883 free(iov, M_IOV);
884 return (error);
885}
886#endif
887
888int
889sendmsg(td, uap)
890 struct thread *td;
891 struct sendmsg_args /* {
892 int s;
893 caddr_t msg;
894 int flags;
895 } */ *uap;
896{
897 struct msghdr msg;
898 struct iovec *iov;
899 int error;
900
901 error = copyin(uap->msg, &msg, sizeof (msg));
902 if (error)
903 return (error);
904 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
905 if (error)
906 return (error);
907 msg.msg_iov = iov;
908#ifdef COMPAT_OLDSOCK
909 msg.msg_flags = 0;
910#endif
911 error = sendit(td, uap->s, &msg, uap->flags);
912 free(iov, M_IOV);
913 return (error);
914}
915
916int
917kern_recvit(td, s, mp, fromseg, controlp)
918 struct thread *td;
919 int s;
920 struct msghdr *mp;
921 enum uio_seg fromseg;
922 struct mbuf **controlp;
923{
924 struct uio auio;
925 struct iovec *iov;
926 int i;
927 socklen_t len;
928 int error;
929 struct mbuf *m, *control = 0;
930 caddr_t ctlbuf;
931 struct file *fp;
932 struct socket *so;
933 struct sockaddr *fromsa = 0;
934#ifdef KTRACE
935 struct uio *ktruio = NULL;
936#endif
937
938 if(controlp != NULL)
939 *controlp = 0;
940
941 error = getsock(td->td_proc->p_fd, s, &fp, NULL);
942 if (error)
943 return (error);
944 so = fp->f_data;
945
946#ifdef MAC
947 SOCK_LOCK(so);
948 error = mac_socket_check_receive(td->td_ucred, so);
949 SOCK_UNLOCK(so);
950 if (error) {
951 fdrop(fp, td);
952 return (error);
953 }
954#endif
955
956 auio.uio_iov = mp->msg_iov;
957 auio.uio_iovcnt = mp->msg_iovlen;
958 auio.uio_segflg = UIO_USERSPACE;
959 auio.uio_rw = UIO_READ;
960 auio.uio_td = td;
961 auio.uio_offset = 0; /* XXX */
962 auio.uio_resid = 0;
963 iov = mp->msg_iov;
964 for (i = 0; i < mp->msg_iovlen; i++, iov++) {
965 if ((auio.uio_resid += iov->iov_len) < 0) {
966 fdrop(fp, td);
967 return (EINVAL);
968 }
969 }
970#ifdef KTRACE
971 if (KTRPOINT(td, KTR_GENIO))
972 ktruio = cloneuio(&auio);
973#endif
974 len = auio.uio_resid;
975 error = soreceive(so, &fromsa, &auio, (struct mbuf **)0,
976 (mp->msg_control || controlp) ? &control : (struct mbuf **)0,
977 &mp->msg_flags);
978 if (error) {
979 if (auio.uio_resid != (int)len && (error == ERESTART ||
980 error == EINTR || error == EWOULDBLOCK))
981 error = 0;
982 }
983#ifdef KTRACE
984 if (ktruio != NULL) {
985 ktruio->uio_resid = (int)len - auio.uio_resid;
986 ktrgenio(s, UIO_READ, ktruio, error);
987 }
988#endif
989 if (error)
990 goto out;
991 td->td_retval[0] = (int)len - auio.uio_resid;
992 if (mp->msg_name) {
993 len = mp->msg_namelen;
994 if (len <= 0 || fromsa == 0)
995 len = 0;
996 else {
997 /* save sa_len before it is destroyed by MSG_COMPAT */
998 len = MIN(len, fromsa->sa_len);
999#ifdef COMPAT_OLDSOCK
1000 if (mp->msg_flags & MSG_COMPAT)
1001 ((struct osockaddr *)fromsa)->sa_family =
1002 fromsa->sa_family;
1003#endif
1004 if (fromseg == UIO_USERSPACE) {
1005 error = copyout(fromsa, mp->msg_name,
1006 (unsigned)len);
1007 if (error)
1008 goto out;
1009 } else
1010 bcopy(fromsa, mp->msg_name, len);
1011 }
1012 mp->msg_namelen = len;
1013 }
1014 if (mp->msg_control && controlp == NULL) {
1015#ifdef COMPAT_OLDSOCK
1016 /*
1017 * We assume that old recvmsg calls won't receive access
1018 * rights and other control info, esp. as control info
1019 * is always optional and those options didn't exist in 4.3.
1020 * If we receive rights, trim the cmsghdr; anything else
1021 * is tossed.
1022 */
1023 if (control && mp->msg_flags & MSG_COMPAT) {
1024 if (mtod(control, struct cmsghdr *)->cmsg_level !=
1025 SOL_SOCKET ||
1026 mtod(control, struct cmsghdr *)->cmsg_type !=
1027 SCM_RIGHTS) {
1028 mp->msg_controllen = 0;
1029 goto out;
1030 }
1031 control->m_len -= sizeof (struct cmsghdr);
1032 control->m_data += sizeof (struct cmsghdr);
1033 }
1034#endif
1035 len = mp->msg_controllen;
1036 m = control;
1037 mp->msg_controllen = 0;
1038 ctlbuf = mp->msg_control;
1039
1040 while (m && len > 0) {
1041 unsigned int tocopy;
1042
1043 if (len >= m->m_len)
1044 tocopy = m->m_len;
1045 else {
1046 mp->msg_flags |= MSG_CTRUNC;
1047 tocopy = len;
1048 }
1049
1050 if ((error = copyout(mtod(m, caddr_t),
1051 ctlbuf, tocopy)) != 0)
1052 goto out;
1053
1054 ctlbuf += tocopy;
1055 len -= tocopy;
1056 m = m->m_next;
1057 }
1058 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
1059 }
1060out:
1061 fdrop(fp, td);
1062#ifdef KTRACE
1063 if (fromsa && KTRPOINT(td, KTR_STRUCT))
1064 ktrsockaddr(fromsa);
1065#endif
1066 if (fromsa)
1067 FREE(fromsa, M_SONAME);
1068
1069 if (error == 0 && controlp != NULL)
1070 *controlp = control;
1071 else if (control)
1072 m_freem(control);
1073
1074 return (error);
1075}
1076
1077static int
1078recvit(td, s, mp, namelenp)
1079 struct thread *td;
1080 int s;
1081 struct msghdr *mp;
1082 void *namelenp;
1083{
1084 int error;
1085
1086 error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL);
1087 if (error)
1088 return (error);
1089 if (namelenp) {
1090 error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t));
1091#ifdef COMPAT_OLDSOCK
1092 if (mp->msg_flags & MSG_COMPAT)
1093 error = 0; /* old recvfrom didn't check */
1094#endif
1095 }
1096 return (error);
1097}
1098
1099int
1100recvfrom(td, uap)
1101 struct thread *td;
1102 struct recvfrom_args /* {
1103 int s;
1104 caddr_t buf;
1105 size_t len;
1106 int flags;
1107 struct sockaddr * __restrict from;
1108 socklen_t * __restrict fromlenaddr;
1109 } */ *uap;
1110{
1111 struct msghdr msg;
1112 struct iovec aiov;
1113 int error;
1114
1115 if (uap->fromlenaddr) {
1116 error = copyin(uap->fromlenaddr,
1117 &msg.msg_namelen, sizeof (msg.msg_namelen));
1118 if (error)
1119 goto done2;
1120 } else {
1121 msg.msg_namelen = 0;
1122 }
1123 msg.msg_name = uap->from;
1124 msg.msg_iov = &aiov;
1125 msg.msg_iovlen = 1;
1126 aiov.iov_base = uap->buf;
1127 aiov.iov_len = uap->len;
1128 msg.msg_control = 0;
1129 msg.msg_flags = uap->flags;
1130 error = recvit(td, uap->s, &msg, uap->fromlenaddr);
1131done2:
1132 return(error);
1133}
1134
1135#ifdef COMPAT_OLDSOCK
1136int
1137orecvfrom(td, uap)
1138 struct thread *td;
1139 struct recvfrom_args *uap;
1140{
1141
1142 uap->flags |= MSG_COMPAT;
1143 return (recvfrom(td, uap));
1144}
1145#endif
1146
1147#ifdef COMPAT_OLDSOCK
1148int
1149orecv(td, uap)
1150 struct thread *td;
1151 struct orecv_args /* {
1152 int s;
1153 caddr_t buf;
1154 int len;
1155 int flags;
1156 } */ *uap;
1157{
1158 struct msghdr msg;
1159 struct iovec aiov;
1160 int error;
1161
1162 msg.msg_name = 0;
1163 msg.msg_namelen = 0;
1164 msg.msg_iov = &aiov;
1165 msg.msg_iovlen = 1;
1166 aiov.iov_base = uap->buf;
1167 aiov.iov_len = uap->len;
1168 msg.msg_control = 0;
1169 msg.msg_flags = uap->flags;
1170 error = recvit(td, uap->s, &msg, NULL);
1171 return (error);
1172}
1173
1174/*
1175 * Old recvmsg. This code takes advantage of the fact that the old msghdr
1176 * overlays the new one, missing only the flags, and with the (old) access
1177 * rights where the control fields are now.
1178 */
1179int
1180orecvmsg(td, uap)
1181 struct thread *td;
1182 struct orecvmsg_args /* {
1183 int s;
1184 struct omsghdr *msg;
1185 int flags;
1186 } */ *uap;
1187{
1188 struct msghdr msg;
1189 struct iovec *iov;
1190 int error;
1191
1192 error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
1193 if (error)
1194 return (error);
1195 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1196 if (error)
1197 return (error);
1198 msg.msg_flags = uap->flags | MSG_COMPAT;
1199 msg.msg_iov = iov;
1200 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
1201 if (msg.msg_controllen && error == 0)
1202 error = copyout(&msg.msg_controllen,
1203 &uap->msg->msg_accrightslen, sizeof (int));
1204 free(iov, M_IOV);
1205 return (error);
1206}
1207#endif
1208
1209int
1210recvmsg(td, uap)
1211 struct thread *td;
1212 struct recvmsg_args /* {
1213 int s;
1214 struct msghdr *msg;
1215 int flags;
1216 } */ *uap;
1217{
1218 struct msghdr msg;
1219 struct iovec *uiov, *iov;
1220 int error;
1221
1222 error = copyin(uap->msg, &msg, sizeof (msg));
1223 if (error)
1224 return (error);
1225 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1226 if (error)
1227 return (error);
1228 msg.msg_flags = uap->flags;
1229#ifdef COMPAT_OLDSOCK
1230 msg.msg_flags &= ~MSG_COMPAT;
1231#endif
1232 uiov = msg.msg_iov;
1233 msg.msg_iov = iov;
1234 error = recvit(td, uap->s, &msg, NULL);
1235 if (error == 0) {
1236 msg.msg_iov = uiov;
1237 error = copyout(&msg, uap->msg, sizeof(msg));
1238 }
1239 free(iov, M_IOV);
1240 return (error);
1241}
1242
1243/* ARGSUSED */
1244int
1245shutdown(td, uap)
1246 struct thread *td;
1247 struct shutdown_args /* {
1248 int s;
1249 int how;
1250 } */ *uap;
1251{
1252 struct socket *so;
1253 struct file *fp;
1254 int error;
1255
1256 error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL);
1257 if (error == 0) {
1258 so = fp->f_data;
1259 error = soshutdown(so, uap->how);
1260 fdrop(fp, td);
1261 }
1262 return (error);
1263}
1264
1265/* ARGSUSED */
1266int
1267setsockopt(td, uap)
1268 struct thread *td;
1269 struct setsockopt_args /* {
1270 int s;
1271 int level;
1272 int name;
1273 caddr_t val;
1274 int valsize;
1275 } */ *uap;
1276{
1277
1278 return (kern_setsockopt(td, uap->s, uap->level, uap->name,
1279 uap->val, UIO_USERSPACE, uap->valsize));
1280}
1281
1282int
1283kern_setsockopt(td, s, level, name, val, valseg, valsize)
1284 struct thread *td;
1285 int s;
1286 int level;
1287 int name;
1288 void *val;
1289 enum uio_seg valseg;
1290 socklen_t valsize;
1291{
1292 int error;
1293 struct socket *so;
1294 struct file *fp;
1295 struct sockopt sopt;
1296
1297 if (val == NULL && valsize != 0)
1298 return (EFAULT);
1299 if ((int)valsize < 0)
1300 return (EINVAL);
1301
1302 sopt.sopt_dir = SOPT_SET;
1303 sopt.sopt_level = level;
1304 sopt.sopt_name = name;
1305 sopt.sopt_val = val;
1306 sopt.sopt_valsize = valsize;
1307 switch (valseg) {
1308 case UIO_USERSPACE:
1309 sopt.sopt_td = td;
1310 break;
1311 case UIO_SYSSPACE:
1312 sopt.sopt_td = NULL;
1313 break;
1314 default:
1315 panic("kern_setsockopt called with bad valseg");
1316 }
1317
1318 error = getsock(td->td_proc->p_fd, s, &fp, NULL);
1319 if (error == 0) {
1320 so = fp->f_data;
1321 error = sosetopt(so, &sopt);
1322 fdrop(fp, td);
1323 }
1324 return(error);
1325}
1326
1327/* ARGSUSED */
1328int
1329getsockopt(td, uap)
1330 struct thread *td;
1331 struct getsockopt_args /* {
1332 int s;
1333 int level;
1334 int name;
1335 void * __restrict val;
1336 socklen_t * __restrict avalsize;
1337 } */ *uap;
1338{
1339 socklen_t valsize;
1340 int error;
1341
1342 if (uap->val) {
1343 error = copyin(uap->avalsize, &valsize, sizeof (valsize));
1344 if (error)
1345 return (error);
1346 }
1347
1348 error = kern_getsockopt(td, uap->s, uap->level, uap->name,
1349 uap->val, UIO_USERSPACE, &valsize);
1350
1351 if (error == 0)
1352 error = copyout(&valsize, uap->avalsize, sizeof (valsize));
1353 return (error);
1354}
1355
1356/*
1357 * Kernel version of getsockopt.
1358 * optval can be a userland or userspace. optlen is always a kernel pointer.
1359 */
1360int
1361kern_getsockopt(td, s, level, name, val, valseg, valsize)
1362 struct thread *td;
1363 int s;
1364 int level;
1365 int name;
1366 void *val;
1367 enum uio_seg valseg;
1368 socklen_t *valsize;
1369{
1370 int error;
1371 struct socket *so;
1372 struct file *fp;
1373 struct sockopt sopt;
1374
1375 if (val == NULL)
1376 *valsize = 0;
1377 if ((int)*valsize < 0)
1378 return (EINVAL);
1379
1380 sopt.sopt_dir = SOPT_GET;
1381 sopt.sopt_level = level;
1382 sopt.sopt_name = name;
1383 sopt.sopt_val = val;
1384 sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */
1385 switch (valseg) {
1386 case UIO_USERSPACE:
1387 sopt.sopt_td = td;
1388 break;
1389 case UIO_SYSSPACE:
1390 sopt.sopt_td = NULL;
1391 break;
1392 default:
1393 panic("kern_getsockopt called with bad valseg");
1394 }
1395
1396 error = getsock(td->td_proc->p_fd, s, &fp, NULL);
1397 if (error == 0) {
1398 so = fp->f_data;
1399 error = sogetopt(so, &sopt);
1400 *valsize = sopt.sopt_valsize;
1401 fdrop(fp, td);
1402 }
1403 return (error);
1404}
1405
1406/*
1407 * getsockname1() - Get socket name.
1408 */
1409/* ARGSUSED */
1410static int
1411getsockname1(td, uap, compat)
1412 struct thread *td;
1413 struct getsockname_args /* {
1414 int fdes;
1415 struct sockaddr * __restrict asa;
1416 socklen_t * __restrict alen;
1417 } */ *uap;
1418 int compat;
1419{
1420 struct sockaddr *sa;
1421 socklen_t len;
1422 int error;
1423
1424 error = copyin(uap->alen, &len, sizeof(len));
1425 if (error)
1426 return (error);
1427
1428 error = kern_getsockname(td, uap->fdes, &sa, &len);
1429 if (error)
1430 return (error);
1431
1432 if (len != 0) {
1433#ifdef COMPAT_OLDSOCK
1434 if (compat)
1435 ((struct osockaddr *)sa)->sa_family = sa->sa_family;
1436#endif
1437 error = copyout(sa, uap->asa, (u_int)len);
1438 }
1439 free(sa, M_SONAME);
1440 if (error == 0)
1441 error = copyout(&len, uap->alen, sizeof(len));
1442 return (error);
1443}
1444
1445int
1446kern_getsockname(struct thread *td, int fd, struct sockaddr **sa,
1447 socklen_t *alen)
1448{
1449 struct socket *so;
1450 struct file *fp;
1451 socklen_t len;
1452 int error;
1453
1454 if (*alen < 0)
1455 return (EINVAL);
1456
1457 error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
1458 if (error)
1459 return (error);
1460 so = fp->f_data;
1461 *sa = NULL;
1462 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa);
1463 if (error)
1464 goto bad;
1465 if (*sa == NULL)
1466 len = 0;
1467 else
1468 len = MIN(*alen, (*sa)->sa_len);
1469 *alen = len;
1470#ifdef KTRACE
1471 if (KTRPOINT(td, KTR_STRUCT))
1472 ktrsockaddr(*sa);
1473#endif
1474bad:
1475 fdrop(fp, td);
1476 if (error && *sa) {
1477 free(*sa, M_SONAME);
1478 *sa = NULL;
1479 }
1480 return (error);
1481}
1482
/*
 * getsockname(2) system call: getsockname1() without old-socket
 * compatibility handling.
 */
int
getsockname(struct thread *td, struct getsockname_args *uap)
{
	int rc;

	rc = getsockname1(td, uap, 0);
	return (rc);
}
1491
1492#ifdef COMPAT_OLDSOCK
/*
 * Old (COMPAT_OLDSOCK) getsockname: getsockname1() with the compat flag
 * set so the address is returned in the old osockaddr layout.
 */
int
ogetsockname(struct thread *td, struct getsockname_args *uap)
{
	int rc;

	rc = getsockname1(td, uap, 1);
	return (rc);
}
1501#endif /* COMPAT_OLDSOCK */
1502
1503/*
1504 * getpeername1() - Get name of peer for connected socket.
1505 */
1506/* ARGSUSED */
1507static int
1508getpeername1(td, uap, compat)
1509 struct thread *td;
1510 struct getpeername_args /* {
1511 int fdes;
1512 struct sockaddr * __restrict asa;
1513 socklen_t * __restrict alen;
1514 } */ *uap;
1515 int compat;
1516{
1517 struct sockaddr *sa;
1518 socklen_t len;
1519 int error;
1520
1521 error = copyin(uap->alen, &len, sizeof (len));
1522 if (error)
1523 return (error);
1524
1525 error = kern_getpeername(td, uap->fdes, &sa, &len);
1526 if (error)
1527 return (error);
1528
1529 if (len != 0) {
1530#ifdef COMPAT_OLDSOCK
1531 if (compat)
1532 ((struct osockaddr *)sa)->sa_family = sa->sa_family;
1533#endif
1534 error = copyout(sa, uap->asa, (u_int)len);
1535 }
1536 free(sa, M_SONAME);
1537 if (error == 0)
1538 error = copyout(&len, uap->alen, sizeof(len));
1539 return (error);
1540}
1541
1542int
1543kern_getpeername(struct thread *td, int fd, struct sockaddr **sa,
1544 socklen_t *alen)
1545{
1546 struct socket *so;
1547 struct file *fp;
1548 socklen_t len;
1549 int error;
1550
1551 if (*alen < 0)
1552 return (EINVAL);
1553
1554 error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
1555 if (error)
1556 return (error);
1557 so = fp->f_data;
1558 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1559 error = ENOTCONN;
1560 goto done;
1561 }
1562 *sa = NULL;
1563 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa);
1564 if (error)
1565 goto bad;
1566 if (*sa == NULL)
1567 len = 0;
1568 else
1569 len = MIN(*alen, (*sa)->sa_len);
1570 *alen = len;
1571#ifdef KTRACE
1572 if (KTRPOINT(td, KTR_STRUCT))
1573 ktrsockaddr(*sa);
1574#endif
1575bad:
1576 if (error && *sa) {
1577 free(*sa, M_SONAME);
1578 *sa = NULL;
1579 }
1580done:
1581 fdrop(fp, td);
1582 return (error);
1583}
1584
/*
 * getpeername(2) system call: getpeername1() without old-socket
 * compatibility handling.
 */
int
getpeername(struct thread *td, struct getpeername_args *uap)
{
	int rc;

	rc = getpeername1(td, uap, 0);
	return (rc);
}
1593
1594#ifdef COMPAT_OLDSOCK
/*
 * Old (COMPAT_OLDSOCK) getpeername: getpeername1() with the compat flag
 * set so the address is returned in the old osockaddr layout.
 */
int
ogetpeername(struct thread *td, struct ogetpeername_args *uap)
{
	struct getpeername_args *nuap;

	/* XXX uap should have type `getpeername_args *' to begin with. */
	nuap = (struct getpeername_args *)uap;
	return (getpeername1(td, nuap, 1));
}
1604#endif /* COMPAT_OLDSOCK */
1605
1606int
1607sockargs(mp, buf, buflen, type)
1608 struct mbuf **mp;
1609 caddr_t buf;
1610 int buflen, type;
1611{
1612 struct sockaddr *sa;
1613 struct mbuf *m;
1614 int error;
1615
1616 if ((u_int)buflen > MLEN) {
1617#ifdef COMPAT_OLDSOCK
1618 if (type == MT_SONAME && (u_int)buflen <= 112)
1619 buflen = MLEN; /* unix domain compat. hack */
1620 else
1621#endif
1622 if ((u_int)buflen > MCLBYTES)
1623 return (EINVAL);
1624 }
1630 m = m_get(M_TRYWAIT, type);
1631 if (m == NULL)
1632 return (ENOBUFS);
1633 if ((u_int)buflen > MLEN) {
1634 MCLGET(m, M_TRYWAIT);
1635 if ((m->m_flags & M_EXT) == 0) {
1636 m_free(m);
1637 return (ENOBUFS);
1638 }
1639 }
1625 m = m_get(M_WAIT, type);
1626 if ((u_int)buflen > MLEN)
1627 MCLGET(m, M_WAIT);
1640 m->m_len = buflen;
1641 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1642 if (error)
1643 (void) m_free(m);
1644 else {
1645 *mp = m;
1646 if (type == MT_SONAME) {
1647 sa = mtod(m, struct sockaddr *);
1648
1649#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1650 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1651 sa->sa_family = sa->sa_len;
1652#endif
1653 sa->sa_len = buflen;
1654 }
1655 }
1656 return (error);
1657}
1658
1659int
1660getsockaddr(namp, uaddr, len)
1661 struct sockaddr **namp;
1662 caddr_t uaddr;
1663 size_t len;
1664{
1665 struct sockaddr *sa;
1666 int error;
1667
1668 if (len > SOCK_MAXADDRLEN)
1669 return (ENAMETOOLONG);
1670 if (len < offsetof(struct sockaddr, sa_data[0]))
1671 return (EINVAL);
1672 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1673 error = copyin(uaddr, sa, len);
1674 if (error) {
1675 FREE(sa, M_SONAME);
1676 } else {
1677#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1678 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1679 sa->sa_family = sa->sa_len;
1680#endif
1681 sa->sa_len = len;
1682 *namp = sa;
1683 }
1684 return (error);
1685}
1686
1687#include <sys/condvar.h>
1688
1689struct sendfile_sync {
1690 struct mtx mtx;
1691 struct cv cv;
1692 unsigned count;
1693};
1694
1695/*
1696 * Detach mapped page and release resources back to the system.
1697 */
1698void
1699sf_buf_mext(void *addr, void *args)
1700{
1701 vm_page_t m;
1702 struct sendfile_sync *sfs;
1703
1704 m = sf_buf_page(args);
1705 sf_buf_free(args);
1706 vm_page_lock_queues();
1707 vm_page_unwire(m, 0);
1708 /*
1709 * Check for the object going away on us. This can
1710 * happen since we don't hold a reference to it.
1711 * If so, we're responsible for freeing the page.
1712 */
1713 if (m->wire_count == 0 && m->object == NULL)
1714 vm_page_free(m);
1715 vm_page_unlock_queues();
1716 if (addr == NULL)
1717 return;
1718 sfs = addr;
1719 mtx_lock(&sfs->mtx);
1720 KASSERT(sfs->count> 0, ("Sendfile sync botchup count == 0"));
1721 if (--sfs->count == 0)
1722 cv_signal(&sfs->cv);
1723 mtx_unlock(&sfs->mtx);
1724}
1725
/*
 * sendfile(2)
 *
 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 *	 struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 *
 * Send the file referred to by 'fd', beginning at 'offset', to the socket
 * 's'.  At most 'nbytes' of the file are sent; nbytes == 0 means "until
 * EOF".  A header and/or trailer may optionally be added to the socket
 * output.  If 'sbytes' is given, the total number of bytes sent is written
 * to *sbytes.
 *
 * This entry point simply runs do_sendfile() with compat handling off.
 */
int
sendfile(struct thread *td, struct sendfile_args *uap)
{
	int rc;

	rc = do_sendfile(td, uap, 0);
	return (rc);
}
1743
1744static int
1745do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
1746{
1747 struct sf_hdtr hdtr;
1748 struct uio *hdr_uio, *trl_uio;
1749 int error;
1750
1751 hdr_uio = trl_uio = NULL;
1752
1753 if (uap->hdtr != NULL) {
1754 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1755 if (error)
1756 goto out;
1757 if (hdtr.headers != NULL) {
1758 error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio);
1759 if (error)
1760 goto out;
1761 }
1762 if (hdtr.trailers != NULL) {
1763 error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio);
1764 if (error)
1765 goto out;
1766
1767 }
1768 }
1769
1770 error = kern_sendfile(td, uap, hdr_uio, trl_uio, compat);
1771out:
1772 if (hdr_uio)
1773 free(hdr_uio, M_IOV);
1774 if (trl_uio)
1775 free(trl_uio, M_IOV);
1776 return (error);
1777}
1778
1779#ifdef COMPAT_FREEBSD4
1780int
1781freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
1782{
1783 struct sendfile_args args;
1784
1785 args.fd = uap->fd;
1786 args.s = uap->s;
1787 args.offset = uap->offset;
1788 args.nbytes = uap->nbytes;
1789 args.hdtr = uap->hdtr;
1790 args.sbytes = uap->sbytes;
1791 args.flags = uap->flags;
1792
1793 return (do_sendfile(td, &args, 1));
1794}
1795#endif /* COMPAT_FREEBSD4 */
1796
1797int
1798kern_sendfile(struct thread *td, struct sendfile_args *uap,
1799 struct uio *hdr_uio, struct uio *trl_uio, int compat)
1800{
1801 struct file *sock_fp;
1802 struct vnode *vp;
1803 struct vm_object *obj = NULL;
1804 struct socket *so = NULL;
1805 struct mbuf *m = NULL;
1806 struct sf_buf *sf;
1807 struct vm_page *pg;
1808 off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0;
1809 int error, hdrlen = 0, mnw = 0;
1810 int vfslocked;
1811 struct sendfile_sync *sfs = NULL;
1812
1813 /*
1814 * The file descriptor must be a regular file and have a
1815 * backing VM object.
1816 * File offset must be positive. If it goes beyond EOF
1817 * we send only the header/trailer and no payload data.
1818 */
1819 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
1820 goto out;
1821 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1822 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1823 if (vp->v_type == VREG) {
1824 obj = vp->v_object;
1825 if (obj != NULL) {
1826 /*
1827 * Temporarily increase the backing VM
1828 * object's reference count so that a forced
1829 * reclamation of its vnode does not
1830 * immediately destroy it.
1831 */
1832 VM_OBJECT_LOCK(obj);
1833 if ((obj->flags & OBJ_DEAD) == 0) {
1834 vm_object_reference_locked(obj);
1835 VM_OBJECT_UNLOCK(obj);
1836 } else {
1837 VM_OBJECT_UNLOCK(obj);
1838 obj = NULL;
1839 }
1840 }
1841 }
1842 VOP_UNLOCK(vp, 0);
1843 VFS_UNLOCK_GIANT(vfslocked);
1844 if (obj == NULL) {
1845 error = EINVAL;
1846 goto out;
1847 }
1848 if (uap->offset < 0) {
1849 error = EINVAL;
1850 goto out;
1851 }
1852
1853 /*
1854 * The socket must be a stream socket and connected.
1855 * Remember if it a blocking or non-blocking socket.
1856 */
1857 if ((error = getsock(td->td_proc->p_fd, uap->s, &sock_fp,
1858 NULL)) != 0)
1859 goto out;
1860 so = sock_fp->f_data;
1861 if (so->so_type != SOCK_STREAM) {
1862 error = EINVAL;
1863 goto out;
1864 }
1865 if ((so->so_state & SS_ISCONNECTED) == 0) {
1866 error = ENOTCONN;
1867 goto out;
1868 }
1869 /*
1870 * Do not wait on memory allocations but return ENOMEM for
1871 * caller to retry later.
1872 * XXX: Experimental.
1873 */
1874 if (uap->flags & SF_MNOWAIT)
1875 mnw = 1;
1876
1877 if (uap->flags & SF_SYNC) {
1878 sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK);
1879 memset(sfs, 0, sizeof *sfs);
1880 mtx_init(&sfs->mtx, "sendfile", MTX_DEF, 0);
1881 cv_init(&sfs->cv, "sendfile");
1882 }
1883
1884#ifdef MAC
1885 SOCK_LOCK(so);
1886 error = mac_socket_check_send(td->td_ucred, so);
1887 SOCK_UNLOCK(so);
1888 if (error)
1889 goto out;
1890#endif
1891
1892 /* If headers are specified copy them into mbufs. */
1893 if (hdr_uio != NULL) {
1894 hdr_uio->uio_td = td;
1895 hdr_uio->uio_rw = UIO_WRITE;
1896 if (hdr_uio->uio_resid > 0) {
1897 /*
1898 * In FBSD < 5.0 the nbytes to send also included
1899 * the header. If compat is specified subtract the
1900 * header size from nbytes.
1901 */
1902 if (compat) {
1903 if (uap->nbytes > hdr_uio->uio_resid)
1904 uap->nbytes -= hdr_uio->uio_resid;
1905 else
1906 uap->nbytes = 0;
1907 }
1908 m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK),
1909 0, 0, 0);
1910 if (m == NULL) {
1911 error = mnw ? EAGAIN : ENOBUFS;
1912 goto out;
1913 }
1914 hdrlen = m_length(m, NULL);
1915 }
1916 }
1917
1918 /*
1919 * Protect against multiple writers to the socket.
1920 *
1921 * XXXRW: Historically this has assumed non-interruptibility, so now
1922 * we implement that, but possibly shouldn't.
1923 */
1924 (void)sblock(&so->so_snd, SBL_WAIT | SBL_NOINTR);
1925
1926 /*
1927 * Loop through the pages of the file, starting with the requested
1928 * offset. Get a file page (do I/O if necessary), map the file page
1929 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1930 * it on the socket.
1931 * This is done in two loops. The inner loop turns as many pages
1932 * as it can, up to available socket buffer space, without blocking
1933 * into mbufs to have it bulk delivered into the socket send buffer.
1934 * The outer loop checks the state and available space of the socket
1935 * and takes care of the overall progress.
1936 */
1937 for (off = uap->offset, rem = uap->nbytes; ; ) {
1938 int loopbytes = 0;
1939 int space = 0;
1940 int done = 0;
1941
1942 /*
1943 * Check the socket state for ongoing connection,
1944 * no errors and space in socket buffer.
1945 * If space is low allow for the remainder of the
1946 * file to be processed if it fits the socket buffer.
1947 * Otherwise block in waiting for sufficient space
1948 * to proceed, or if the socket is nonblocking, return
1949 * to userland with EAGAIN while reporting how far
1950 * we've come.
1951 * We wait until the socket buffer has significant free
1952 * space to do bulk sends. This makes good use of file
1953 * system read ahead and allows packet segmentation
1954 * offloading hardware to take over lots of work. If
1955 * we were not careful here we would send off only one
1956 * sfbuf at a time.
1957 */
1958 SOCKBUF_LOCK(&so->so_snd);
1959 if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2)
1960 so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2;
1961retry_space:
1962 if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
1963 error = EPIPE;
1964 SOCKBUF_UNLOCK(&so->so_snd);
1965 goto done;
1966 } else if (so->so_error) {
1967 error = so->so_error;
1968 so->so_error = 0;
1969 SOCKBUF_UNLOCK(&so->so_snd);
1970 goto done;
1971 }
1972 space = sbspace(&so->so_snd);
1973 if (space < rem &&
1974 (space <= 0 ||
1975 space < so->so_snd.sb_lowat)) {
1976 if (so->so_state & SS_NBIO) {
1977 SOCKBUF_UNLOCK(&so->so_snd);
1978 error = EAGAIN;
1979 goto done;
1980 }
1981 /*
1982 * sbwait drops the lock while sleeping.
1983 * When we loop back to retry_space the
1984 * state may have changed and we retest
1985 * for it.
1986 */
1987 error = sbwait(&so->so_snd);
1988 /*
1989 * An error from sbwait usually indicates that we've
1990 * been interrupted by a signal. If we've sent anything
1991 * then return bytes sent, otherwise return the error.
1992 */
1993 if (error) {
1994 SOCKBUF_UNLOCK(&so->so_snd);
1995 goto done;
1996 }
1997 goto retry_space;
1998 }
1999 SOCKBUF_UNLOCK(&so->so_snd);
2000
2001 /*
2002 * Reduce space in the socket buffer by the size of
2003 * the header mbuf chain.
2004 * hdrlen is set to 0 after the first loop.
2005 */
2006 space -= hdrlen;
2007
2008 /*
2009 * Loop and construct maximum sized mbuf chain to be bulk
2010 * dumped into socket buffer.
2011 */
2012 while(space > loopbytes) {
2013 vm_pindex_t pindex;
2014 vm_offset_t pgoff;
2015 struct mbuf *m0;
2016
2017 VM_OBJECT_LOCK(obj);
2018 /*
2019 * Calculate the amount to transfer.
2020 * Not to exceed a page, the EOF,
2021 * or the passed in nbytes.
2022 */
2023 pgoff = (vm_offset_t)(off & PAGE_MASK);
2024 xfsize = omin(PAGE_SIZE - pgoff,
2025 obj->un_pager.vnp.vnp_size - uap->offset -
2026 fsbytes - loopbytes);
2027 if (uap->nbytes)
2028 rem = (uap->nbytes - fsbytes - loopbytes);
2029 else
2030 rem = obj->un_pager.vnp.vnp_size -
2031 uap->offset - fsbytes - loopbytes;
2032 xfsize = omin(rem, xfsize);
2033 if (xfsize <= 0) {
2034 VM_OBJECT_UNLOCK(obj);
2035 done = 1; /* all data sent */
2036 break;
2037 }
2038 /*
2039 * Don't overflow the send buffer.
2040 * Stop here and send out what we've
2041 * already got.
2042 */
2043 if (space < loopbytes + xfsize) {
2044 VM_OBJECT_UNLOCK(obj);
2045 break;
2046 }
2047
2048 /*
2049 * Attempt to look up the page. Allocate
2050 * if not found or wait and loop if busy.
2051 */
2052 pindex = OFF_TO_IDX(off);
2053 pg = vm_page_grab(obj, pindex, VM_ALLOC_NOBUSY |
2054 VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_RETRY);
2055
2056 /*
2057 * Check if page is valid for what we need,
2058 * otherwise initiate I/O.
2059 * If we already turned some pages into mbufs,
2060 * send them off before we come here again and
2061 * block.
2062 */
2063 if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize))
2064 VM_OBJECT_UNLOCK(obj);
2065 else if (m != NULL)
2066 error = EAGAIN; /* send what we already got */
2067 else if (uap->flags & SF_NODISKIO)
2068 error = EBUSY;
2069 else {
2070 int bsize, resid;
2071
2072 /*
2073 * Ensure that our page is still around
2074 * when the I/O completes.
2075 */
2076 vm_page_io_start(pg);
2077 VM_OBJECT_UNLOCK(obj);
2078
2079 /*
2080 * Get the page from backing store.
2081 */
2082 bsize = vp->v_mount->mnt_stat.f_iosize;
2083 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
2084 vn_lock(vp, LK_SHARED | LK_RETRY);
2085
2086 /*
2087 * XXXMAC: Because we don't have fp->f_cred
2088 * here, we pass in NOCRED. This is probably
2089 * wrong, but is consistent with our original
2090 * implementation.
2091 */
2092 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
2093 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
2094 IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
2095 td->td_ucred, NOCRED, &resid, td);
2096 VOP_UNLOCK(vp, 0);
2097 VFS_UNLOCK_GIANT(vfslocked);
2098 VM_OBJECT_LOCK(obj);
2099 vm_page_io_finish(pg);
2100 if (!error)
2101 VM_OBJECT_UNLOCK(obj);
2102 mbstat.sf_iocnt++;
2103 }
2104 if (error) {
2105 vm_page_lock_queues();
2106 vm_page_unwire(pg, 0);
2107 /*
2108 * See if anyone else might know about
2109 * this page. If not and it is not valid,
2110 * then free it.
2111 */
2112 if (pg->wire_count == 0 && pg->valid == 0 &&
2113 pg->busy == 0 && !(pg->oflags & VPO_BUSY) &&
2114 pg->hold_count == 0) {
2115 vm_page_free(pg);
2116 }
2117 vm_page_unlock_queues();
2118 VM_OBJECT_UNLOCK(obj);
2119 if (error == EAGAIN)
2120 error = 0; /* not a real error */
2121 break;
2122 }
2123
2124 /*
2125 * Get a sendfile buf. We usually wait as long
2126 * as necessary, but this wait can be interrupted.
2127 */
2128 if ((sf = sf_buf_alloc(pg,
2129 (mnw ? SFB_NOWAIT : SFB_CATCH))) == NULL) {
2130 mbstat.sf_allocfail++;
2131 vm_page_lock_queues();
2132 vm_page_unwire(pg, 0);
2133 /*
2134 * XXX: Not same check as above!?
2135 */
2136 if (pg->wire_count == 0 && pg->object == NULL)
2137 vm_page_free(pg);
2138 vm_page_unlock_queues();
2139 error = (mnw ? EAGAIN : EINTR);
2140 break;
2141 }
2142
2143 /*
2144 * Get an mbuf and set it up as having
2145 * external storage.
2146 */
2147 m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA);
2148 if (m0 == NULL) {
2149 error = (mnw ? EAGAIN : ENOBUFS);
2150 sf_buf_mext((void *)sf_buf_kva(sf), sf);
2151 break;
2152 }
2153 MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext,
2154 sfs, sf, M_RDONLY, EXT_SFBUF);
2155 m0->m_data = (char *)sf_buf_kva(sf) + pgoff;
2156 m0->m_len = xfsize;
2157
2158 /* Append to mbuf chain. */
2159 if (m != NULL)
2160 m_cat(m, m0);
2161 else
2162 m = m0;
2163
2164 /* Keep track of bits processed. */
2165 loopbytes += xfsize;
2166 off += xfsize;
2167
2168 if (sfs != NULL) {
2169 mtx_lock(&sfs->mtx);
2170 sfs->count++;
2171 mtx_unlock(&sfs->mtx);
2172 }
2173 }
2174
2175 /* Add the buffer chain to the socket buffer. */
2176 if (m != NULL) {
2177 int mlen, err;
2178
2179 mlen = m_length(m, NULL);
2180 SOCKBUF_LOCK(&so->so_snd);
2181 if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
2182 error = EPIPE;
2183 SOCKBUF_UNLOCK(&so->so_snd);
2184 goto done;
2185 }
2186 SOCKBUF_UNLOCK(&so->so_snd);
2187 /* Avoid error aliasing. */
2188 err = (*so->so_proto->pr_usrreqs->pru_send)
2189 (so, 0, m, NULL, NULL, td);
2190 if (err == 0) {
2191 /*
2192 * We need two counters to get the
2193 * file offset and nbytes to send
2194 * right:
2195 * - sbytes contains the total amount
2196 * of bytes sent, including headers.
2197 * - fsbytes contains the total amount
2198 * of bytes sent from the file.
2199 */
2200 sbytes += mlen;
2201 fsbytes += mlen;
2202 if (hdrlen) {
2203 fsbytes -= hdrlen;
2204 hdrlen = 0;
2205 }
2206 } else if (error == 0)
2207 error = err;
2208 m = NULL; /* pru_send always consumes */
2209 }
2210
2211 /* Quit outer loop on error or when we're done. */
2212 if (done)
2213 break;
2214 if (error)
2215 goto done;
2216 }
2217
2218 /*
2219 * Send trailers. Wimp out and use writev(2).
2220 */
2221 if (trl_uio != NULL) {
2222 error = kern_writev(td, uap->s, trl_uio);
2223 if (error)
2224 goto done;
2225 sbytes += td->td_retval[0];
2226 }
2227
2228done:
2229 sbunlock(&so->so_snd);
2230out:
2231 /*
2232 * If there was no error we have to clear td->td_retval[0]
2233 * because it may have been set by writev.
2234 */
2235 if (error == 0) {
2236 td->td_retval[0] = 0;
2237 }
2238 if (uap->sbytes != NULL) {
2239 copyout(&sbytes, uap->sbytes, sizeof(off_t));
2240 }
2241 if (obj != NULL)
2242 vm_object_deallocate(obj);
2243 if (vp != NULL) {
2244 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
2245 vrele(vp);
2246 VFS_UNLOCK_GIANT(vfslocked);
2247 }
2248 if (so)
2249 fdrop(sock_fp, td);
2250 if (m)
2251 m_freem(m);
2252
2253 if (sfs != NULL) {
2254 mtx_lock(&sfs->mtx);
2255 if (sfs->count != 0)
2256 cv_wait(&sfs->cv, &sfs->mtx);
2257 KASSERT(sfs->count == 0, ("sendfile sync still busy"));
2258 cv_destroy(&sfs->cv);
2259 mtx_destroy(&sfs->mtx);
2260 free(sfs, M_TEMP);
2261 }
2262
2263 if (error == ERESTART)
2264 error = EINTR;
2265
2266 return (error);
2267}
2268
2269/*
2270 * SCTP syscalls.
2271 * Functionality only compiled in if SCTP is defined in the kernel Makefile,
2272 * otherwise all return EOPNOTSUPP.
2273 * XXX: We should make this loadable one day.
2274 */
2275int
2276sctp_peeloff(td, uap)
2277 struct thread *td;
2278 struct sctp_peeloff_args /* {
2279 int sd;
2280 caddr_t name;
2281 } */ *uap;
2282{
2283#ifdef SCTP
2284 struct filedesc *fdp;
2285 struct file *nfp = NULL;
2286 int error;
2287 struct socket *head, *so;
2288 int fd;
2289 u_int fflag;
2290
2291 fdp = td->td_proc->p_fd;
2292 error = fgetsock(td, uap->sd, &head, &fflag);
2293 if (error)
2294 goto done2;
2295 error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name);
2296 if (error)
2297 goto done2;
2298 /*
2299 * At this point we know we do have a assoc to pull
2300 * we proceed to get the fd setup. This may block
2301 * but that is ok.
2302 */
2303
2304 error = falloc(td, &nfp, &fd);
2305 if (error)
2306 goto done;
2307 td->td_retval[0] = fd;
2308
2309 so = sonewconn(head, SS_ISCONNECTED);
2310 if (so == NULL)
2311 goto noconnection;
2312 /*
2313 * Before changing the flags on the socket, we have to bump the
2314 * reference count. Otherwise, if the protocol calls sofree(),
2315 * the socket will be released due to a zero refcount.
2316 */
2317 SOCK_LOCK(so);
2318 soref(so); /* file descriptor reference */
2319 SOCK_UNLOCK(so);
2320
2321 ACCEPT_LOCK();
2322
2323 TAILQ_REMOVE(&head->so_comp, so, so_list);
2324 head->so_qlen--;
2325 so->so_state |= (head->so_state & SS_NBIO);
2326 so->so_state &= ~SS_NOFDREF;
2327 so->so_qstate &= ~SQ_COMP;
2328 so->so_head = NULL;
2329 ACCEPT_UNLOCK();
2330 finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
2331 error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name);
2332 if (error)
2333 goto noconnection;
2334 if (head->so_sigio != NULL)
2335 fsetown(fgetown(&head->so_sigio), &so->so_sigio);
2336
2337noconnection:
2338 /*
2339 * close the new descriptor, assuming someone hasn't ripped it
2340 * out from under us.
2341 */
2342 if (error)
2343 fdclose(fdp, nfp, fd, td);
2344
2345 /*
2346 * Release explicitly held references before returning.
2347 */
2348done:
2349 if (nfp != NULL)
2350 fdrop(nfp, td);
2351 fputsock(head);
2352done2:
2353 return (error);
2354#else /* SCTP */
2355 return (EOPNOTSUPP);
2356#endif /* SCTP */
2357}
2358
2359int
2360sctp_generic_sendmsg (td, uap)
2361 struct thread *td;
2362 struct sctp_generic_sendmsg_args /* {
2363 int sd,
2364 caddr_t msg,
2365 int mlen,
2366 caddr_t to,
2367 __socklen_t tolen,
2368 struct sctp_sndrcvinfo *sinfo,
2369 int flags
2370 } */ *uap;
2371{
2372#ifdef SCTP
2373 struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
2374 struct socket *so;
2375 struct file *fp = NULL;
2376 int use_rcvinfo = 1;
2377 int error = 0, len;
2378 struct sockaddr *to = NULL;
2379#ifdef KTRACE
2380 struct uio *ktruio = NULL;
2381#endif
2382 struct uio auio;
2383 struct iovec iov[1];
2384
2385 if (uap->sinfo) {
2386 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
2387 if (error)
2388 return (error);
2389 u_sinfo = &sinfo;
2390 }
2391 if (uap->tolen) {
2392 error = getsockaddr(&to, uap->to, uap->tolen);
2393 if (error) {
2394 to = NULL;
2395 goto sctp_bad2;
2396 }
2397 }
2398
2399 error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
2400 if (error)
2401 goto sctp_bad;
2402#ifdef KTRACE
2403 if (KTRPOINT(td, KTR_STRUCT))
2404 ktrsockaddr(to);
2405#endif
2406
2407 iov[0].iov_base = uap->msg;
2408 iov[0].iov_len = uap->mlen;
2409
2410 so = (struct socket *)fp->f_data;
2411#ifdef MAC
2412 SOCK_LOCK(so);
2413 error = mac_socket_check_send(td->td_ucred, so);
2414 SOCK_UNLOCK(so);
2415 if (error)
2416 goto sctp_bad;
2417#endif /* MAC */
2418
2419 auio.uio_iov = iov;
2420 auio.uio_iovcnt = 1;
2421 auio.uio_segflg = UIO_USERSPACE;
2422 auio.uio_rw = UIO_WRITE;
2423 auio.uio_td = td;
2424 auio.uio_offset = 0; /* XXX */
2425 auio.uio_resid = 0;
2426 len = auio.uio_resid = uap->mlen;
2427 error = sctp_lower_sosend(so, to, &auio,
2428 (struct mbuf *)NULL, (struct mbuf *)NULL,
2429 uap->flags, use_rcvinfo, u_sinfo, td);
2430 if (error) {
2431 if (auio.uio_resid != len && (error == ERESTART ||
2432 error == EINTR || error == EWOULDBLOCK))
2433 error = 0;
2434 /* Generation of SIGPIPE can be controlled per socket. */
2435 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
2436 !(uap->flags & MSG_NOSIGNAL)) {
2437 PROC_LOCK(td->td_proc);
2438 psignal(td->td_proc, SIGPIPE);
2439 PROC_UNLOCK(td->td_proc);
2440 }
2441 }
2442 if (error == 0)
2443 td->td_retval[0] = len - auio.uio_resid;
2444#ifdef KTRACE
2445 if (ktruio != NULL) {
2446 ktruio->uio_resid = td->td_retval[0];
2447 ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
2448 }
2449#endif /* KTRACE */
2450sctp_bad:
2451 if (fp)
2452 fdrop(fp, td);
2453sctp_bad2:
2454 if (to)
2455 free(to, M_SONAME);
2456 return (error);
2457#else /* SCTP */
2458 return (EOPNOTSUPP);
2459#endif /* SCTP */
2460}
2461
2462int
2463sctp_generic_sendmsg_iov(td, uap)
2464 struct thread *td;
2465 struct sctp_generic_sendmsg_iov_args /* {
2466 int sd,
2467 struct iovec *iov,
2468 int iovlen,
2469 caddr_t to,
2470 __socklen_t tolen,
2471 struct sctp_sndrcvinfo *sinfo,
2472 int flags
2473 } */ *uap;
2474{
2475#ifdef SCTP
2476 struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
2477 struct socket *so;
2478 struct file *fp = NULL;
2479 int use_rcvinfo = 1;
2480 int error=0, len, i;
2481 struct sockaddr *to = NULL;
2482#ifdef KTRACE
2483 struct uio *ktruio = NULL;
2484#endif
2485 struct uio auio;
2486 struct iovec *iov, *tiov;
2487
2488 if (uap->sinfo) {
2489 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
2490 if (error)
2491 return (error);
2492 u_sinfo = &sinfo;
2493 }
2494 if (uap->tolen) {
2495 error = getsockaddr(&to, uap->to, uap->tolen);
2496 if (error) {
2497 to = NULL;
2498 goto sctp_bad2;
2499 }
2500 }
2501
2502 error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
2503 if (error)
2504 goto sctp_bad1;
2505
2506 error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
2507 if (error)
2508 goto sctp_bad1;
2509#ifdef KTRACE
2510 if (KTRPOINT(td, KTR_STRUCT))
2511 ktrsockaddr(to);
2512#endif
2513
2514 so = (struct socket *)fp->f_data;
2515#ifdef MAC
2516 SOCK_LOCK(so);
2517 error = mac_socket_check_send(td->td_ucred, so);
2518 SOCK_UNLOCK(so);
2519 if (error)
2520 goto sctp_bad;
2521#endif /* MAC */
2522
2523 auio.uio_iov = iov;
2524 auio.uio_iovcnt = uap->iovlen;
2525 auio.uio_segflg = UIO_USERSPACE;
2526 auio.uio_rw = UIO_WRITE;
2527 auio.uio_td = td;
2528 auio.uio_offset = 0; /* XXX */
2529 auio.uio_resid = 0;
2530 tiov = iov;
2531 for (i = 0; i <uap->iovlen; i++, tiov++) {
2532 if ((auio.uio_resid += tiov->iov_len) < 0) {
2533 error = EINVAL;
2534 goto sctp_bad;
2535 }
2536 }
2537 len = auio.uio_resid;
2538 error = sctp_lower_sosend(so, to, &auio,
2539 (struct mbuf *)NULL, (struct mbuf *)NULL,
2540 uap->flags, use_rcvinfo, u_sinfo, td);
2541 if (error) {
2542 if (auio.uio_resid != len && (error == ERESTART ||
2543 error == EINTR || error == EWOULDBLOCK))
2544 error = 0;
2545 /* Generation of SIGPIPE can be controlled per socket */
2546 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
2547 !(uap->flags & MSG_NOSIGNAL)) {
2548 PROC_LOCK(td->td_proc);
2549 psignal(td->td_proc, SIGPIPE);
2550 PROC_UNLOCK(td->td_proc);
2551 }
2552 }
2553 if (error == 0)
2554 td->td_retval[0] = len - auio.uio_resid;
2555#ifdef KTRACE
2556 if (ktruio != NULL) {
2557 ktruio->uio_resid = td->td_retval[0];
2558 ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
2559 }
2560#endif /* KTRACE */
2561sctp_bad:
2562 free(iov, M_IOV);
2563sctp_bad1:
2564 if (fp)
2565 fdrop(fp, td);
2566sctp_bad2:
2567 if (to)
2568 free(to, M_SONAME);
2569 return (error);
2570#else /* SCTP */
2571 return (EOPNOTSUPP);
2572#endif /* SCTP */
2573}
2574
2575int
2576sctp_generic_recvmsg(td, uap)
2577 struct thread *td;
2578 struct sctp_generic_recvmsg_args /* {
2579 int sd,
2580 struct iovec *iov,
2581 int iovlen,
2582 struct sockaddr *from,
2583 __socklen_t *fromlenaddr,
2584 struct sctp_sndrcvinfo *sinfo,
2585 int *msg_flags
2586 } */ *uap;
2587{
2588#ifdef SCTP
2589 u_int8_t sockbufstore[256];
2590 struct uio auio;
2591 struct iovec *iov, *tiov;
2592 struct sctp_sndrcvinfo sinfo;
2593 struct socket *so;
2594 struct file *fp = NULL;
2595 struct sockaddr *fromsa;
2596 int fromlen;
2597 int len, i, msg_flags;
2598 int error = 0;
2599#ifdef KTRACE
2600 struct uio *ktruio = NULL;
2601#endif
2602 error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
2603 if (error) {
2604 return (error);
2605 }
2606 error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
2607 if (error) {
2608 goto out1;
2609 }
2610
2611 so = fp->f_data;
2612#ifdef MAC
2613 SOCK_LOCK(so);
2614 error = mac_socket_check_receive(td->td_ucred, so);
2615 SOCK_UNLOCK(so);
2616 if (error) {
2617 goto out;
2618 return (error);
2619 }
2620#endif /* MAC */
2621
2622 if (uap->fromlenaddr) {
2623 error = copyin(uap->fromlenaddr,
2624 &fromlen, sizeof (fromlen));
2625 if (error) {
2626 goto out;
2627 }
2628 } else {
2629 fromlen = 0;
2630 }
2631 if(uap->msg_flags) {
2632 error = copyin(uap->msg_flags, &msg_flags, sizeof (int));
2633 if (error) {
2634 goto out;
2635 }
2636 } else {
2637 msg_flags = 0;
2638 }
2639 auio.uio_iov = iov;
2640 auio.uio_iovcnt = uap->iovlen;
2641 auio.uio_segflg = UIO_USERSPACE;
2642 auio.uio_rw = UIO_READ;
2643 auio.uio_td = td;
2644 auio.uio_offset = 0; /* XXX */
2645 auio.uio_resid = 0;
2646 tiov = iov;
2647 for (i = 0; i <uap->iovlen; i++, tiov++) {
2648 if ((auio.uio_resid += tiov->iov_len) < 0) {
2649 error = EINVAL;
2650 goto out;
2651 }
2652 }
2653 len = auio.uio_resid;
2654 fromsa = (struct sockaddr *)sockbufstore;
2655
2656#ifdef KTRACE
2657 if (KTRPOINT(td, KTR_GENIO))
2658 ktruio = cloneuio(&auio);
2659#endif /* KTRACE */
2660 error = sctp_sorecvmsg(so, &auio, (struct mbuf **)NULL,
2661 fromsa, fromlen, &msg_flags,
2662 (struct sctp_sndrcvinfo *)&sinfo, 1);
2663 if (error) {
2664 if (auio.uio_resid != (int)len && (error == ERESTART ||
2665 error == EINTR || error == EWOULDBLOCK))
2666 error = 0;
2667 } else {
2668 if (uap->sinfo)
2669 error = copyout(&sinfo, uap->sinfo, sizeof (sinfo));
2670 }
2671#ifdef KTRACE
2672 if (ktruio != NULL) {
2673 ktruio->uio_resid = (int)len - auio.uio_resid;
2674 ktrgenio(uap->sd, UIO_READ, ktruio, error);
2675 }
2676#endif /* KTRACE */
2677 if (error)
2678 goto out;
2679 td->td_retval[0] = (int)len - auio.uio_resid;
2680
2681 if (fromlen && uap->from) {
2682 len = fromlen;
2683 if (len <= 0 || fromsa == 0)
2684 len = 0;
2685 else {
2686 len = MIN(len, fromsa->sa_len);
2687 error = copyout(fromsa, uap->from, (unsigned)len);
2688 if (error)
2689 goto out;
2690 }
2691 error = copyout(&len, uap->fromlenaddr, sizeof (socklen_t));
2692 if (error) {
2693 goto out;
2694 }
2695 }
2696#ifdef KTRACE
2697 if (KTRPOINT(td, KTR_STRUCT))
2698 ktrsockaddr(fromsa);
2699#endif
2700 if (uap->msg_flags) {
2701 error = copyout(&msg_flags, uap->msg_flags, sizeof (int));
2702 if (error) {
2703 goto out;
2704 }
2705 }
2706out:
2707 free(iov, M_IOV);
2708out1:
2709 if (fp)
2710 fdrop(fp, td);
2711
2712 return (error);
2713#else /* SCTP */
2714 return (EOPNOTSUPP);
2715#endif /* SCTP */
2716}
1628 m->m_len = buflen;
1629 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1630 if (error)
1631 (void) m_free(m);
1632 else {
1633 *mp = m;
1634 if (type == MT_SONAME) {
1635 sa = mtod(m, struct sockaddr *);
1636
1637#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1638 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1639 sa->sa_family = sa->sa_len;
1640#endif
1641 sa->sa_len = buflen;
1642 }
1643 }
1644 return (error);
1645}
1646
1647int
1648getsockaddr(namp, uaddr, len)
1649 struct sockaddr **namp;
1650 caddr_t uaddr;
1651 size_t len;
1652{
1653 struct sockaddr *sa;
1654 int error;
1655
1656 if (len > SOCK_MAXADDRLEN)
1657 return (ENAMETOOLONG);
1658 if (len < offsetof(struct sockaddr, sa_data[0]))
1659 return (EINVAL);
1660 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1661 error = copyin(uaddr, sa, len);
1662 if (error) {
1663 FREE(sa, M_SONAME);
1664 } else {
1665#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1666 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1667 sa->sa_family = sa->sa_len;
1668#endif
1669 sa->sa_len = len;
1670 *namp = sa;
1671 }
1672 return (error);
1673}
1674
1675#include <sys/condvar.h>
1676
1677struct sendfile_sync {
1678 struct mtx mtx;
1679 struct cv cv;
1680 unsigned count;
1681};
1682
1683/*
1684 * Detach mapped page and release resources back to the system.
1685 */
1686void
1687sf_buf_mext(void *addr, void *args)
1688{
1689 vm_page_t m;
1690 struct sendfile_sync *sfs;
1691
1692 m = sf_buf_page(args);
1693 sf_buf_free(args);
1694 vm_page_lock_queues();
1695 vm_page_unwire(m, 0);
1696 /*
1697 * Check for the object going away on us. This can
1698 * happen since we don't hold a reference to it.
1699 * If so, we're responsible for freeing the page.
1700 */
1701 if (m->wire_count == 0 && m->object == NULL)
1702 vm_page_free(m);
1703 vm_page_unlock_queues();
1704 if (addr == NULL)
1705 return;
1706 sfs = addr;
1707 mtx_lock(&sfs->mtx);
1708 KASSERT(sfs->count> 0, ("Sendfile sync botchup count == 0"));
1709 if (--sfs->count == 0)
1710 cv_signal(&sfs->cv);
1711 mtx_unlock(&sfs->mtx);
1712}
1713
/*
 * sendfile(2)
 *
 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 *	 struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 *
 * Transmit the file open on 'fd', beginning at 'offset', over the socket
 * 's'.  At most 'nbytes' are sent; nbytes == 0 means "until EOF".  A header
 * and/or trailer may be supplied through 'hdtr', and when 'sbytes' is given
 * the total byte count sent is stored there.
 *
 * This entry point is the non-compat flavour: it simply calls do_sendfile()
 * with compat == 0.
 */
int
sendfile(struct thread *td, struct sendfile_args *uap)
{
	int error;

	error = do_sendfile(td, uap, 0);
	return (error);
}
1731
1732static int
1733do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
1734{
1735 struct sf_hdtr hdtr;
1736 struct uio *hdr_uio, *trl_uio;
1737 int error;
1738
1739 hdr_uio = trl_uio = NULL;
1740
1741 if (uap->hdtr != NULL) {
1742 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1743 if (error)
1744 goto out;
1745 if (hdtr.headers != NULL) {
1746 error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio);
1747 if (error)
1748 goto out;
1749 }
1750 if (hdtr.trailers != NULL) {
1751 error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio);
1752 if (error)
1753 goto out;
1754
1755 }
1756 }
1757
1758 error = kern_sendfile(td, uap, hdr_uio, trl_uio, compat);
1759out:
1760 if (hdr_uio)
1761 free(hdr_uio, M_IOV);
1762 if (trl_uio)
1763 free(trl_uio, M_IOV);
1764 return (error);
1765}
1766
#ifdef COMPAT_FREEBSD4
/*
 * FreeBSD 4 flavour of sendfile(2): repack the old argument structure into
 * a struct sendfile_args and call do_sendfile() with compat == 1.
 */
int
freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
{
	struct sendfile_args native;

	/* Descriptors. */
	native.fd = uap->fd;
	native.s = uap->s;
	/* Range of the file to send. */
	native.offset = uap->offset;
	native.nbytes = uap->nbytes;
	/* Optional header/trailer, result pointer and flags. */
	native.hdtr = uap->hdtr;
	native.sbytes = uap->sbytes;
	native.flags = uap->flags;

	return (do_sendfile(td, &native, 1));
}
#endif /* COMPAT_FREEBSD4 */
1784
1785int
1786kern_sendfile(struct thread *td, struct sendfile_args *uap,
1787 struct uio *hdr_uio, struct uio *trl_uio, int compat)
1788{
1789 struct file *sock_fp;
1790 struct vnode *vp;
1791 struct vm_object *obj = NULL;
1792 struct socket *so = NULL;
1793 struct mbuf *m = NULL;
1794 struct sf_buf *sf;
1795 struct vm_page *pg;
1796 off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0;
1797 int error, hdrlen = 0, mnw = 0;
1798 int vfslocked;
1799 struct sendfile_sync *sfs = NULL;
1800
1801 /*
1802 * The file descriptor must be a regular file and have a
1803 * backing VM object.
1804 * File offset must be positive. If it goes beyond EOF
1805 * we send only the header/trailer and no payload data.
1806 */
1807 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
1808 goto out;
1809 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1810 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1811 if (vp->v_type == VREG) {
1812 obj = vp->v_object;
1813 if (obj != NULL) {
1814 /*
1815 * Temporarily increase the backing VM
1816 * object's reference count so that a forced
1817 * reclamation of its vnode does not
1818 * immediately destroy it.
1819 */
1820 VM_OBJECT_LOCK(obj);
1821 if ((obj->flags & OBJ_DEAD) == 0) {
1822 vm_object_reference_locked(obj);
1823 VM_OBJECT_UNLOCK(obj);
1824 } else {
1825 VM_OBJECT_UNLOCK(obj);
1826 obj = NULL;
1827 }
1828 }
1829 }
1830 VOP_UNLOCK(vp, 0);
1831 VFS_UNLOCK_GIANT(vfslocked);
1832 if (obj == NULL) {
1833 error = EINVAL;
1834 goto out;
1835 }
1836 if (uap->offset < 0) {
1837 error = EINVAL;
1838 goto out;
1839 }
1840
1841 /*
1842 * The socket must be a stream socket and connected.
1843 * Remember if it a blocking or non-blocking socket.
1844 */
1845 if ((error = getsock(td->td_proc->p_fd, uap->s, &sock_fp,
1846 NULL)) != 0)
1847 goto out;
1848 so = sock_fp->f_data;
1849 if (so->so_type != SOCK_STREAM) {
1850 error = EINVAL;
1851 goto out;
1852 }
1853 if ((so->so_state & SS_ISCONNECTED) == 0) {
1854 error = ENOTCONN;
1855 goto out;
1856 }
1857 /*
1858 * Do not wait on memory allocations but return ENOMEM for
1859 * caller to retry later.
1860 * XXX: Experimental.
1861 */
1862 if (uap->flags & SF_MNOWAIT)
1863 mnw = 1;
1864
1865 if (uap->flags & SF_SYNC) {
1866 sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK);
1867 memset(sfs, 0, sizeof *sfs);
1868 mtx_init(&sfs->mtx, "sendfile", MTX_DEF, 0);
1869 cv_init(&sfs->cv, "sendfile");
1870 }
1871
1872#ifdef MAC
1873 SOCK_LOCK(so);
1874 error = mac_socket_check_send(td->td_ucred, so);
1875 SOCK_UNLOCK(so);
1876 if (error)
1877 goto out;
1878#endif
1879
1880 /* If headers are specified copy them into mbufs. */
1881 if (hdr_uio != NULL) {
1882 hdr_uio->uio_td = td;
1883 hdr_uio->uio_rw = UIO_WRITE;
1884 if (hdr_uio->uio_resid > 0) {
1885 /*
1886 * In FBSD < 5.0 the nbytes to send also included
1887 * the header. If compat is specified subtract the
1888 * header size from nbytes.
1889 */
1890 if (compat) {
1891 if (uap->nbytes > hdr_uio->uio_resid)
1892 uap->nbytes -= hdr_uio->uio_resid;
1893 else
1894 uap->nbytes = 0;
1895 }
1896 m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK),
1897 0, 0, 0);
1898 if (m == NULL) {
1899 error = mnw ? EAGAIN : ENOBUFS;
1900 goto out;
1901 }
1902 hdrlen = m_length(m, NULL);
1903 }
1904 }
1905
1906 /*
1907 * Protect against multiple writers to the socket.
1908 *
1909 * XXXRW: Historically this has assumed non-interruptibility, so now
1910 * we implement that, but possibly shouldn't.
1911 */
1912 (void)sblock(&so->so_snd, SBL_WAIT | SBL_NOINTR);
1913
1914 /*
1915 * Loop through the pages of the file, starting with the requested
1916 * offset. Get a file page (do I/O if necessary), map the file page
1917 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1918 * it on the socket.
1919 * This is done in two loops. The inner loop turns as many pages
1920 * as it can, up to available socket buffer space, without blocking
1921 * into mbufs to have it bulk delivered into the socket send buffer.
1922 * The outer loop checks the state and available space of the socket
1923 * and takes care of the overall progress.
1924 */
1925 for (off = uap->offset, rem = uap->nbytes; ; ) {
1926 int loopbytes = 0;
1927 int space = 0;
1928 int done = 0;
1929
1930 /*
1931 * Check the socket state for ongoing connection,
1932 * no errors and space in socket buffer.
1933 * If space is low allow for the remainder of the
1934 * file to be processed if it fits the socket buffer.
1935 * Otherwise block in waiting for sufficient space
1936 * to proceed, or if the socket is nonblocking, return
1937 * to userland with EAGAIN while reporting how far
1938 * we've come.
1939 * We wait until the socket buffer has significant free
1940 * space to do bulk sends. This makes good use of file
1941 * system read ahead and allows packet segmentation
1942 * offloading hardware to take over lots of work. If
1943 * we were not careful here we would send off only one
1944 * sfbuf at a time.
1945 */
1946 SOCKBUF_LOCK(&so->so_snd);
1947 if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2)
1948 so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2;
1949retry_space:
1950 if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
1951 error = EPIPE;
1952 SOCKBUF_UNLOCK(&so->so_snd);
1953 goto done;
1954 } else if (so->so_error) {
1955 error = so->so_error;
1956 so->so_error = 0;
1957 SOCKBUF_UNLOCK(&so->so_snd);
1958 goto done;
1959 }
1960 space = sbspace(&so->so_snd);
1961 if (space < rem &&
1962 (space <= 0 ||
1963 space < so->so_snd.sb_lowat)) {
1964 if (so->so_state & SS_NBIO) {
1965 SOCKBUF_UNLOCK(&so->so_snd);
1966 error = EAGAIN;
1967 goto done;
1968 }
1969 /*
1970 * sbwait drops the lock while sleeping.
1971 * When we loop back to retry_space the
1972 * state may have changed and we retest
1973 * for it.
1974 */
1975 error = sbwait(&so->so_snd);
1976 /*
1977 * An error from sbwait usually indicates that we've
1978 * been interrupted by a signal. If we've sent anything
1979 * then return bytes sent, otherwise return the error.
1980 */
1981 if (error) {
1982 SOCKBUF_UNLOCK(&so->so_snd);
1983 goto done;
1984 }
1985 goto retry_space;
1986 }
1987 SOCKBUF_UNLOCK(&so->so_snd);
1988
1989 /*
1990 * Reduce space in the socket buffer by the size of
1991 * the header mbuf chain.
1992 * hdrlen is set to 0 after the first loop.
1993 */
1994 space -= hdrlen;
1995
1996 /*
1997 * Loop and construct maximum sized mbuf chain to be bulk
1998 * dumped into socket buffer.
1999 */
2000 while(space > loopbytes) {
2001 vm_pindex_t pindex;
2002 vm_offset_t pgoff;
2003 struct mbuf *m0;
2004
2005 VM_OBJECT_LOCK(obj);
2006 /*
2007 * Calculate the amount to transfer.
2008 * Not to exceed a page, the EOF,
2009 * or the passed in nbytes.
2010 */
2011 pgoff = (vm_offset_t)(off & PAGE_MASK);
2012 xfsize = omin(PAGE_SIZE - pgoff,
2013 obj->un_pager.vnp.vnp_size - uap->offset -
2014 fsbytes - loopbytes);
2015 if (uap->nbytes)
2016 rem = (uap->nbytes - fsbytes - loopbytes);
2017 else
2018 rem = obj->un_pager.vnp.vnp_size -
2019 uap->offset - fsbytes - loopbytes;
2020 xfsize = omin(rem, xfsize);
2021 if (xfsize <= 0) {
2022 VM_OBJECT_UNLOCK(obj);
2023 done = 1; /* all data sent */
2024 break;
2025 }
2026 /*
2027 * Don't overflow the send buffer.
2028 * Stop here and send out what we've
2029 * already got.
2030 */
2031 if (space < loopbytes + xfsize) {
2032 VM_OBJECT_UNLOCK(obj);
2033 break;
2034 }
2035
2036 /*
2037 * Attempt to look up the page. Allocate
2038 * if not found or wait and loop if busy.
2039 */
2040 pindex = OFF_TO_IDX(off);
2041 pg = vm_page_grab(obj, pindex, VM_ALLOC_NOBUSY |
2042 VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_RETRY);
2043
2044 /*
2045 * Check if page is valid for what we need,
2046 * otherwise initiate I/O.
2047 * If we already turned some pages into mbufs,
2048 * send them off before we come here again and
2049 * block.
2050 */
2051 if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize))
2052 VM_OBJECT_UNLOCK(obj);
2053 else if (m != NULL)
2054 error = EAGAIN; /* send what we already got */
2055 else if (uap->flags & SF_NODISKIO)
2056 error = EBUSY;
2057 else {
2058 int bsize, resid;
2059
2060 /*
2061 * Ensure that our page is still around
2062 * when the I/O completes.
2063 */
2064 vm_page_io_start(pg);
2065 VM_OBJECT_UNLOCK(obj);
2066
2067 /*
2068 * Get the page from backing store.
2069 */
2070 bsize = vp->v_mount->mnt_stat.f_iosize;
2071 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
2072 vn_lock(vp, LK_SHARED | LK_RETRY);
2073
2074 /*
2075 * XXXMAC: Because we don't have fp->f_cred
2076 * here, we pass in NOCRED. This is probably
2077 * wrong, but is consistent with our original
2078 * implementation.
2079 */
2080 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
2081 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
2082 IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
2083 td->td_ucred, NOCRED, &resid, td);
2084 VOP_UNLOCK(vp, 0);
2085 VFS_UNLOCK_GIANT(vfslocked);
2086 VM_OBJECT_LOCK(obj);
2087 vm_page_io_finish(pg);
2088 if (!error)
2089 VM_OBJECT_UNLOCK(obj);
2090 mbstat.sf_iocnt++;
2091 }
2092 if (error) {
2093 vm_page_lock_queues();
2094 vm_page_unwire(pg, 0);
2095 /*
2096 * See if anyone else might know about
2097 * this page. If not and it is not valid,
2098 * then free it.
2099 */
2100 if (pg->wire_count == 0 && pg->valid == 0 &&
2101 pg->busy == 0 && !(pg->oflags & VPO_BUSY) &&
2102 pg->hold_count == 0) {
2103 vm_page_free(pg);
2104 }
2105 vm_page_unlock_queues();
2106 VM_OBJECT_UNLOCK(obj);
2107 if (error == EAGAIN)
2108 error = 0; /* not a real error */
2109 break;
2110 }
2111
2112 /*
2113 * Get a sendfile buf. We usually wait as long
2114 * as necessary, but this wait can be interrupted.
2115 */
2116 if ((sf = sf_buf_alloc(pg,
2117 (mnw ? SFB_NOWAIT : SFB_CATCH))) == NULL) {
2118 mbstat.sf_allocfail++;
2119 vm_page_lock_queues();
2120 vm_page_unwire(pg, 0);
2121 /*
2122 * XXX: Not same check as above!?
2123 */
2124 if (pg->wire_count == 0 && pg->object == NULL)
2125 vm_page_free(pg);
2126 vm_page_unlock_queues();
2127 error = (mnw ? EAGAIN : EINTR);
2128 break;
2129 }
2130
2131 /*
2132 * Get an mbuf and set it up as having
2133 * external storage.
2134 */
2135 m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA);
2136 if (m0 == NULL) {
2137 error = (mnw ? EAGAIN : ENOBUFS);
2138 sf_buf_mext((void *)sf_buf_kva(sf), sf);
2139 break;
2140 }
2141 MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext,
2142 sfs, sf, M_RDONLY, EXT_SFBUF);
2143 m0->m_data = (char *)sf_buf_kva(sf) + pgoff;
2144 m0->m_len = xfsize;
2145
2146 /* Append to mbuf chain. */
2147 if (m != NULL)
2148 m_cat(m, m0);
2149 else
2150 m = m0;
2151
2152 /* Keep track of bits processed. */
2153 loopbytes += xfsize;
2154 off += xfsize;
2155
2156 if (sfs != NULL) {
2157 mtx_lock(&sfs->mtx);
2158 sfs->count++;
2159 mtx_unlock(&sfs->mtx);
2160 }
2161 }
2162
2163 /* Add the buffer chain to the socket buffer. */
2164 if (m != NULL) {
2165 int mlen, err;
2166
2167 mlen = m_length(m, NULL);
2168 SOCKBUF_LOCK(&so->so_snd);
2169 if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
2170 error = EPIPE;
2171 SOCKBUF_UNLOCK(&so->so_snd);
2172 goto done;
2173 }
2174 SOCKBUF_UNLOCK(&so->so_snd);
2175 /* Avoid error aliasing. */
2176 err = (*so->so_proto->pr_usrreqs->pru_send)
2177 (so, 0, m, NULL, NULL, td);
2178 if (err == 0) {
2179 /*
2180 * We need two counters to get the
2181 * file offset and nbytes to send
2182 * right:
2183 * - sbytes contains the total amount
2184 * of bytes sent, including headers.
2185 * - fsbytes contains the total amount
2186 * of bytes sent from the file.
2187 */
2188 sbytes += mlen;
2189 fsbytes += mlen;
2190 if (hdrlen) {
2191 fsbytes -= hdrlen;
2192 hdrlen = 0;
2193 }
2194 } else if (error == 0)
2195 error = err;
2196 m = NULL; /* pru_send always consumes */
2197 }
2198
2199 /* Quit outer loop on error or when we're done. */
2200 if (done)
2201 break;
2202 if (error)
2203 goto done;
2204 }
2205
2206 /*
2207 * Send trailers. Wimp out and use writev(2).
2208 */
2209 if (trl_uio != NULL) {
2210 error = kern_writev(td, uap->s, trl_uio);
2211 if (error)
2212 goto done;
2213 sbytes += td->td_retval[0];
2214 }
2215
2216done:
2217 sbunlock(&so->so_snd);
2218out:
2219 /*
2220 * If there was no error we have to clear td->td_retval[0]
2221 * because it may have been set by writev.
2222 */
2223 if (error == 0) {
2224 td->td_retval[0] = 0;
2225 }
2226 if (uap->sbytes != NULL) {
2227 copyout(&sbytes, uap->sbytes, sizeof(off_t));
2228 }
2229 if (obj != NULL)
2230 vm_object_deallocate(obj);
2231 if (vp != NULL) {
2232 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
2233 vrele(vp);
2234 VFS_UNLOCK_GIANT(vfslocked);
2235 }
2236 if (so)
2237 fdrop(sock_fp, td);
2238 if (m)
2239 m_freem(m);
2240
2241 if (sfs != NULL) {
2242 mtx_lock(&sfs->mtx);
2243 if (sfs->count != 0)
2244 cv_wait(&sfs->cv, &sfs->mtx);
2245 KASSERT(sfs->count == 0, ("sendfile sync still busy"));
2246 cv_destroy(&sfs->cv);
2247 mtx_destroy(&sfs->mtx);
2248 free(sfs, M_TEMP);
2249 }
2250
2251 if (error == ERESTART)
2252 error = EINTR;
2253
2254 return (error);
2255}
2256
2257/*
2258 * SCTP syscalls.
2259 * Functionality only compiled in if SCTP is defined in the kernel Makefile,
2260 * otherwise all return EOPNOTSUPP.
2261 * XXX: We should make this loadable one day.
2262 */
2263int
2264sctp_peeloff(td, uap)
2265 struct thread *td;
2266 struct sctp_peeloff_args /* {
2267 int sd;
2268 caddr_t name;
2269 } */ *uap;
2270{
2271#ifdef SCTP
2272 struct filedesc *fdp;
2273 struct file *nfp = NULL;
2274 int error;
2275 struct socket *head, *so;
2276 int fd;
2277 u_int fflag;
2278
2279 fdp = td->td_proc->p_fd;
2280 error = fgetsock(td, uap->sd, &head, &fflag);
2281 if (error)
2282 goto done2;
2283 error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name);
2284 if (error)
2285 goto done2;
2286 /*
2287 * At this point we know we do have a assoc to pull
2288 * we proceed to get the fd setup. This may block
2289 * but that is ok.
2290 */
2291
2292 error = falloc(td, &nfp, &fd);
2293 if (error)
2294 goto done;
2295 td->td_retval[0] = fd;
2296
2297 so = sonewconn(head, SS_ISCONNECTED);
2298 if (so == NULL)
2299 goto noconnection;
2300 /*
2301 * Before changing the flags on the socket, we have to bump the
2302 * reference count. Otherwise, if the protocol calls sofree(),
2303 * the socket will be released due to a zero refcount.
2304 */
2305 SOCK_LOCK(so);
2306 soref(so); /* file descriptor reference */
2307 SOCK_UNLOCK(so);
2308
2309 ACCEPT_LOCK();
2310
2311 TAILQ_REMOVE(&head->so_comp, so, so_list);
2312 head->so_qlen--;
2313 so->so_state |= (head->so_state & SS_NBIO);
2314 so->so_state &= ~SS_NOFDREF;
2315 so->so_qstate &= ~SQ_COMP;
2316 so->so_head = NULL;
2317 ACCEPT_UNLOCK();
2318 finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
2319 error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name);
2320 if (error)
2321 goto noconnection;
2322 if (head->so_sigio != NULL)
2323 fsetown(fgetown(&head->so_sigio), &so->so_sigio);
2324
2325noconnection:
2326 /*
2327 * close the new descriptor, assuming someone hasn't ripped it
2328 * out from under us.
2329 */
2330 if (error)
2331 fdclose(fdp, nfp, fd, td);
2332
2333 /*
2334 * Release explicitly held references before returning.
2335 */
2336done:
2337 if (nfp != NULL)
2338 fdrop(nfp, td);
2339 fputsock(head);
2340done2:
2341 return (error);
2342#else /* SCTP */
2343 return (EOPNOTSUPP);
2344#endif /* SCTP */
2345}
2346
2347int
2348sctp_generic_sendmsg (td, uap)
2349 struct thread *td;
2350 struct sctp_generic_sendmsg_args /* {
2351 int sd,
2352 caddr_t msg,
2353 int mlen,
2354 caddr_t to,
2355 __socklen_t tolen,
2356 struct sctp_sndrcvinfo *sinfo,
2357 int flags
2358 } */ *uap;
2359{
2360#ifdef SCTP
2361 struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
2362 struct socket *so;
2363 struct file *fp = NULL;
2364 int use_rcvinfo = 1;
2365 int error = 0, len;
2366 struct sockaddr *to = NULL;
2367#ifdef KTRACE
2368 struct uio *ktruio = NULL;
2369#endif
2370 struct uio auio;
2371 struct iovec iov[1];
2372
2373 if (uap->sinfo) {
2374 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
2375 if (error)
2376 return (error);
2377 u_sinfo = &sinfo;
2378 }
2379 if (uap->tolen) {
2380 error = getsockaddr(&to, uap->to, uap->tolen);
2381 if (error) {
2382 to = NULL;
2383 goto sctp_bad2;
2384 }
2385 }
2386
2387 error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
2388 if (error)
2389 goto sctp_bad;
2390#ifdef KTRACE
2391 if (KTRPOINT(td, KTR_STRUCT))
2392 ktrsockaddr(to);
2393#endif
2394
2395 iov[0].iov_base = uap->msg;
2396 iov[0].iov_len = uap->mlen;
2397
2398 so = (struct socket *)fp->f_data;
2399#ifdef MAC
2400 SOCK_LOCK(so);
2401 error = mac_socket_check_send(td->td_ucred, so);
2402 SOCK_UNLOCK(so);
2403 if (error)
2404 goto sctp_bad;
2405#endif /* MAC */
2406
2407 auio.uio_iov = iov;
2408 auio.uio_iovcnt = 1;
2409 auio.uio_segflg = UIO_USERSPACE;
2410 auio.uio_rw = UIO_WRITE;
2411 auio.uio_td = td;
2412 auio.uio_offset = 0; /* XXX */
2413 auio.uio_resid = 0;
2414 len = auio.uio_resid = uap->mlen;
2415 error = sctp_lower_sosend(so, to, &auio,
2416 (struct mbuf *)NULL, (struct mbuf *)NULL,
2417 uap->flags, use_rcvinfo, u_sinfo, td);
2418 if (error) {
2419 if (auio.uio_resid != len && (error == ERESTART ||
2420 error == EINTR || error == EWOULDBLOCK))
2421 error = 0;
2422 /* Generation of SIGPIPE can be controlled per socket. */
2423 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
2424 !(uap->flags & MSG_NOSIGNAL)) {
2425 PROC_LOCK(td->td_proc);
2426 psignal(td->td_proc, SIGPIPE);
2427 PROC_UNLOCK(td->td_proc);
2428 }
2429 }
2430 if (error == 0)
2431 td->td_retval[0] = len - auio.uio_resid;
2432#ifdef KTRACE
2433 if (ktruio != NULL) {
2434 ktruio->uio_resid = td->td_retval[0];
2435 ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
2436 }
2437#endif /* KTRACE */
2438sctp_bad:
2439 if (fp)
2440 fdrop(fp, td);
2441sctp_bad2:
2442 if (to)
2443 free(to, M_SONAME);
2444 return (error);
2445#else /* SCTP */
2446 return (EOPNOTSUPP);
2447#endif /* SCTP */
2448}
2449
2450int
2451sctp_generic_sendmsg_iov(td, uap)
2452 struct thread *td;
2453 struct sctp_generic_sendmsg_iov_args /* {
2454 int sd,
2455 struct iovec *iov,
2456 int iovlen,
2457 caddr_t to,
2458 __socklen_t tolen,
2459 struct sctp_sndrcvinfo *sinfo,
2460 int flags
2461 } */ *uap;
2462{
2463#ifdef SCTP
2464 struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
2465 struct socket *so;
2466 struct file *fp = NULL;
2467 int use_rcvinfo = 1;
2468 int error=0, len, i;
2469 struct sockaddr *to = NULL;
2470#ifdef KTRACE
2471 struct uio *ktruio = NULL;
2472#endif
2473 struct uio auio;
2474 struct iovec *iov, *tiov;
2475
2476 if (uap->sinfo) {
2477 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
2478 if (error)
2479 return (error);
2480 u_sinfo = &sinfo;
2481 }
2482 if (uap->tolen) {
2483 error = getsockaddr(&to, uap->to, uap->tolen);
2484 if (error) {
2485 to = NULL;
2486 goto sctp_bad2;
2487 }
2488 }
2489
2490 error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
2491 if (error)
2492 goto sctp_bad1;
2493
2494 error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
2495 if (error)
2496 goto sctp_bad1;
2497#ifdef KTRACE
2498 if (KTRPOINT(td, KTR_STRUCT))
2499 ktrsockaddr(to);
2500#endif
2501
2502 so = (struct socket *)fp->f_data;
2503#ifdef MAC
2504 SOCK_LOCK(so);
2505 error = mac_socket_check_send(td->td_ucred, so);
2506 SOCK_UNLOCK(so);
2507 if (error)
2508 goto sctp_bad;
2509#endif /* MAC */
2510
2511 auio.uio_iov = iov;
2512 auio.uio_iovcnt = uap->iovlen;
2513 auio.uio_segflg = UIO_USERSPACE;
2514 auio.uio_rw = UIO_WRITE;
2515 auio.uio_td = td;
2516 auio.uio_offset = 0; /* XXX */
2517 auio.uio_resid = 0;
2518 tiov = iov;
2519 for (i = 0; i <uap->iovlen; i++, tiov++) {
2520 if ((auio.uio_resid += tiov->iov_len) < 0) {
2521 error = EINVAL;
2522 goto sctp_bad;
2523 }
2524 }
2525 len = auio.uio_resid;
2526 error = sctp_lower_sosend(so, to, &auio,
2527 (struct mbuf *)NULL, (struct mbuf *)NULL,
2528 uap->flags, use_rcvinfo, u_sinfo, td);
2529 if (error) {
2530 if (auio.uio_resid != len && (error == ERESTART ||
2531 error == EINTR || error == EWOULDBLOCK))
2532 error = 0;
2533 /* Generation of SIGPIPE can be controlled per socket */
2534 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
2535 !(uap->flags & MSG_NOSIGNAL)) {
2536 PROC_LOCK(td->td_proc);
2537 psignal(td->td_proc, SIGPIPE);
2538 PROC_UNLOCK(td->td_proc);
2539 }
2540 }
2541 if (error == 0)
2542 td->td_retval[0] = len - auio.uio_resid;
2543#ifdef KTRACE
2544 if (ktruio != NULL) {
2545 ktruio->uio_resid = td->td_retval[0];
2546 ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
2547 }
2548#endif /* KTRACE */
2549sctp_bad:
2550 free(iov, M_IOV);
2551sctp_bad1:
2552 if (fp)
2553 fdrop(fp, td);
2554sctp_bad2:
2555 if (to)
2556 free(to, M_SONAME);
2557 return (error);
2558#else /* SCTP */
2559 return (EOPNOTSUPP);
2560#endif /* SCTP */
2561}
2562
2563int
2564sctp_generic_recvmsg(td, uap)
2565 struct thread *td;
2566 struct sctp_generic_recvmsg_args /* {
2567 int sd,
2568 struct iovec *iov,
2569 int iovlen,
2570 struct sockaddr *from,
2571 __socklen_t *fromlenaddr,
2572 struct sctp_sndrcvinfo *sinfo,
2573 int *msg_flags
2574 } */ *uap;
2575{
2576#ifdef SCTP
2577 u_int8_t sockbufstore[256];
2578 struct uio auio;
2579 struct iovec *iov, *tiov;
2580 struct sctp_sndrcvinfo sinfo;
2581 struct socket *so;
2582 struct file *fp = NULL;
2583 struct sockaddr *fromsa;
2584 int fromlen;
2585 int len, i, msg_flags;
2586 int error = 0;
2587#ifdef KTRACE
2588 struct uio *ktruio = NULL;
2589#endif
2590 error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
2591 if (error) {
2592 return (error);
2593 }
2594 error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
2595 if (error) {
2596 goto out1;
2597 }
2598
2599 so = fp->f_data;
2600#ifdef MAC
2601 SOCK_LOCK(so);
2602 error = mac_socket_check_receive(td->td_ucred, so);
2603 SOCK_UNLOCK(so);
2604 if (error) {
2605 goto out;
2606 return (error);
2607 }
2608#endif /* MAC */
2609
2610 if (uap->fromlenaddr) {
2611 error = copyin(uap->fromlenaddr,
2612 &fromlen, sizeof (fromlen));
2613 if (error) {
2614 goto out;
2615 }
2616 } else {
2617 fromlen = 0;
2618 }
2619 if(uap->msg_flags) {
2620 error = copyin(uap->msg_flags, &msg_flags, sizeof (int));
2621 if (error) {
2622 goto out;
2623 }
2624 } else {
2625 msg_flags = 0;
2626 }
2627 auio.uio_iov = iov;
2628 auio.uio_iovcnt = uap->iovlen;
2629 auio.uio_segflg = UIO_USERSPACE;
2630 auio.uio_rw = UIO_READ;
2631 auio.uio_td = td;
2632 auio.uio_offset = 0; /* XXX */
2633 auio.uio_resid = 0;
2634 tiov = iov;
2635 for (i = 0; i <uap->iovlen; i++, tiov++) {
2636 if ((auio.uio_resid += tiov->iov_len) < 0) {
2637 error = EINVAL;
2638 goto out;
2639 }
2640 }
2641 len = auio.uio_resid;
2642 fromsa = (struct sockaddr *)sockbufstore;
2643
2644#ifdef KTRACE
2645 if (KTRPOINT(td, KTR_GENIO))
2646 ktruio = cloneuio(&auio);
2647#endif /* KTRACE */
2648 error = sctp_sorecvmsg(so, &auio, (struct mbuf **)NULL,
2649 fromsa, fromlen, &msg_flags,
2650 (struct sctp_sndrcvinfo *)&sinfo, 1);
2651 if (error) {
2652 if (auio.uio_resid != (int)len && (error == ERESTART ||
2653 error == EINTR || error == EWOULDBLOCK))
2654 error = 0;
2655 } else {
2656 if (uap->sinfo)
2657 error = copyout(&sinfo, uap->sinfo, sizeof (sinfo));
2658 }
2659#ifdef KTRACE
2660 if (ktruio != NULL) {
2661 ktruio->uio_resid = (int)len - auio.uio_resid;
2662 ktrgenio(uap->sd, UIO_READ, ktruio, error);
2663 }
2664#endif /* KTRACE */
2665 if (error)
2666 goto out;
2667 td->td_retval[0] = (int)len - auio.uio_resid;
2668
2669 if (fromlen && uap->from) {
2670 len = fromlen;
2671 if (len <= 0 || fromsa == 0)
2672 len = 0;
2673 else {
2674 len = MIN(len, fromsa->sa_len);
2675 error = copyout(fromsa, uap->from, (unsigned)len);
2676 if (error)
2677 goto out;
2678 }
2679 error = copyout(&len, uap->fromlenaddr, sizeof (socklen_t));
2680 if (error) {
2681 goto out;
2682 }
2683 }
2684#ifdef KTRACE
2685 if (KTRPOINT(td, KTR_STRUCT))
2686 ktrsockaddr(fromsa);
2687#endif
2688 if (uap->msg_flags) {
2689 error = copyout(&msg_flags, uap->msg_flags, sizeof (int));
2690 if (error) {
2691 goto out;
2692 }
2693 }
2694out:
2695 free(iov, M_IOV);
2696out1:
2697 if (fp)
2698 fdrop(fp, td);
2699
2700 return (error);
2701#else /* SCTP */
2702 return (EOPNOTSUPP);
2703#endif /* SCTP */
2704}