Deleted Added
full compact
kern_sendfile.c (129043) kern_sendfile.c (129906)
1/*
2 * Copyright (c) 1982, 1986, 1989, 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * sendfile(2) and related extensions:
6 * Copyright (c) 1998, David Greenman. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
33 */
34
35#include <sys/cdefs.h>
1/*
2 * Copyright (c) 1982, 1986, 1989, 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * sendfile(2) and related extensions:
6 * Copyright (c) 1998, David Greenman. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: head/sys/kern/uipc_syscalls.c 129043 2004-05-08 02:24:21Z rwatson $");
36__FBSDID("$FreeBSD: head/sys/kern/uipc_syscalls.c 129906 2004-05-31 21:46:06Z bmilekic $");
37
38#include "opt_compat.h"
39#include "opt_ktrace.h"
40#include "opt_mac.h"
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/kernel.h>
45#include <sys/lock.h>
46#include <sys/mac.h>
47#include <sys/mutex.h>
48#include <sys/sysproto.h>
49#include <sys/malloc.h>
50#include <sys/filedesc.h>
51#include <sys/event.h>
52#include <sys/proc.h>
53#include <sys/fcntl.h>
54#include <sys/file.h>
55#include <sys/filio.h>
56#include <sys/mount.h>
57#include <sys/mbuf.h>
58#include <sys/protosw.h>
59#include <sys/sf_buf.h>
60#include <sys/socket.h>
61#include <sys/socketvar.h>
62#include <sys/signalvar.h>
63#include <sys/syscallsubr.h>
37
38#include "opt_compat.h"
39#include "opt_ktrace.h"
40#include "opt_mac.h"
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/kernel.h>
45#include <sys/lock.h>
46#include <sys/mac.h>
47#include <sys/mutex.h>
48#include <sys/sysproto.h>
49#include <sys/malloc.h>
50#include <sys/filedesc.h>
51#include <sys/event.h>
52#include <sys/proc.h>
53#include <sys/fcntl.h>
54#include <sys/file.h>
55#include <sys/filio.h>
56#include <sys/mount.h>
57#include <sys/mbuf.h>
58#include <sys/protosw.h>
59#include <sys/sf_buf.h>
60#include <sys/socket.h>
61#include <sys/socketvar.h>
62#include <sys/signalvar.h>
63#include <sys/syscallsubr.h>
64#include <sys/sysctl.h>
64#include <sys/uio.h>
65#include <sys/vnode.h>
66#ifdef KTRACE
67#include <sys/ktrace.h>
68#endif
69
70#include <vm/vm.h>
71#include <vm/vm_object.h>
72#include <vm/vm_page.h>
73#include <vm/vm_pageout.h>
74#include <vm/vm_kern.h>
75#include <vm/vm_extern.h>
76
77static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
78static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);
79
80static int accept1(struct thread *td, struct accept_args *uap, int compat);
81static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
82static int getsockname1(struct thread *td, struct getsockname_args *uap,
83 int compat);
84static int getpeername1(struct thread *td, struct getpeername_args *uap,
85 int compat);
86
87/*
65#include <sys/uio.h>
66#include <sys/vnode.h>
67#ifdef KTRACE
68#include <sys/ktrace.h>
69#endif
70
71#include <vm/vm.h>
72#include <vm/vm_object.h>
73#include <vm/vm_page.h>
74#include <vm/vm_pageout.h>
75#include <vm/vm_kern.h>
76#include <vm/vm_extern.h>
77
78static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
79static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);
80
81static int accept1(struct thread *td, struct accept_args *uap, int compat);
82static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
83static int getsockname1(struct thread *td, struct getsockname_args *uap,
84 int compat);
85static int getpeername1(struct thread *td, struct getpeername_args *uap,
86 int compat);
87
88/*
89 * NSFBUFS-related variables and associated sysctls
90 */
91int nsfbufs;
92int nsfbufspeak;
93int nsfbufsused;
94
95SYSCTL_DECL(_kern_ipc);
96SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
97 "Maximum number of sendfile(2) sf_bufs available");
98SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
99 "Number of sendfile(2) sf_bufs at peak usage");
100SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
101 "Number of sendfile(2) sf_bufs in use");
102
103/*
88 * System call interface to the socket abstraction.
89 */
90#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
91#define COMPAT_OLDSOCK
92#endif
93
94/*
95 * MPSAFE
96 */
97int
98socket(td, uap)
99 struct thread *td;
100 register struct socket_args /* {
101 int domain;
102 int type;
103 int protocol;
104 } */ *uap;
105{
106 struct filedesc *fdp;
107 struct socket *so;
108 struct file *fp;
109 int fd, error;
110
111 fdp = td->td_proc->p_fd;
112 error = falloc(td, &fp, &fd);
113 if (error)
114 return (error);
115 /* An extra reference on `fp' has been held for us by falloc(). */
116 NET_LOCK_GIANT();
117 error = socreate(uap->domain, &so, uap->type, uap->protocol,
118 td->td_ucred, td);
119 NET_UNLOCK_GIANT();
120 FILEDESC_LOCK(fdp);
121 if (error) {
122 if (fdp->fd_ofiles[fd] == fp) {
123 fdp->fd_ofiles[fd] = NULL;
124 fdunused(fdp, fd);
125 FILEDESC_UNLOCK(fdp);
126 fdrop(fp, td);
127 } else {
128 FILEDESC_UNLOCK(fdp);
129 }
130 } else {
131 fp->f_data = so; /* already has ref count */
132 fp->f_flag = FREAD|FWRITE;
133 fp->f_ops = &socketops;
134 fp->f_type = DTYPE_SOCKET;
135 FILEDESC_UNLOCK(fdp);
136 td->td_retval[0] = fd;
137 }
138 fdrop(fp, td);
139 return (error);
140}
141
142/*
143 * MPSAFE
144 */
145/* ARGSUSED */
146int
147bind(td, uap)
148 struct thread *td;
149 register struct bind_args /* {
150 int s;
151 caddr_t name;
152 int namelen;
153 } */ *uap;
154{
155 struct sockaddr *sa;
156 int error;
157
158 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
159 return (error);
160
161 return (kern_bind(td, uap->s, sa));
162}
163
164int
165kern_bind(td, fd, sa)
166 struct thread *td;
167 int fd;
168 struct sockaddr *sa;
169{
170 struct socket *so;
171 int error;
172
173 NET_LOCK_GIANT();
174 if ((error = fgetsock(td, fd, &so, NULL)) != 0)
175 goto done2;
176#ifdef MAC
177 error = mac_check_socket_bind(td->td_ucred, so, sa);
178 if (error)
179 goto done1;
180#endif
181 error = sobind(so, sa, td);
182#ifdef MAC
183done1:
184#endif
185 fputsock(so);
186done2:
187 NET_UNLOCK_GIANT();
188 FREE(sa, M_SONAME);
189 return (error);
190}
191
192/*
193 * MPSAFE
194 */
195/* ARGSUSED */
196int
197listen(td, uap)
198 struct thread *td;
199 register struct listen_args /* {
200 int s;
201 int backlog;
202 } */ *uap;
203{
204 struct socket *so;
205 int error;
206
207 NET_LOCK_GIANT();
208 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
209#ifdef MAC
210 error = mac_check_socket_listen(td->td_ucred, so);
211 if (error)
212 goto done;
213#endif
214 error = solisten(so, uap->backlog, td);
215#ifdef MAC
216done:
217#endif
218 fputsock(so);
219 }
220 NET_UNLOCK_GIANT();
221 return(error);
222}
223
224/*
225 * accept1()
226 * MPSAFE
227 */
228static int
229accept1(td, uap, compat)
230 struct thread *td;
231 register struct accept_args /* {
232 int s;
233 struct sockaddr * __restrict name;
234 socklen_t * __restrict anamelen;
235 } */ *uap;
236 int compat;
237{
238 struct filedesc *fdp;
239 struct file *nfp = NULL;
240 struct sockaddr *sa;
241 socklen_t namelen;
242 int error, s;
243 struct socket *head, *so;
244 int fd;
245 u_int fflag;
246 pid_t pgid;
247 int tmp;
248
249 fdp = td->td_proc->p_fd;
250 if (uap->name) {
251 error = copyin(uap->anamelen, &namelen, sizeof (namelen));
252 if(error)
253 goto done3;
254 if (namelen < 0) {
255 error = EINVAL;
256 goto done3;
257 }
258 }
259 NET_LOCK_GIANT();
260 error = fgetsock(td, uap->s, &head, &fflag);
261 if (error)
262 goto done2;
263 s = splnet();
264 if ((head->so_options & SO_ACCEPTCONN) == 0) {
265 splx(s);
266 error = EINVAL;
267 goto done;
268 }
269 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
270 if (head->so_state & SS_CANTRCVMORE) {
271 head->so_error = ECONNABORTED;
272 break;
273 }
274 if ((head->so_state & SS_NBIO) != 0) {
275 head->so_error = EWOULDBLOCK;
276 break;
277 }
278 error = tsleep(&head->so_timeo, PSOCK | PCATCH,
279 "accept", 0);
280 if (error) {
281 splx(s);
282 goto done;
283 }
284 }
285 if (head->so_error) {
286 error = head->so_error;
287 head->so_error = 0;
288 splx(s);
289 goto done;
290 }
291
292 /*
293 * At this point we know that there is at least one connection
294 * ready to be accepted. Remove it from the queue prior to
295 * allocating the file descriptor for it since falloc() may
296 * block allowing another process to accept the connection
297 * instead.
298 */
299 so = TAILQ_FIRST(&head->so_comp);
300 TAILQ_REMOVE(&head->so_comp, so, so_list);
301 head->so_qlen--;
302
303 error = falloc(td, &nfp, &fd);
304 if (error) {
305 /*
306 * Probably ran out of file descriptors. Put the
307 * unaccepted connection back onto the queue and
308 * do another wakeup so some other process might
309 * have a chance at it.
310 */
311 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list);
312 head->so_qlen++;
313 wakeup_one(&head->so_timeo);
314 splx(s);
315 goto done;
316 }
317 /* An extra reference on `nfp' has been held for us by falloc(). */
318 td->td_retval[0] = fd;
319
320 /* connection has been removed from the listen queue */
321 KNOTE(&head->so_rcv.sb_sel.si_note, 0);
322
323 so->so_state &= ~SS_COMP;
324 so->so_head = NULL;
325 pgid = fgetown(&head->so_sigio);
326 if (pgid != 0)
327 fsetown(pgid, &so->so_sigio);
328
329 FILE_LOCK(nfp);
330 soref(so); /* file descriptor reference */
331 nfp->f_data = so; /* nfp has ref count from falloc */
332 nfp->f_flag = fflag;
333 nfp->f_ops = &socketops;
334 nfp->f_type = DTYPE_SOCKET;
335 FILE_UNLOCK(nfp);
336 /* Sync socket nonblocking/async state with file flags */
337 tmp = fflag & FNONBLOCK;
338 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
339 tmp = fflag & FASYNC;
340 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
341 sa = 0;
342 error = soaccept(so, &sa);
343 if (error) {
344 /*
345 * return a namelen of zero for older code which might
346 * ignore the return value from accept.
347 */
348 if (uap->name != NULL) {
349 namelen = 0;
350 (void) copyout(&namelen,
351 uap->anamelen, sizeof(*uap->anamelen));
352 }
353 goto noconnection;
354 }
355 if (sa == NULL) {
356 namelen = 0;
357 if (uap->name)
358 goto gotnoname;
359 splx(s);
360 error = 0;
361 goto done;
362 }
363 if (uap->name) {
364 /* check sa_len before it is destroyed */
365 if (namelen > sa->sa_len)
366 namelen = sa->sa_len;
367#ifdef COMPAT_OLDSOCK
368 if (compat)
369 ((struct osockaddr *)sa)->sa_family =
370 sa->sa_family;
371#endif
372 error = copyout(sa, uap->name, (u_int)namelen);
373 if (!error)
374gotnoname:
375 error = copyout(&namelen,
376 uap->anamelen, sizeof (*uap->anamelen));
377 }
378noconnection:
379 if (sa)
380 FREE(sa, M_SONAME);
381
382 /*
383 * close the new descriptor, assuming someone hasn't ripped it
384 * out from under us.
385 */
386 if (error) {
387 FILEDESC_LOCK(fdp);
388 if (fdp->fd_ofiles[fd] == nfp) {
389 fdp->fd_ofiles[fd] = NULL;
390 fdunused(fdp, fd);
391 FILEDESC_UNLOCK(fdp);
392 fdrop(nfp, td);
393 } else {
394 FILEDESC_UNLOCK(fdp);
395 }
396 }
397 splx(s);
398
399 /*
400 * Release explicitly held references before returning.
401 */
402done:
403 if (nfp != NULL)
404 fdrop(nfp, td);
405 fputsock(head);
406done2:
407 NET_UNLOCK_GIANT();
408done3:
409 return (error);
410}
411
412/*
413 * MPSAFE (accept1() is MPSAFE)
414 */
/* accept(2) entry point: accept1() without the old-sockaddr conversion. */
int
accept(td, uap)
	struct thread *td;
	struct accept_args *uap;
{
	int error;

	error = accept1(td, uap, 0);
	return (error);
}
423
424#ifdef COMPAT_OLDSOCK
425/*
426 * MPSAFE (accept1() is MPSAFE)
427 */
/* Old-socket accept entry point: accept1() with compat enabled. */
int
oaccept(td, uap)
	struct thread *td;
	struct accept_args *uap;
{
	int error;

	error = accept1(td, uap, 1);
	return (error);
}
436#endif /* COMPAT_OLDSOCK */
437
438/*
439 * MPSAFE
440 */
441/* ARGSUSED */
442int
443connect(td, uap)
444 struct thread *td;
445 register struct connect_args /* {
446 int s;
447 caddr_t name;
448 int namelen;
449 } */ *uap;
450{
451 struct sockaddr *sa;
452 int error;
453
454 error = getsockaddr(&sa, uap->name, uap->namelen);
455 if (error)
456 return (error);
457
458 return (kern_connect(td, uap->s, sa));
459}
460
461
462int
463kern_connect(td, fd, sa)
464 struct thread *td;
465 int fd;
466 struct sockaddr *sa;
467{
468 struct socket *so;
469 int error, s;
470 int interrupted = 0;
471
472 NET_LOCK_GIANT();
473 if ((error = fgetsock(td, fd, &so, NULL)) != 0)
474 goto done2;
475 if (so->so_state & SS_ISCONNECTING) {
476 error = EALREADY;
477 goto done1;
478 }
479#ifdef MAC
480 error = mac_check_socket_connect(td->td_ucred, so, sa);
481 if (error)
482 goto bad;
483#endif
484 error = soconnect(so, sa, td);
485 if (error)
486 goto bad;
487 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
488 error = EINPROGRESS;
489 goto done1;
490 }
491 s = splnet();
492 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
493 error = tsleep(&so->so_timeo, PSOCK | PCATCH, "connec", 0);
494 if (error) {
495 if (error == EINTR || error == ERESTART)
496 interrupted = 1;
497 break;
498 }
499 }
500 if (error == 0) {
501 error = so->so_error;
502 so->so_error = 0;
503 }
504 splx(s);
505bad:
506 if (!interrupted)
507 so->so_state &= ~SS_ISCONNECTING;
508 if (error == ERESTART)
509 error = EINTR;
510done1:
511 fputsock(so);
512done2:
513 NET_UNLOCK_GIANT();
514 FREE(sa, M_SONAME);
515 return (error);
516}
517
518/*
519 * MPSAFE
520 */
521int
522socketpair(td, uap)
523 struct thread *td;
524 register struct socketpair_args /* {
525 int domain;
526 int type;
527 int protocol;
528 int *rsv;
529 } */ *uap;
530{
531 register struct filedesc *fdp = td->td_proc->p_fd;
532 struct file *fp1, *fp2;
533 struct socket *so1, *so2;
534 int fd, error, sv[2];
535
536 NET_LOCK_GIANT();
537 error = socreate(uap->domain, &so1, uap->type, uap->protocol,
538 td->td_ucred, td);
539 if (error)
540 goto done2;
541 error = socreate(uap->domain, &so2, uap->type, uap->protocol,
542 td->td_ucred, td);
543 if (error)
544 goto free1;
545 /* On success extra reference to `fp1' and 'fp2' is set by falloc. */
546 error = falloc(td, &fp1, &fd);
547 if (error)
548 goto free2;
549 sv[0] = fd;
550 fp1->f_data = so1; /* so1 already has ref count */
551 error = falloc(td, &fp2, &fd);
552 if (error)
553 goto free3;
554 fp2->f_data = so2; /* so2 already has ref count */
555 sv[1] = fd;
556 error = soconnect2(so1, so2);
557 if (error)
558 goto free4;
559 if (uap->type == SOCK_DGRAM) {
560 /*
561 * Datagram socket connection is asymmetric.
562 */
563 error = soconnect2(so2, so1);
564 if (error)
565 goto free4;
566 }
567 FILE_LOCK(fp1);
568 fp1->f_flag = FREAD|FWRITE;
569 fp1->f_ops = &socketops;
570 fp1->f_type = DTYPE_SOCKET;
571 FILE_UNLOCK(fp1);
572 FILE_LOCK(fp2);
573 fp2->f_flag = FREAD|FWRITE;
574 fp2->f_ops = &socketops;
575 fp2->f_type = DTYPE_SOCKET;
576 FILE_UNLOCK(fp2);
577 error = copyout(sv, uap->rsv, 2 * sizeof (int));
578 fdrop(fp1, td);
579 fdrop(fp2, td);
580 goto done2;
581free4:
582 FILEDESC_LOCK(fdp);
583 if (fdp->fd_ofiles[sv[1]] == fp2) {
584 fdp->fd_ofiles[sv[1]] = NULL;
585 fdunused(fdp, sv[1]);
586 FILEDESC_UNLOCK(fdp);
587 fdrop(fp2, td);
588 } else {
589 FILEDESC_UNLOCK(fdp);
590 }
591 fdrop(fp2, td);
592free3:
593 FILEDESC_LOCK(fdp);
594 if (fdp->fd_ofiles[sv[0]] == fp1) {
595 fdp->fd_ofiles[sv[0]] = NULL;
596 fdunused(fdp, sv[0]);
597 FILEDESC_UNLOCK(fdp);
598 fdrop(fp1, td);
599 } else {
600 FILEDESC_UNLOCK(fdp);
601 }
602 fdrop(fp1, td);
603free2:
604 (void)soclose(so2);
605free1:
606 (void)soclose(so1);
607done2:
608 NET_UNLOCK_GIANT();
609 return (error);
610}
611
612static int
613sendit(td, s, mp, flags)
614 register struct thread *td;
615 int s;
616 register struct msghdr *mp;
617 int flags;
618{
619 struct mbuf *control;
620 struct sockaddr *to;
621 int error;
622
623 if (mp->msg_name != NULL) {
624 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
625 if (error) {
626 to = NULL;
627 goto bad;
628 }
629 mp->msg_name = to;
630 } else {
631 to = NULL;
632 }
633
634 if (mp->msg_control) {
635 if (mp->msg_controllen < sizeof(struct cmsghdr)
636#ifdef COMPAT_OLDSOCK
637 && mp->msg_flags != MSG_COMPAT
638#endif
639 ) {
640 error = EINVAL;
641 goto bad;
642 }
643 error = sockargs(&control, mp->msg_control,
644 mp->msg_controllen, MT_CONTROL);
645 if (error)
646 goto bad;
647#ifdef COMPAT_OLDSOCK
648 if (mp->msg_flags == MSG_COMPAT) {
649 register struct cmsghdr *cm;
650
651 M_PREPEND(control, sizeof(*cm), M_TRYWAIT);
652 if (control == 0) {
653 error = ENOBUFS;
654 goto bad;
655 } else {
656 cm = mtod(control, struct cmsghdr *);
657 cm->cmsg_len = control->m_len;
658 cm->cmsg_level = SOL_SOCKET;
659 cm->cmsg_type = SCM_RIGHTS;
660 }
661 }
662#endif
663 } else {
664 control = NULL;
665 }
666
667 error = kern_sendit(td, s, mp, flags, control);
668
669bad:
670 if (to)
671 FREE(to, M_SONAME);
672 return (error);
673}
674
675int
676kern_sendit(td, s, mp, flags, control)
677 struct thread *td;
678 int s;
679 struct msghdr *mp;
680 int flags;
681 struct mbuf *control;
682{
683 struct uio auio;
684 struct iovec *iov;
685 struct socket *so;
686 int i;
687 int len, error;
688#ifdef KTRACE
689 struct iovec *ktriov = NULL;
690 struct uio ktruio;
691 int iovlen;
692#endif
693
694 NET_LOCK_GIANT();
695 if ((error = fgetsock(td, s, &so, NULL)) != 0)
696 goto bad2;
697
698#ifdef MAC
699 error = mac_check_socket_send(td->td_ucred, so);
700 if (error)
701 goto bad;
702#endif
703
704 auio.uio_iov = mp->msg_iov;
705 auio.uio_iovcnt = mp->msg_iovlen;
706 auio.uio_segflg = UIO_USERSPACE;
707 auio.uio_rw = UIO_WRITE;
708 auio.uio_td = td;
709 auio.uio_offset = 0; /* XXX */
710 auio.uio_resid = 0;
711 iov = mp->msg_iov;
712 for (i = 0; i < mp->msg_iovlen; i++, iov++) {
713 if ((auio.uio_resid += iov->iov_len) < 0) {
714 error = EINVAL;
715 goto bad;
716 }
717 }
718#ifdef KTRACE
719 if (KTRPOINT(td, KTR_GENIO)) {
720 iovlen = auio.uio_iovcnt * sizeof (struct iovec);
721 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
722 bcopy(auio.uio_iov, ktriov, iovlen);
723 ktruio = auio;
724 }
725#endif
726 len = auio.uio_resid;
727 error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio,
728 0, control, flags, td);
729 if (error) {
730 if (auio.uio_resid != len && (error == ERESTART ||
731 error == EINTR || error == EWOULDBLOCK))
732 error = 0;
733 /* Generation of SIGPIPE can be controlled per socket */
734 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE)) {
735 PROC_LOCK(td->td_proc);
736 psignal(td->td_proc, SIGPIPE);
737 PROC_UNLOCK(td->td_proc);
738 }
739 }
740 if (error == 0)
741 td->td_retval[0] = len - auio.uio_resid;
742#ifdef KTRACE
743 if (ktriov != NULL) {
744 if (error == 0) {
745 ktruio.uio_iov = ktriov;
746 ktruio.uio_resid = td->td_retval[0];
747 ktrgenio(s, UIO_WRITE, &ktruio, error);
748 }
749 FREE(ktriov, M_TEMP);
750 }
751#endif
752bad:
753 fputsock(so);
754bad2:
755 NET_UNLOCK_GIANT();
756 return (error);
757}
758
759/*
760 * MPSAFE
761 */
762int
763sendto(td, uap)
764 struct thread *td;
765 register struct sendto_args /* {
766 int s;
767 caddr_t buf;
768 size_t len;
769 int flags;
770 caddr_t to;
771 int tolen;
772 } */ *uap;
773{
774 struct msghdr msg;
775 struct iovec aiov;
776 int error;
777
778 msg.msg_name = uap->to;
779 msg.msg_namelen = uap->tolen;
780 msg.msg_iov = &aiov;
781 msg.msg_iovlen = 1;
782 msg.msg_control = 0;
783#ifdef COMPAT_OLDSOCK
784 msg.msg_flags = 0;
785#endif
786 aiov.iov_base = uap->buf;
787 aiov.iov_len = uap->len;
788 error = sendit(td, uap->s, &msg, uap->flags);
789 return (error);
790}
791
792#ifdef COMPAT_OLDSOCK
793/*
794 * MPSAFE
795 */
796int
797osend(td, uap)
798 struct thread *td;
799 register struct osend_args /* {
800 int s;
801 caddr_t buf;
802 int len;
803 int flags;
804 } */ *uap;
805{
806 struct msghdr msg;
807 struct iovec aiov;
808 int error;
809
810 msg.msg_name = 0;
811 msg.msg_namelen = 0;
812 msg.msg_iov = &aiov;
813 msg.msg_iovlen = 1;
814 aiov.iov_base = uap->buf;
815 aiov.iov_len = uap->len;
816 msg.msg_control = 0;
817 msg.msg_flags = 0;
818 error = sendit(td, uap->s, &msg, uap->flags);
819 return (error);
820}
821
822/*
823 * MPSAFE
824 */
825int
826osendmsg(td, uap)
827 struct thread *td;
828 register struct osendmsg_args /* {
829 int s;
830 caddr_t msg;
831 int flags;
832 } */ *uap;
833{
834 struct msghdr msg;
835 struct iovec aiov[UIO_SMALLIOV], *iov;
836 int error;
837
838 error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
839 if (error)
840 goto done2;
841 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
842 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
843 error = EMSGSIZE;
844 goto done2;
845 }
846 MALLOC(iov, struct iovec *,
847 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
848 M_WAITOK);
849 } else {
850 iov = aiov;
851 }
852 error = copyin(msg.msg_iov, iov,
853 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
854 if (error)
855 goto done;
856 msg.msg_flags = MSG_COMPAT;
857 msg.msg_iov = iov;
858 error = sendit(td, uap->s, &msg, uap->flags);
859done:
860 if (iov != aiov)
861 FREE(iov, M_IOV);
862done2:
863 return (error);
864}
865#endif
866
867/*
868 * MPSAFE
869 */
870int
871sendmsg(td, uap)
872 struct thread *td;
873 register struct sendmsg_args /* {
874 int s;
875 caddr_t msg;
876 int flags;
877 } */ *uap;
878{
879 struct msghdr msg;
880 struct iovec aiov[UIO_SMALLIOV], *iov;
881 int error;
882
883 error = copyin(uap->msg, &msg, sizeof (msg));
884 if (error)
885 goto done2;
886 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
887 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
888 error = EMSGSIZE;
889 goto done2;
890 }
891 MALLOC(iov, struct iovec *,
892 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
893 M_WAITOK);
894 } else {
895 iov = aiov;
896 }
897 if (msg.msg_iovlen &&
898 (error = copyin(msg.msg_iov, iov,
899 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)))))
900 goto done;
901 msg.msg_iov = iov;
902#ifdef COMPAT_OLDSOCK
903 msg.msg_flags = 0;
904#endif
905 error = sendit(td, uap->s, &msg, uap->flags);
906done:
907 if (iov != aiov)
908 FREE(iov, M_IOV);
909done2:
910 return (error);
911}
912
913static int
914recvit(td, s, mp, namelenp)
915 register struct thread *td;
916 int s;
917 register struct msghdr *mp;
918 void *namelenp;
919{
920 struct uio auio;
921 register struct iovec *iov;
922 register int i;
923 socklen_t len;
924 int error;
925 struct mbuf *m, *control = 0;
926 caddr_t ctlbuf;
927 struct socket *so;
928 struct sockaddr *fromsa = 0;
929#ifdef KTRACE
930 struct iovec *ktriov = NULL;
931 struct uio ktruio;
932 int iovlen;
933#endif
934
935 NET_LOCK_GIANT();
936 if ((error = fgetsock(td, s, &so, NULL)) != 0) {
937 NET_UNLOCK_GIANT();
938 return (error);
939 }
940
941#ifdef MAC
942 error = mac_check_socket_receive(td->td_ucred, so);
943 if (error) {
944 fputsock(so);
945 NET_UNLOCK_GIANT();
946 return (error);
947 }
948#endif
949
950 auio.uio_iov = mp->msg_iov;
951 auio.uio_iovcnt = mp->msg_iovlen;
952 auio.uio_segflg = UIO_USERSPACE;
953 auio.uio_rw = UIO_READ;
954 auio.uio_td = td;
955 auio.uio_offset = 0; /* XXX */
956 auio.uio_resid = 0;
957 iov = mp->msg_iov;
958 for (i = 0; i < mp->msg_iovlen; i++, iov++) {
959 if ((auio.uio_resid += iov->iov_len) < 0) {
960 fputsock(so);
961 NET_UNLOCK_GIANT();
962 return (EINVAL);
963 }
964 }
965#ifdef KTRACE
966 if (KTRPOINT(td, KTR_GENIO)) {
967 iovlen = auio.uio_iovcnt * sizeof (struct iovec);
968 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
969 bcopy(auio.uio_iov, ktriov, iovlen);
970 ktruio = auio;
971 }
972#endif
973 len = auio.uio_resid;
974 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
975 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
976 &mp->msg_flags);
977 if (error) {
978 if (auio.uio_resid != (int)len && (error == ERESTART ||
979 error == EINTR || error == EWOULDBLOCK))
980 error = 0;
981 }
982#ifdef KTRACE
983 if (ktriov != NULL) {
984 if (error == 0) {
985 ktruio.uio_iov = ktriov;
986 ktruio.uio_resid = (int)len - auio.uio_resid;
987 ktrgenio(s, UIO_READ, &ktruio, error);
988 }
989 FREE(ktriov, M_TEMP);
990 }
991#endif
992 if (error)
993 goto out;
994 td->td_retval[0] = (int)len - auio.uio_resid;
995 if (mp->msg_name) {
996 len = mp->msg_namelen;
997 if (len <= 0 || fromsa == 0)
998 len = 0;
999 else {
1000 /* save sa_len before it is destroyed by MSG_COMPAT */
1001 len = MIN(len, fromsa->sa_len);
1002#ifdef COMPAT_OLDSOCK
1003 if (mp->msg_flags & MSG_COMPAT)
1004 ((struct osockaddr *)fromsa)->sa_family =
1005 fromsa->sa_family;
1006#endif
1007 error = copyout(fromsa, mp->msg_name, (unsigned)len);
1008 if (error)
1009 goto out;
1010 }
1011 mp->msg_namelen = len;
1012 if (namelenp &&
1013 (error = copyout(&len, namelenp, sizeof (socklen_t)))) {
1014#ifdef COMPAT_OLDSOCK
1015 if (mp->msg_flags & MSG_COMPAT)
1016 error = 0; /* old recvfrom didn't check */
1017 else
1018#endif
1019 goto out;
1020 }
1021 }
1022 if (mp->msg_control) {
1023#ifdef COMPAT_OLDSOCK
1024 /*
1025 * We assume that old recvmsg calls won't receive access
1026 * rights and other control info, esp. as control info
1027 * is always optional and those options didn't exist in 4.3.
1028 * If we receive rights, trim the cmsghdr; anything else
1029 * is tossed.
1030 */
1031 if (control && mp->msg_flags & MSG_COMPAT) {
1032 if (mtod(control, struct cmsghdr *)->cmsg_level !=
1033 SOL_SOCKET ||
1034 mtod(control, struct cmsghdr *)->cmsg_type !=
1035 SCM_RIGHTS) {
1036 mp->msg_controllen = 0;
1037 goto out;
1038 }
1039 control->m_len -= sizeof (struct cmsghdr);
1040 control->m_data += sizeof (struct cmsghdr);
1041 }
1042#endif
1043 len = mp->msg_controllen;
1044 m = control;
1045 mp->msg_controllen = 0;
1046 ctlbuf = mp->msg_control;
1047
1048 while (m && len > 0) {
1049 unsigned int tocopy;
1050
1051 if (len >= m->m_len)
1052 tocopy = m->m_len;
1053 else {
1054 mp->msg_flags |= MSG_CTRUNC;
1055 tocopy = len;
1056 }
1057
1058 if ((error = copyout(mtod(m, caddr_t),
1059 ctlbuf, tocopy)) != 0)
1060 goto out;
1061
1062 ctlbuf += tocopy;
1063 len -= tocopy;
1064 m = m->m_next;
1065 }
1066 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
1067 }
1068out:
1069 fputsock(so);
1070 NET_UNLOCK_GIANT();
1071 if (fromsa)
1072 FREE(fromsa, M_SONAME);
1073 if (control)
1074 m_freem(control);
1075 return (error);
1076}
1077
1078/*
1079 * MPSAFE
1080 */
1081int
1082recvfrom(td, uap)
1083 struct thread *td;
1084 register struct recvfrom_args /* {
1085 int s;
1086 caddr_t buf;
1087 size_t len;
1088 int flags;
1089 struct sockaddr * __restrict from;
1090 socklen_t * __restrict fromlenaddr;
1091 } */ *uap;
1092{
1093 struct msghdr msg;
1094 struct iovec aiov;
1095 int error;
1096
1097 if (uap->fromlenaddr) {
1098 error = copyin(uap->fromlenaddr,
1099 &msg.msg_namelen, sizeof (msg.msg_namelen));
1100 if (error)
1101 goto done2;
1102 } else {
1103 msg.msg_namelen = 0;
1104 }
1105 msg.msg_name = uap->from;
1106 msg.msg_iov = &aiov;
1107 msg.msg_iovlen = 1;
1108 aiov.iov_base = uap->buf;
1109 aiov.iov_len = uap->len;
1110 msg.msg_control = 0;
1111 msg.msg_flags = uap->flags;
1112 error = recvit(td, uap->s, &msg, uap->fromlenaddr);
1113done2:
1114 return(error);
1115}
1116
1117#ifdef COMPAT_OLDSOCK
1118/*
1119 * MPSAFE
1120 */
1121int
1122orecvfrom(td, uap)
1123 struct thread *td;
1124 struct recvfrom_args *uap;
1125{
1126
1127 uap->flags |= MSG_COMPAT;
1128 return (recvfrom(td, uap));
1129}
1130#endif
1131
1132
1133#ifdef COMPAT_OLDSOCK
1134/*
1135 * MPSAFE
1136 */
1137int
1138orecv(td, uap)
1139 struct thread *td;
1140 register struct orecv_args /* {
1141 int s;
1142 caddr_t buf;
1143 int len;
1144 int flags;
1145 } */ *uap;
1146{
1147 struct msghdr msg;
1148 struct iovec aiov;
1149 int error;
1150
1151 msg.msg_name = 0;
1152 msg.msg_namelen = 0;
1153 msg.msg_iov = &aiov;
1154 msg.msg_iovlen = 1;
1155 aiov.iov_base = uap->buf;
1156 aiov.iov_len = uap->len;
1157 msg.msg_control = 0;
1158 msg.msg_flags = uap->flags;
1159 error = recvit(td, uap->s, &msg, NULL);
1160 return (error);
1161}
1162
1163/*
1164 * Old recvmsg. This code takes advantage of the fact that the old msghdr
1165 * overlays the new one, missing only the flags, and with the (old) access
1166 * rights where the control fields are now.
1167 *
1168 * MPSAFE
1169 */
1170int
1171orecvmsg(td, uap)
1172 struct thread *td;
1173 register struct orecvmsg_args /* {
1174 int s;
1175 struct omsghdr *msg;
1176 int flags;
1177 } */ *uap;
1178{
1179 struct msghdr msg;
1180 struct iovec aiov[UIO_SMALLIOV], *iov;
1181 int error;
1182
1183 error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
1184 if (error)
1185 return (error);
1186
1187 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1188 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
1189 error = EMSGSIZE;
1190 goto done2;
1191 }
1192 MALLOC(iov, struct iovec *,
1193 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1194 M_WAITOK);
1195 } else {
1196 iov = aiov;
1197 }
1198 msg.msg_flags = uap->flags | MSG_COMPAT;
1199 error = copyin(msg.msg_iov, iov,
1200 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1201 if (error)
1202 goto done;
1203 msg.msg_iov = iov;
1204 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
1205
1206 if (msg.msg_controllen && error == 0)
1207 error = copyout(&msg.msg_controllen,
1208 &uap->msg->msg_accrightslen, sizeof (int));
1209done:
1210 if (iov != aiov)
1211 FREE(iov, M_IOV);
1212done2:
1213 return (error);
1214}
1215#endif
1216
1217/*
1218 * MPSAFE
1219 */
1220int
1221recvmsg(td, uap)
1222 struct thread *td;
1223 register struct recvmsg_args /* {
1224 int s;
1225 struct msghdr *msg;
1226 int flags;
1227 } */ *uap;
1228{
1229 struct msghdr msg;
1230 struct iovec aiov[UIO_SMALLIOV], *uiov, *iov;
1231 register int error;
1232
1233 error = copyin(uap->msg, &msg, sizeof (msg));
1234 if (error)
1235 goto done2;
1236 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1237 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
1238 error = EMSGSIZE;
1239 goto done2;
1240 }
1241 MALLOC(iov, struct iovec *,
1242 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1243 M_WAITOK);
1244 } else {
1245 iov = aiov;
1246 }
1247#ifdef COMPAT_OLDSOCK
1248 msg.msg_flags = uap->flags &~ MSG_COMPAT;
1249#else
1250 msg.msg_flags = uap->flags;
1251#endif
1252 uiov = msg.msg_iov;
1253 msg.msg_iov = iov;
1254 error = copyin(uiov, iov,
1255 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1256 if (error)
1257 goto done;
1258 error = recvit(td, uap->s, &msg, NULL);
1259 if (!error) {
1260 msg.msg_iov = uiov;
1261 error = copyout(&msg, uap->msg, sizeof(msg));
1262 }
1263done:
1264 if (iov != aiov)
1265 FREE(iov, M_IOV);
1266done2:
1267 return (error);
1268}
1269
1270/*
1271 * MPSAFE
1272 */
1273/* ARGSUSED */
1274int
1275shutdown(td, uap)
1276 struct thread *td;
1277 register struct shutdown_args /* {
1278 int s;
1279 int how;
1280 } */ *uap;
1281{
1282 struct socket *so;
1283 int error;
1284
1285 NET_LOCK_GIANT();
1286 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
1287 error = soshutdown(so, uap->how);
1288 fputsock(so);
1289 }
1290 NET_UNLOCK_GIANT();
1291 return(error);
1292}
1293
1294/*
1295 * MPSAFE
1296 */
1297/* ARGSUSED */
1298int
1299setsockopt(td, uap)
1300 struct thread *td;
1301 register struct setsockopt_args /* {
1302 int s;
1303 int level;
1304 int name;
1305 caddr_t val;
1306 int valsize;
1307 } */ *uap;
1308{
1309 struct socket *so;
1310 struct sockopt sopt;
1311 int error;
1312
1313 if (uap->val == 0 && uap->valsize != 0)
1314 return (EFAULT);
1315 if (uap->valsize < 0)
1316 return (EINVAL);
1317
1318 NET_LOCK_GIANT();
1319 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
1320 sopt.sopt_dir = SOPT_SET;
1321 sopt.sopt_level = uap->level;
1322 sopt.sopt_name = uap->name;
1323 sopt.sopt_val = uap->val;
1324 sopt.sopt_valsize = uap->valsize;
1325 sopt.sopt_td = td;
1326 error = sosetopt(so, &sopt);
1327 fputsock(so);
1328 }
1329 NET_UNLOCK_GIANT();
1330 return(error);
1331}
1332
1333/*
1334 * MPSAFE
1335 */
1336/* ARGSUSED */
1337int
1338getsockopt(td, uap)
1339 struct thread *td;
1340 register struct getsockopt_args /* {
1341 int s;
1342 int level;
1343 int name;
1344 void * __restrict val;
1345 socklen_t * __restrict avalsize;
1346 } */ *uap;
1347{
1348 socklen_t valsize;
1349 int error;
1350 struct socket *so;
1351 struct sockopt sopt;
1352
1353 NET_LOCK_GIANT();
1354 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
1355 goto done2;
1356 if (uap->val) {
1357 error = copyin(uap->avalsize, &valsize, sizeof (valsize));
1358 if (error)
1359 goto done1;
1360 if (valsize < 0) {
1361 error = EINVAL;
1362 goto done1;
1363 }
1364 } else {
1365 valsize = 0;
1366 }
1367
1368 sopt.sopt_dir = SOPT_GET;
1369 sopt.sopt_level = uap->level;
1370 sopt.sopt_name = uap->name;
1371 sopt.sopt_val = uap->val;
1372 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
1373 sopt.sopt_td = td;
1374
1375 error = sogetopt(so, &sopt);
1376 if (error == 0) {
1377 valsize = sopt.sopt_valsize;
1378 error = copyout(&valsize, uap->avalsize, sizeof (valsize));
1379 }
1380done1:
1381 fputsock(so);
1382done2:
1383 NET_UNLOCK_GIANT();
1384 return (error);
1385}
1386
1387/*
1388 * getsockname1() - Get socket name.
1389 *
1390 * MPSAFE
1391 */
1392/* ARGSUSED */
1393static int
1394getsockname1(td, uap, compat)
1395 struct thread *td;
1396 register struct getsockname_args /* {
1397 int fdes;
1398 struct sockaddr * __restrict asa;
1399 socklen_t * __restrict alen;
1400 } */ *uap;
1401 int compat;
1402{
1403 struct socket *so;
1404 struct sockaddr *sa;
1405 socklen_t len;
1406 int error;
1407
1408 NET_LOCK_GIANT();
1409 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
1410 goto done2;
1411 error = copyin(uap->alen, &len, sizeof (len));
1412 if (error)
1413 goto done1;
1414 if (len < 0) {
1415 error = EINVAL;
1416 goto done1;
1417 }
1418 sa = 0;
1419 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1420 if (error)
1421 goto bad;
1422 if (sa == 0) {
1423 len = 0;
1424 goto gotnothing;
1425 }
1426
1427 len = MIN(len, sa->sa_len);
1428#ifdef COMPAT_OLDSOCK
1429 if (compat)
1430 ((struct osockaddr *)sa)->sa_family = sa->sa_family;
1431#endif
1432 error = copyout(sa, uap->asa, (u_int)len);
1433 if (error == 0)
1434gotnothing:
1435 error = copyout(&len, uap->alen, sizeof (len));
1436bad:
1437 if (sa)
1438 FREE(sa, M_SONAME);
1439done1:
1440 fputsock(so);
1441done2:
1442 NET_UNLOCK_GIANT();
1443 return (error);
1444}
1445
/*
 * getsockname(): current-format front end to getsockname1().
 *
 * MPSAFE
 */
int
getsockname(struct thread *td, struct getsockname_args *uap)
{

	return (getsockname1(td, uap, 0));
}
1457
1458#ifdef COMPAT_OLDSOCK
/*
 * Old-socket front end to getsockname1(); requests the old sockaddr
 * layout by passing compat = 1.
 *
 * MPSAFE
 */
int
ogetsockname(struct thread *td, struct getsockname_args *uap)
{

	return (getsockname1(td, uap, 1));
}
1470#endif /* COMPAT_OLDSOCK */
1471
1472/*
1473 * getpeername1() - Get name of peer for connected socket.
1474 *
1475 * MPSAFE
1476 */
1477/* ARGSUSED */
1478static int
1479getpeername1(td, uap, compat)
1480 struct thread *td;
1481 register struct getpeername_args /* {
1482 int fdes;
1483 struct sockaddr * __restrict asa;
1484 socklen_t * __restrict alen;
1485 } */ *uap;
1486 int compat;
1487{
1488 struct socket *so;
1489 struct sockaddr *sa;
1490 socklen_t len;
1491 int error;
1492
1493 NET_LOCK_GIANT();
1494 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
1495 goto done2;
1496 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1497 error = ENOTCONN;
1498 goto done1;
1499 }
1500 error = copyin(uap->alen, &len, sizeof (len));
1501 if (error)
1502 goto done1;
1503 if (len < 0) {
1504 error = EINVAL;
1505 goto done1;
1506 }
1507 sa = 0;
1508 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1509 if (error)
1510 goto bad;
1511 if (sa == 0) {
1512 len = 0;
1513 goto gotnothing;
1514 }
1515 len = MIN(len, sa->sa_len);
1516#ifdef COMPAT_OLDSOCK
1517 if (compat)
1518 ((struct osockaddr *)sa)->sa_family =
1519 sa->sa_family;
1520#endif
1521 error = copyout(sa, uap->asa, (u_int)len);
1522 if (error)
1523 goto bad;
1524gotnothing:
1525 error = copyout(&len, uap->alen, sizeof (len));
1526bad:
1527 if (sa)
1528 FREE(sa, M_SONAME);
1529done1:
1530 fputsock(so);
1531done2:
1532 NET_UNLOCK_GIANT();
1533 return (error);
1534}
1535
/*
 * getpeername(): current-format front end to getpeername1().
 *
 * MPSAFE
 */
int
getpeername(struct thread *td, struct getpeername_args *uap)
{

	return (getpeername1(td, uap, 0));
}
1547
1548#ifdef COMPAT_OLDSOCK
/*
 * Old-socket front end to getpeername1(); requests the old sockaddr
 * layout by passing compat = 1.
 *
 * MPSAFE
 */
int
ogetpeername(struct thread *td, struct ogetpeername_args *uap)
{
	struct getpeername_args *args;

	/* XXX uap should have type `getpeername_args *' to begin with. */
	args = (struct getpeername_args *)uap;
	return (getpeername1(td, args, 1));
}
1561#endif /* COMPAT_OLDSOCK */
1562
1563int
1564sockargs(mp, buf, buflen, type)
1565 struct mbuf **mp;
1566 caddr_t buf;
1567 int buflen, type;
1568{
1569 register struct sockaddr *sa;
1570 register struct mbuf *m;
1571 int error;
1572
1573 if ((u_int)buflen > MLEN) {
1574#ifdef COMPAT_OLDSOCK
1575 if (type == MT_SONAME && (u_int)buflen <= 112)
1576 buflen = MLEN; /* unix domain compat. hack */
1577 else
1578#endif
1579 return (EINVAL);
1580 }
1581 m = m_get(M_TRYWAIT, type);
1582 if (m == NULL)
1583 return (ENOBUFS);
1584 m->m_len = buflen;
1585 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1586 if (error)
1587 (void) m_free(m);
1588 else {
1589 *mp = m;
1590 if (type == MT_SONAME) {
1591 sa = mtod(m, struct sockaddr *);
1592
1593#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1594 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1595 sa->sa_family = sa->sa_len;
1596#endif
1597 sa->sa_len = buflen;
1598 }
1599 }
1600 return (error);
1601}
1602
1603int
1604getsockaddr(namp, uaddr, len)
1605 struct sockaddr **namp;
1606 caddr_t uaddr;
1607 size_t len;
1608{
1609 struct sockaddr *sa;
1610 int error;
1611
1612 if (len > SOCK_MAXADDRLEN)
1613 return (ENAMETOOLONG);
1614 if (len < offsetof(struct sockaddr, sa_data[0]))
1615 return (EINVAL);
1616 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1617 error = copyin(uaddr, sa, len);
1618 if (error) {
1619 FREE(sa, M_SONAME);
1620 } else {
1621#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1622 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1623 sa->sa_family = sa->sa_len;
1624#endif
1625 sa->sa_len = len;
1626 *namp = sa;
1627 }
1628 return (error);
1629}
1630
1631/*
1632 * Detach mapped page and release resources back to the system.
1633 */
1634void
1635sf_buf_mext(void *addr, void *args)
1636{
1637 vm_page_t m;
1638
1639 m = sf_buf_page(args);
1640 sf_buf_free(args);
1641 vm_page_lock_queues();
1642 vm_page_unwire(m, 0);
1643 /*
1644 * Check for the object going away on us. This can
1645 * happen since we don't hold a reference to it.
1646 * If so, we're responsible for freeing the page.
1647 */
1648 if (m->wire_count == 0 && m->object == NULL)
1649 vm_page_free(m);
1650 vm_page_unlock_queues();
1651}
1652
/*
 * sendfile(2)
 *
 * MPSAFE
 *
 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 *	 struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 *
 * Send the file behind 'fd', beginning at 'offset', to the socket 's'.
 * 'nbytes' limits the amount sent; 0 means send until EOF.  A header
 * and/or trailer may optionally be added to the socket output.  When
 * 'sbytes' is given, the total number of bytes sent is stored in it.
 *
 * This is the current-ABI entry point (compat = 0).
 */
int
sendfile(struct thread *td, struct sendfile_args *uap)
{
	int error;

	error = do_sendfile(td, uap, 0);
	return (error);
}
1673
1674#ifdef COMPAT_FREEBSD4
1675int
1676freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
1677{
1678 struct sendfile_args args;
1679
1680 args.fd = uap->fd;
1681 args.s = uap->s;
1682 args.offset = uap->offset;
1683 args.nbytes = uap->nbytes;
1684 args.hdtr = uap->hdtr;
1685 args.sbytes = uap->sbytes;
1686 args.flags = uap->flags;
1687
1688 return (do_sendfile(td, &args, 1));
1689}
1690#endif /* COMPAT_FREEBSD4 */
1691
1692static int
1693do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
1694{
1695 struct vnode *vp;
1696 struct vm_object *obj;
1697 struct socket *so = NULL;
1698 struct mbuf *m, *m_header = NULL;
1699 struct sf_buf *sf;
1700 struct vm_page *pg;
1701 struct writev_args nuap;
1702 struct sf_hdtr hdtr;
1703 struct uio hdr_uio;
1704 off_t off, xfsize, hdtr_size, sbytes = 0;
1705 int error, s, headersize = 0, headersent = 0;
1706 struct iovec *hdr_iov = NULL;
1707
1708 mtx_lock(&Giant);
1709
1710 hdtr_size = 0;
1711
1712 /*
1713 * The descriptor must be a regular file and have a backing VM object.
1714 */
1715 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
1716 goto done;
1717 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1718 if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) {
1719 error = EINVAL;
1720 VOP_UNLOCK(vp, 0, td);
1721 goto done;
1722 }
1723 VOP_UNLOCK(vp, 0, td);
1724 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
1725 goto done;
1726 if (so->so_type != SOCK_STREAM) {
1727 error = EINVAL;
1728 goto done;
1729 }
1730 if ((so->so_state & SS_ISCONNECTED) == 0) {
1731 error = ENOTCONN;
1732 goto done;
1733 }
1734 if (uap->offset < 0) {
1735 error = EINVAL;
1736 goto done;
1737 }
1738
1739#ifdef MAC
1740 error = mac_check_socket_send(td->td_ucred, so);
1741 if (error)
1742 goto done;
1743#endif
1744
1745 /*
1746 * If specified, get the pointer to the sf_hdtr struct for
1747 * any headers/trailers.
1748 */
1749 if (uap->hdtr != NULL) {
1750 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1751 if (error)
1752 goto done;
1753 /*
1754 * Send any headers.
1755 */
1756 if (hdtr.headers != NULL) {
1757 hdr_uio.uio_td = td;
1758 hdr_uio.uio_rw = UIO_WRITE;
1759 error = uiofromiov(hdtr.headers, hdtr.hdr_cnt,
1760 &hdr_uio);
1761 if (error)
1762 goto done;
1763 /* Cache hdr_iov, m_uiotombuf may change it. */
1764 hdr_iov = hdr_uio.uio_iov;
1765 if (hdr_uio.uio_resid > 0) {
1766 m_header = m_uiotombuf(&hdr_uio, M_DONTWAIT, 0);
1767 if (m_header == NULL)
1768 goto done;
1769 headersize = m_header->m_pkthdr.len;
1770 if (compat)
1771 sbytes += headersize;
1772 }
1773 }
1774 }
1775
1776 /*
1777 * Protect against multiple writers to the socket.
1778 */
1779 (void) sblock(&so->so_snd, M_WAITOK);
1780
1781 /*
1782 * Loop through the pages in the file, starting with the requested
1783 * offset. Get a file page (do I/O if necessary), map the file page
1784 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1785 * it on the socket.
1786 */
1787 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
1788 vm_pindex_t pindex;
1789 vm_offset_t pgoff;
1790
1791 pindex = OFF_TO_IDX(off);
1792 VM_OBJECT_LOCK(obj);
1793retry_lookup:
1794 /*
1795 * Calculate the amount to transfer. Not to exceed a page,
1796 * the EOF, or the passed in nbytes.
1797 */
1798 xfsize = obj->un_pager.vnp.vnp_size - off;
1799 VM_OBJECT_UNLOCK(obj);
1800 if (xfsize > PAGE_SIZE)
1801 xfsize = PAGE_SIZE;
1802 pgoff = (vm_offset_t)(off & PAGE_MASK);
1803 if (PAGE_SIZE - pgoff < xfsize)
1804 xfsize = PAGE_SIZE - pgoff;
1805 if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
1806 xfsize = uap->nbytes - sbytes;
1807 if (xfsize <= 0) {
1808 if (m_header != NULL) {
1809 m = m_header;
1810 m_header = NULL;
1811 goto retry_space;
1812 } else
1813 break;
1814 }
1815 /*
1816 * Optimize the non-blocking case by looking at the socket space
1817 * before going to the extra work of constituting the sf_buf.
1818 */
1819 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
1820 if (so->so_state & SS_CANTSENDMORE)
1821 error = EPIPE;
1822 else
1823 error = EAGAIN;
1824 sbunlock(&so->so_snd);
1825 goto done;
1826 }
1827 VM_OBJECT_LOCK(obj);
1828 /*
1829 * Attempt to look up the page.
1830 *
1831 * Allocate if not found
1832 *
1833 * Wait and loop if busy.
1834 */
1835 pg = vm_page_lookup(obj, pindex);
1836
1837 if (pg == NULL) {
1838 pg = vm_page_alloc(obj, pindex,
1839 VM_ALLOC_NORMAL | VM_ALLOC_WIRED);
1840 if (pg == NULL) {
1841 VM_OBJECT_UNLOCK(obj);
1842 VM_WAIT;
1843 VM_OBJECT_LOCK(obj);
1844 goto retry_lookup;
1845 }
1846 vm_page_lock_queues();
1847 vm_page_wakeup(pg);
1848 } else {
1849 vm_page_lock_queues();
1850 if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy"))
1851 goto retry_lookup;
1852 /*
1853 * Wire the page so it does not get ripped out from
1854 * under us.
1855 */
1856 vm_page_wire(pg);
1857 }
1858
1859 /*
1860 * If page is not valid for what we need, initiate I/O
1861 */
1862
1863 if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) {
1864 VM_OBJECT_UNLOCK(obj);
1865 } else if (uap->flags & SF_NODISKIO) {
1866 error = EBUSY;
1867 } else {
1868 int bsize, resid;
1869
1870 /*
1871 * Ensure that our page is still around when the I/O
1872 * completes.
1873 */
1874 vm_page_io_start(pg);
1875 vm_page_unlock_queues();
1876 VM_OBJECT_UNLOCK(obj);
1877
1878 /*
1879 * Get the page from backing store.
1880 */
1881 bsize = vp->v_mount->mnt_stat.f_iosize;
1882 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td);
1883 /*
1884 * XXXMAC: Because we don't have fp->f_cred here,
1885 * we pass in NOCRED. This is probably wrong, but
1886 * is consistent with our original implementation.
1887 */
1888 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
1889 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
1890 IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
1891 td->td_ucred, NOCRED, &resid, td);
1892 VOP_UNLOCK(vp, 0, td);
1893 if (error)
1894 VM_OBJECT_LOCK(obj);
1895 vm_page_lock_queues();
1896 vm_page_io_finish(pg);
1897 mbstat.sf_iocnt++;
1898 }
1899
1900 if (error) {
1901 vm_page_unwire(pg, 0);
1902 /*
1903 * See if anyone else might know about this page.
1904 * If not and it is not valid, then free it.
1905 */
1906 if (pg->wire_count == 0 && pg->valid == 0 &&
1907 pg->busy == 0 && !(pg->flags & PG_BUSY) &&
1908 pg->hold_count == 0) {
1909 vm_page_busy(pg);
1910 vm_page_free(pg);
1911 }
1912 vm_page_unlock_queues();
1913 VM_OBJECT_UNLOCK(obj);
1914 sbunlock(&so->so_snd);
1915 goto done;
1916 }
1917 vm_page_unlock_queues();
1918
1919 /*
1920 * Get a sendfile buf. We usually wait as long as necessary,
1921 * but this wait can be interrupted.
1922 */
1923 if ((sf = sf_buf_alloc(pg, PCATCH)) == NULL) {
1924 mbstat.sf_allocfail++;
1925 vm_page_lock_queues();
1926 vm_page_unwire(pg, 0);
1927 if (pg->wire_count == 0 && pg->object == NULL)
1928 vm_page_free(pg);
1929 vm_page_unlock_queues();
1930 sbunlock(&so->so_snd);
1931 error = EINTR;
1932 goto done;
1933 }
1934
1935 /*
1936 * Get an mbuf header and set it up as having external storage.
1937 */
1938 if (m_header)
1939 MGET(m, M_TRYWAIT, MT_DATA);
1940 else
1941 MGETHDR(m, M_TRYWAIT, MT_DATA);
1942 if (m == NULL) {
1943 error = ENOBUFS;
1944 sf_buf_mext((void *)sf_buf_kva(sf), sf);
1945 sbunlock(&so->so_snd);
1946 goto done;
1947 }
1948 /*
1949 * Setup external storage for mbuf.
1950 */
1951 MEXTADD(m, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, sf, M_RDONLY,
1952 EXT_SFBUF);
1953 m->m_data = (char *)sf_buf_kva(sf) + pgoff;
1954 m->m_pkthdr.len = m->m_len = xfsize;
1955
1956 if (m_header) {
1957 m_cat(m_header, m);
1958 m = m_header;
1959 m_header = NULL;
1960 m_fixhdr(m);
1961 }
1962
1963 /*
1964 * Add the buffer to the socket buffer chain.
1965 */
1966 s = splnet();
1967retry_space:
1968 /*
1969 * Make sure that the socket is still able to take more data.
1970 * CANTSENDMORE being true usually means that the connection
1971 * was closed. so_error is true when an error was sensed after
1972 * a previous send.
1973 * The state is checked after the page mapping and buffer
1974 * allocation above since those operations may block and make
1975 * any socket checks stale. From this point forward, nothing
1976 * blocks before the pru_send (or more accurately, any blocking
1977 * results in a loop back to here to re-check).
1978 */
1979 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
1980 if (so->so_state & SS_CANTSENDMORE) {
1981 error = EPIPE;
1982 } else {
1983 error = so->so_error;
1984 so->so_error = 0;
1985 }
1986 m_freem(m);
1987 sbunlock(&so->so_snd);
1988 splx(s);
1989 goto done;
1990 }
1991 /*
1992 * Wait for socket space to become available. We do this just
1993 * after checking the connection state above in order to avoid
1994 * a race condition with sbwait().
1995 */
1996 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
1997 if (so->so_state & SS_NBIO) {
1998 m_freem(m);
1999 sbunlock(&so->so_snd);
2000 splx(s);
2001 error = EAGAIN;
2002 goto done;
2003 }
2004 error = sbwait(&so->so_snd);
2005 /*
2006 * An error from sbwait usually indicates that we've
2007 * been interrupted by a signal. If we've sent anything
2008 * then return bytes sent, otherwise return the error.
2009 */
2010 if (error) {
2011 m_freem(m);
2012 sbunlock(&so->so_snd);
2013 splx(s);
2014 goto done;
2015 }
2016 goto retry_space;
2017 }
2018 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td);
2019 splx(s);
2020 if (error) {
2021 sbunlock(&so->so_snd);
2022 goto done;
2023 }
2024 headersent = 1;
2025 }
2026 sbunlock(&so->so_snd);
2027
2028 /*
2029 * Send trailers. Wimp out and use writev(2).
2030 */
2031 if (uap->hdtr != NULL && hdtr.trailers != NULL) {
2032 nuap.fd = uap->s;
2033 nuap.iovp = hdtr.trailers;
2034 nuap.iovcnt = hdtr.trl_cnt;
2035 error = writev(td, &nuap);
2036 if (error)
2037 goto done;
2038 if (compat)
2039 sbytes += td->td_retval[0];
2040 else
2041 hdtr_size += td->td_retval[0];
2042 }
2043
2044done:
2045 if (headersent) {
2046 if (!compat)
2047 hdtr_size += headersize;
2048 } else {
2049 if (compat)
2050 sbytes -= headersize;
2051 }
2052 /*
2053 * If there was no error we have to clear td->td_retval[0]
2054 * because it may have been set by writev.
2055 */
2056 if (error == 0) {
2057 td->td_retval[0] = 0;
2058 }
2059 if (uap->sbytes != NULL) {
2060 if (!compat)
2061 sbytes += hdtr_size;
2062 copyout(&sbytes, uap->sbytes, sizeof(off_t));
2063 }
2064 if (vp)
2065 vrele(vp);
2066 if (so)
2067 fputsock(so);
2068 if (hdr_iov)
2069 FREE(hdr_iov, M_IOV);
2070 if (m_header)
2071 m_freem(m_header);
2072
2073 mtx_unlock(&Giant);
2074
2075 if (error == ERESTART)
2076 error = EINTR;
2077
2078 return (error);
2079}
104 * System call interface to the socket abstraction.
105 */
106#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
107#define COMPAT_OLDSOCK
108#endif
109
110/*
111 * MPSAFE
112 */
113int
114socket(td, uap)
115 struct thread *td;
116 register struct socket_args /* {
117 int domain;
118 int type;
119 int protocol;
120 } */ *uap;
121{
122 struct filedesc *fdp;
123 struct socket *so;
124 struct file *fp;
125 int fd, error;
126
127 fdp = td->td_proc->p_fd;
128 error = falloc(td, &fp, &fd);
129 if (error)
130 return (error);
131 /* An extra reference on `fp' has been held for us by falloc(). */
132 NET_LOCK_GIANT();
133 error = socreate(uap->domain, &so, uap->type, uap->protocol,
134 td->td_ucred, td);
135 NET_UNLOCK_GIANT();
136 FILEDESC_LOCK(fdp);
137 if (error) {
138 if (fdp->fd_ofiles[fd] == fp) {
139 fdp->fd_ofiles[fd] = NULL;
140 fdunused(fdp, fd);
141 FILEDESC_UNLOCK(fdp);
142 fdrop(fp, td);
143 } else {
144 FILEDESC_UNLOCK(fdp);
145 }
146 } else {
147 fp->f_data = so; /* already has ref count */
148 fp->f_flag = FREAD|FWRITE;
149 fp->f_ops = &socketops;
150 fp->f_type = DTYPE_SOCKET;
151 FILEDESC_UNLOCK(fdp);
152 td->td_retval[0] = fd;
153 }
154 fdrop(fp, td);
155 return (error);
156}
157
158/*
159 * MPSAFE
160 */
161/* ARGSUSED */
162int
163bind(td, uap)
164 struct thread *td;
165 register struct bind_args /* {
166 int s;
167 caddr_t name;
168 int namelen;
169 } */ *uap;
170{
171 struct sockaddr *sa;
172 int error;
173
174 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
175 return (error);
176
177 return (kern_bind(td, uap->s, sa));
178}
179
180int
181kern_bind(td, fd, sa)
182 struct thread *td;
183 int fd;
184 struct sockaddr *sa;
185{
186 struct socket *so;
187 int error;
188
189 NET_LOCK_GIANT();
190 if ((error = fgetsock(td, fd, &so, NULL)) != 0)
191 goto done2;
192#ifdef MAC
193 error = mac_check_socket_bind(td->td_ucred, so, sa);
194 if (error)
195 goto done1;
196#endif
197 error = sobind(so, sa, td);
198#ifdef MAC
199done1:
200#endif
201 fputsock(so);
202done2:
203 NET_UNLOCK_GIANT();
204 FREE(sa, M_SONAME);
205 return (error);
206}
207
208/*
209 * MPSAFE
210 */
211/* ARGSUSED */
212int
213listen(td, uap)
214 struct thread *td;
215 register struct listen_args /* {
216 int s;
217 int backlog;
218 } */ *uap;
219{
220 struct socket *so;
221 int error;
222
223 NET_LOCK_GIANT();
224 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
225#ifdef MAC
226 error = mac_check_socket_listen(td->td_ucred, so);
227 if (error)
228 goto done;
229#endif
230 error = solisten(so, uap->backlog, td);
231#ifdef MAC
232done:
233#endif
234 fputsock(so);
235 }
236 NET_UNLOCK_GIANT();
237 return(error);
238}
239
240/*
241 * accept1()
242 * MPSAFE
243 */
244static int
245accept1(td, uap, compat)
246 struct thread *td;
247 register struct accept_args /* {
248 int s;
249 struct sockaddr * __restrict name;
250 socklen_t * __restrict anamelen;
251 } */ *uap;
252 int compat;
253{
254 struct filedesc *fdp;
255 struct file *nfp = NULL;
256 struct sockaddr *sa;
257 socklen_t namelen;
258 int error, s;
259 struct socket *head, *so;
260 int fd;
261 u_int fflag;
262 pid_t pgid;
263 int tmp;
264
265 fdp = td->td_proc->p_fd;
266 if (uap->name) {
267 error = copyin(uap->anamelen, &namelen, sizeof (namelen));
268 if(error)
269 goto done3;
270 if (namelen < 0) {
271 error = EINVAL;
272 goto done3;
273 }
274 }
275 NET_LOCK_GIANT();
276 error = fgetsock(td, uap->s, &head, &fflag);
277 if (error)
278 goto done2;
279 s = splnet();
280 if ((head->so_options & SO_ACCEPTCONN) == 0) {
281 splx(s);
282 error = EINVAL;
283 goto done;
284 }
285 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
286 if (head->so_state & SS_CANTRCVMORE) {
287 head->so_error = ECONNABORTED;
288 break;
289 }
290 if ((head->so_state & SS_NBIO) != 0) {
291 head->so_error = EWOULDBLOCK;
292 break;
293 }
294 error = tsleep(&head->so_timeo, PSOCK | PCATCH,
295 "accept", 0);
296 if (error) {
297 splx(s);
298 goto done;
299 }
300 }
301 if (head->so_error) {
302 error = head->so_error;
303 head->so_error = 0;
304 splx(s);
305 goto done;
306 }
307
308 /*
309 * At this point we know that there is at least one connection
310 * ready to be accepted. Remove it from the queue prior to
311 * allocating the file descriptor for it since falloc() may
312 * block allowing another process to accept the connection
313 * instead.
314 */
315 so = TAILQ_FIRST(&head->so_comp);
316 TAILQ_REMOVE(&head->so_comp, so, so_list);
317 head->so_qlen--;
318
319 error = falloc(td, &nfp, &fd);
320 if (error) {
321 /*
322 * Probably ran out of file descriptors. Put the
323 * unaccepted connection back onto the queue and
324 * do another wakeup so some other process might
325 * have a chance at it.
326 */
327 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list);
328 head->so_qlen++;
329 wakeup_one(&head->so_timeo);
330 splx(s);
331 goto done;
332 }
333 /* An extra reference on `nfp' has been held for us by falloc(). */
334 td->td_retval[0] = fd;
335
336 /* connection has been removed from the listen queue */
337 KNOTE(&head->so_rcv.sb_sel.si_note, 0);
338
339 so->so_state &= ~SS_COMP;
340 so->so_head = NULL;
341 pgid = fgetown(&head->so_sigio);
342 if (pgid != 0)
343 fsetown(pgid, &so->so_sigio);
344
345 FILE_LOCK(nfp);
346 soref(so); /* file descriptor reference */
347 nfp->f_data = so; /* nfp has ref count from falloc */
348 nfp->f_flag = fflag;
349 nfp->f_ops = &socketops;
350 nfp->f_type = DTYPE_SOCKET;
351 FILE_UNLOCK(nfp);
352 /* Sync socket nonblocking/async state with file flags */
353 tmp = fflag & FNONBLOCK;
354 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
355 tmp = fflag & FASYNC;
356 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
357 sa = 0;
358 error = soaccept(so, &sa);
359 if (error) {
360 /*
361 * return a namelen of zero for older code which might
362 * ignore the return value from accept.
363 */
364 if (uap->name != NULL) {
365 namelen = 0;
366 (void) copyout(&namelen,
367 uap->anamelen, sizeof(*uap->anamelen));
368 }
369 goto noconnection;
370 }
371 if (sa == NULL) {
372 namelen = 0;
373 if (uap->name)
374 goto gotnoname;
375 splx(s);
376 error = 0;
377 goto done;
378 }
379 if (uap->name) {
380 /* check sa_len before it is destroyed */
381 if (namelen > sa->sa_len)
382 namelen = sa->sa_len;
383#ifdef COMPAT_OLDSOCK
384 if (compat)
385 ((struct osockaddr *)sa)->sa_family =
386 sa->sa_family;
387#endif
388 error = copyout(sa, uap->name, (u_int)namelen);
389 if (!error)
390gotnoname:
391 error = copyout(&namelen,
392 uap->anamelen, sizeof (*uap->anamelen));
393 }
394noconnection:
395 if (sa)
396 FREE(sa, M_SONAME);
397
398 /*
399 * close the new descriptor, assuming someone hasn't ripped it
400 * out from under us.
401 */
402 if (error) {
403 FILEDESC_LOCK(fdp);
404 if (fdp->fd_ofiles[fd] == nfp) {
405 fdp->fd_ofiles[fd] = NULL;
406 fdunused(fdp, fd);
407 FILEDESC_UNLOCK(fdp);
408 fdrop(nfp, td);
409 } else {
410 FILEDESC_UNLOCK(fdp);
411 }
412 }
413 splx(s);
414
415 /*
416 * Release explicitly held references before returning.
417 */
418done:
419 if (nfp != NULL)
420 fdrop(nfp, td);
421 fputsock(head);
422done2:
423 NET_UNLOCK_GIANT();
424done3:
425 return (error);
426}
427
/*
 * accept(): current-format front end to accept1().
 *
 * MPSAFE (accept1() is MPSAFE)
 */
int
accept(struct thread *td, struct accept_args *uap)
{

	return (accept1(td, uap, 0));
}
439
440#ifdef COMPAT_OLDSOCK
/*
 * Old-socket front end to accept1(); requests the old sockaddr layout
 * by passing compat = 1.
 *
 * MPSAFE (accept1() is MPSAFE)
 */
int
oaccept(struct thread *td, struct accept_args *uap)
{

	return (accept1(td, uap, 1));
}
452#endif /* COMPAT_OLDSOCK */
453
454/*
455 * MPSAFE
456 */
457/* ARGSUSED */
458int
459connect(td, uap)
460 struct thread *td;
461 register struct connect_args /* {
462 int s;
463 caddr_t name;
464 int namelen;
465 } */ *uap;
466{
467 struct sockaddr *sa;
468 int error;
469
470 error = getsockaddr(&sa, uap->name, uap->namelen);
471 if (error)
472 return (error);
473
474 return (kern_connect(td, uap->s, sa));
475}
476
477
478int
479kern_connect(td, fd, sa)
480 struct thread *td;
481 int fd;
482 struct sockaddr *sa;
483{
484 struct socket *so;
485 int error, s;
486 int interrupted = 0;
487
488 NET_LOCK_GIANT();
489 if ((error = fgetsock(td, fd, &so, NULL)) != 0)
490 goto done2;
491 if (so->so_state & SS_ISCONNECTING) {
492 error = EALREADY;
493 goto done1;
494 }
495#ifdef MAC
496 error = mac_check_socket_connect(td->td_ucred, so, sa);
497 if (error)
498 goto bad;
499#endif
500 error = soconnect(so, sa, td);
501 if (error)
502 goto bad;
503 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
504 error = EINPROGRESS;
505 goto done1;
506 }
507 s = splnet();
508 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
509 error = tsleep(&so->so_timeo, PSOCK | PCATCH, "connec", 0);
510 if (error) {
511 if (error == EINTR || error == ERESTART)
512 interrupted = 1;
513 break;
514 }
515 }
516 if (error == 0) {
517 error = so->so_error;
518 so->so_error = 0;
519 }
520 splx(s);
521bad:
522 if (!interrupted)
523 so->so_state &= ~SS_ISCONNECTING;
524 if (error == ERESTART)
525 error = EINTR;
526done1:
527 fputsock(so);
528done2:
529 NET_UNLOCK_GIANT();
530 FREE(sa, M_SONAME);
531 return (error);
532}
533
534/*
535 * MPSAFE
536 */
537int
538socketpair(td, uap)
539 struct thread *td;
540 register struct socketpair_args /* {
541 int domain;
542 int type;
543 int protocol;
544 int *rsv;
545 } */ *uap;
546{
547 register struct filedesc *fdp = td->td_proc->p_fd;
548 struct file *fp1, *fp2;
549 struct socket *so1, *so2;
550 int fd, error, sv[2];
551
552 NET_LOCK_GIANT();
553 error = socreate(uap->domain, &so1, uap->type, uap->protocol,
554 td->td_ucred, td);
555 if (error)
556 goto done2;
557 error = socreate(uap->domain, &so2, uap->type, uap->protocol,
558 td->td_ucred, td);
559 if (error)
560 goto free1;
561 /* On success extra reference to `fp1' and 'fp2' is set by falloc. */
562 error = falloc(td, &fp1, &fd);
563 if (error)
564 goto free2;
565 sv[0] = fd;
566 fp1->f_data = so1; /* so1 already has ref count */
567 error = falloc(td, &fp2, &fd);
568 if (error)
569 goto free3;
570 fp2->f_data = so2; /* so2 already has ref count */
571 sv[1] = fd;
572 error = soconnect2(so1, so2);
573 if (error)
574 goto free4;
575 if (uap->type == SOCK_DGRAM) {
576 /*
577 * Datagram socket connection is asymmetric.
578 */
579 error = soconnect2(so2, so1);
580 if (error)
581 goto free4;
582 }
583 FILE_LOCK(fp1);
584 fp1->f_flag = FREAD|FWRITE;
585 fp1->f_ops = &socketops;
586 fp1->f_type = DTYPE_SOCKET;
587 FILE_UNLOCK(fp1);
588 FILE_LOCK(fp2);
589 fp2->f_flag = FREAD|FWRITE;
590 fp2->f_ops = &socketops;
591 fp2->f_type = DTYPE_SOCKET;
592 FILE_UNLOCK(fp2);
593 error = copyout(sv, uap->rsv, 2 * sizeof (int));
594 fdrop(fp1, td);
595 fdrop(fp2, td);
596 goto done2;
597free4:
598 FILEDESC_LOCK(fdp);
599 if (fdp->fd_ofiles[sv[1]] == fp2) {
600 fdp->fd_ofiles[sv[1]] = NULL;
601 fdunused(fdp, sv[1]);
602 FILEDESC_UNLOCK(fdp);
603 fdrop(fp2, td);
604 } else {
605 FILEDESC_UNLOCK(fdp);
606 }
607 fdrop(fp2, td);
608free3:
609 FILEDESC_LOCK(fdp);
610 if (fdp->fd_ofiles[sv[0]] == fp1) {
611 fdp->fd_ofiles[sv[0]] = NULL;
612 fdunused(fdp, sv[0]);
613 FILEDESC_UNLOCK(fdp);
614 fdrop(fp1, td);
615 } else {
616 FILEDESC_UNLOCK(fdp);
617 }
618 fdrop(fp1, td);
619free2:
620 (void)soclose(so2);
621free1:
622 (void)soclose(so1);
623done2:
624 NET_UNLOCK_GIANT();
625 return (error);
626}
627
628static int
629sendit(td, s, mp, flags)
630 register struct thread *td;
631 int s;
632 register struct msghdr *mp;
633 int flags;
634{
635 struct mbuf *control;
636 struct sockaddr *to;
637 int error;
638
639 if (mp->msg_name != NULL) {
640 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
641 if (error) {
642 to = NULL;
643 goto bad;
644 }
645 mp->msg_name = to;
646 } else {
647 to = NULL;
648 }
649
650 if (mp->msg_control) {
651 if (mp->msg_controllen < sizeof(struct cmsghdr)
652#ifdef COMPAT_OLDSOCK
653 && mp->msg_flags != MSG_COMPAT
654#endif
655 ) {
656 error = EINVAL;
657 goto bad;
658 }
659 error = sockargs(&control, mp->msg_control,
660 mp->msg_controllen, MT_CONTROL);
661 if (error)
662 goto bad;
663#ifdef COMPAT_OLDSOCK
664 if (mp->msg_flags == MSG_COMPAT) {
665 register struct cmsghdr *cm;
666
667 M_PREPEND(control, sizeof(*cm), M_TRYWAIT);
668 if (control == 0) {
669 error = ENOBUFS;
670 goto bad;
671 } else {
672 cm = mtod(control, struct cmsghdr *);
673 cm->cmsg_len = control->m_len;
674 cm->cmsg_level = SOL_SOCKET;
675 cm->cmsg_type = SCM_RIGHTS;
676 }
677 }
678#endif
679 } else {
680 control = NULL;
681 }
682
683 error = kern_sendit(td, s, mp, flags, control);
684
685bad:
686 if (to)
687 FREE(to, M_SONAME);
688 return (error);
689}
690
691int
692kern_sendit(td, s, mp, flags, control)
693 struct thread *td;
694 int s;
695 struct msghdr *mp;
696 int flags;
697 struct mbuf *control;
698{
699 struct uio auio;
700 struct iovec *iov;
701 struct socket *so;
702 int i;
703 int len, error;
704#ifdef KTRACE
705 struct iovec *ktriov = NULL;
706 struct uio ktruio;
707 int iovlen;
708#endif
709
710 NET_LOCK_GIANT();
711 if ((error = fgetsock(td, s, &so, NULL)) != 0)
712 goto bad2;
713
714#ifdef MAC
715 error = mac_check_socket_send(td->td_ucred, so);
716 if (error)
717 goto bad;
718#endif
719
720 auio.uio_iov = mp->msg_iov;
721 auio.uio_iovcnt = mp->msg_iovlen;
722 auio.uio_segflg = UIO_USERSPACE;
723 auio.uio_rw = UIO_WRITE;
724 auio.uio_td = td;
725 auio.uio_offset = 0; /* XXX */
726 auio.uio_resid = 0;
727 iov = mp->msg_iov;
728 for (i = 0; i < mp->msg_iovlen; i++, iov++) {
729 if ((auio.uio_resid += iov->iov_len) < 0) {
730 error = EINVAL;
731 goto bad;
732 }
733 }
734#ifdef KTRACE
735 if (KTRPOINT(td, KTR_GENIO)) {
736 iovlen = auio.uio_iovcnt * sizeof (struct iovec);
737 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
738 bcopy(auio.uio_iov, ktriov, iovlen);
739 ktruio = auio;
740 }
741#endif
742 len = auio.uio_resid;
743 error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio,
744 0, control, flags, td);
745 if (error) {
746 if (auio.uio_resid != len && (error == ERESTART ||
747 error == EINTR || error == EWOULDBLOCK))
748 error = 0;
749 /* Generation of SIGPIPE can be controlled per socket */
750 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE)) {
751 PROC_LOCK(td->td_proc);
752 psignal(td->td_proc, SIGPIPE);
753 PROC_UNLOCK(td->td_proc);
754 }
755 }
756 if (error == 0)
757 td->td_retval[0] = len - auio.uio_resid;
758#ifdef KTRACE
759 if (ktriov != NULL) {
760 if (error == 0) {
761 ktruio.uio_iov = ktriov;
762 ktruio.uio_resid = td->td_retval[0];
763 ktrgenio(s, UIO_WRITE, &ktruio, error);
764 }
765 FREE(ktriov, M_TEMP);
766 }
767#endif
768bad:
769 fputsock(so);
770bad2:
771 NET_UNLOCK_GIANT();
772 return (error);
773}
774
775/*
776 * MPSAFE
777 */
778int
779sendto(td, uap)
780 struct thread *td;
781 register struct sendto_args /* {
782 int s;
783 caddr_t buf;
784 size_t len;
785 int flags;
786 caddr_t to;
787 int tolen;
788 } */ *uap;
789{
790 struct msghdr msg;
791 struct iovec aiov;
792 int error;
793
794 msg.msg_name = uap->to;
795 msg.msg_namelen = uap->tolen;
796 msg.msg_iov = &aiov;
797 msg.msg_iovlen = 1;
798 msg.msg_control = 0;
799#ifdef COMPAT_OLDSOCK
800 msg.msg_flags = 0;
801#endif
802 aiov.iov_base = uap->buf;
803 aiov.iov_len = uap->len;
804 error = sendit(td, uap->s, &msg, uap->flags);
805 return (error);
806}
807
808#ifdef COMPAT_OLDSOCK
809/*
810 * MPSAFE
811 */
812int
813osend(td, uap)
814 struct thread *td;
815 register struct osend_args /* {
816 int s;
817 caddr_t buf;
818 int len;
819 int flags;
820 } */ *uap;
821{
822 struct msghdr msg;
823 struct iovec aiov;
824 int error;
825
826 msg.msg_name = 0;
827 msg.msg_namelen = 0;
828 msg.msg_iov = &aiov;
829 msg.msg_iovlen = 1;
830 aiov.iov_base = uap->buf;
831 aiov.iov_len = uap->len;
832 msg.msg_control = 0;
833 msg.msg_flags = 0;
834 error = sendit(td, uap->s, &msg, uap->flags);
835 return (error);
836}
837
838/*
839 * MPSAFE
840 */
841int
842osendmsg(td, uap)
843 struct thread *td;
844 register struct osendmsg_args /* {
845 int s;
846 caddr_t msg;
847 int flags;
848 } */ *uap;
849{
850 struct msghdr msg;
851 struct iovec aiov[UIO_SMALLIOV], *iov;
852 int error;
853
854 error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
855 if (error)
856 goto done2;
857 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
858 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
859 error = EMSGSIZE;
860 goto done2;
861 }
862 MALLOC(iov, struct iovec *,
863 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
864 M_WAITOK);
865 } else {
866 iov = aiov;
867 }
868 error = copyin(msg.msg_iov, iov,
869 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
870 if (error)
871 goto done;
872 msg.msg_flags = MSG_COMPAT;
873 msg.msg_iov = iov;
874 error = sendit(td, uap->s, &msg, uap->flags);
875done:
876 if (iov != aiov)
877 FREE(iov, M_IOV);
878done2:
879 return (error);
880}
881#endif
882
883/*
884 * MPSAFE
885 */
886int
887sendmsg(td, uap)
888 struct thread *td;
889 register struct sendmsg_args /* {
890 int s;
891 caddr_t msg;
892 int flags;
893 } */ *uap;
894{
895 struct msghdr msg;
896 struct iovec aiov[UIO_SMALLIOV], *iov;
897 int error;
898
899 error = copyin(uap->msg, &msg, sizeof (msg));
900 if (error)
901 goto done2;
902 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
903 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
904 error = EMSGSIZE;
905 goto done2;
906 }
907 MALLOC(iov, struct iovec *,
908 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
909 M_WAITOK);
910 } else {
911 iov = aiov;
912 }
913 if (msg.msg_iovlen &&
914 (error = copyin(msg.msg_iov, iov,
915 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)))))
916 goto done;
917 msg.msg_iov = iov;
918#ifdef COMPAT_OLDSOCK
919 msg.msg_flags = 0;
920#endif
921 error = sendit(td, uap->s, &msg, uap->flags);
922done:
923 if (iov != aiov)
924 FREE(iov, M_IOV);
925done2:
926 return (error);
927}
928
929static int
930recvit(td, s, mp, namelenp)
931 register struct thread *td;
932 int s;
933 register struct msghdr *mp;
934 void *namelenp;
935{
936 struct uio auio;
937 register struct iovec *iov;
938 register int i;
939 socklen_t len;
940 int error;
941 struct mbuf *m, *control = 0;
942 caddr_t ctlbuf;
943 struct socket *so;
944 struct sockaddr *fromsa = 0;
945#ifdef KTRACE
946 struct iovec *ktriov = NULL;
947 struct uio ktruio;
948 int iovlen;
949#endif
950
951 NET_LOCK_GIANT();
952 if ((error = fgetsock(td, s, &so, NULL)) != 0) {
953 NET_UNLOCK_GIANT();
954 return (error);
955 }
956
957#ifdef MAC
958 error = mac_check_socket_receive(td->td_ucred, so);
959 if (error) {
960 fputsock(so);
961 NET_UNLOCK_GIANT();
962 return (error);
963 }
964#endif
965
966 auio.uio_iov = mp->msg_iov;
967 auio.uio_iovcnt = mp->msg_iovlen;
968 auio.uio_segflg = UIO_USERSPACE;
969 auio.uio_rw = UIO_READ;
970 auio.uio_td = td;
971 auio.uio_offset = 0; /* XXX */
972 auio.uio_resid = 0;
973 iov = mp->msg_iov;
974 for (i = 0; i < mp->msg_iovlen; i++, iov++) {
975 if ((auio.uio_resid += iov->iov_len) < 0) {
976 fputsock(so);
977 NET_UNLOCK_GIANT();
978 return (EINVAL);
979 }
980 }
981#ifdef KTRACE
982 if (KTRPOINT(td, KTR_GENIO)) {
983 iovlen = auio.uio_iovcnt * sizeof (struct iovec);
984 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
985 bcopy(auio.uio_iov, ktriov, iovlen);
986 ktruio = auio;
987 }
988#endif
989 len = auio.uio_resid;
990 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
991 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
992 &mp->msg_flags);
993 if (error) {
994 if (auio.uio_resid != (int)len && (error == ERESTART ||
995 error == EINTR || error == EWOULDBLOCK))
996 error = 0;
997 }
998#ifdef KTRACE
999 if (ktriov != NULL) {
1000 if (error == 0) {
1001 ktruio.uio_iov = ktriov;
1002 ktruio.uio_resid = (int)len - auio.uio_resid;
1003 ktrgenio(s, UIO_READ, &ktruio, error);
1004 }
1005 FREE(ktriov, M_TEMP);
1006 }
1007#endif
1008 if (error)
1009 goto out;
1010 td->td_retval[0] = (int)len - auio.uio_resid;
1011 if (mp->msg_name) {
1012 len = mp->msg_namelen;
1013 if (len <= 0 || fromsa == 0)
1014 len = 0;
1015 else {
1016 /* save sa_len before it is destroyed by MSG_COMPAT */
1017 len = MIN(len, fromsa->sa_len);
1018#ifdef COMPAT_OLDSOCK
1019 if (mp->msg_flags & MSG_COMPAT)
1020 ((struct osockaddr *)fromsa)->sa_family =
1021 fromsa->sa_family;
1022#endif
1023 error = copyout(fromsa, mp->msg_name, (unsigned)len);
1024 if (error)
1025 goto out;
1026 }
1027 mp->msg_namelen = len;
1028 if (namelenp &&
1029 (error = copyout(&len, namelenp, sizeof (socklen_t)))) {
1030#ifdef COMPAT_OLDSOCK
1031 if (mp->msg_flags & MSG_COMPAT)
1032 error = 0; /* old recvfrom didn't check */
1033 else
1034#endif
1035 goto out;
1036 }
1037 }
1038 if (mp->msg_control) {
1039#ifdef COMPAT_OLDSOCK
1040 /*
1041 * We assume that old recvmsg calls won't receive access
1042 * rights and other control info, esp. as control info
1043 * is always optional and those options didn't exist in 4.3.
1044 * If we receive rights, trim the cmsghdr; anything else
1045 * is tossed.
1046 */
1047 if (control && mp->msg_flags & MSG_COMPAT) {
1048 if (mtod(control, struct cmsghdr *)->cmsg_level !=
1049 SOL_SOCKET ||
1050 mtod(control, struct cmsghdr *)->cmsg_type !=
1051 SCM_RIGHTS) {
1052 mp->msg_controllen = 0;
1053 goto out;
1054 }
1055 control->m_len -= sizeof (struct cmsghdr);
1056 control->m_data += sizeof (struct cmsghdr);
1057 }
1058#endif
1059 len = mp->msg_controllen;
1060 m = control;
1061 mp->msg_controllen = 0;
1062 ctlbuf = mp->msg_control;
1063
1064 while (m && len > 0) {
1065 unsigned int tocopy;
1066
1067 if (len >= m->m_len)
1068 tocopy = m->m_len;
1069 else {
1070 mp->msg_flags |= MSG_CTRUNC;
1071 tocopy = len;
1072 }
1073
1074 if ((error = copyout(mtod(m, caddr_t),
1075 ctlbuf, tocopy)) != 0)
1076 goto out;
1077
1078 ctlbuf += tocopy;
1079 len -= tocopy;
1080 m = m->m_next;
1081 }
1082 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
1083 }
1084out:
1085 fputsock(so);
1086 NET_UNLOCK_GIANT();
1087 if (fromsa)
1088 FREE(fromsa, M_SONAME);
1089 if (control)
1090 m_freem(control);
1091 return (error);
1092}
1093
1094/*
1095 * MPSAFE
1096 */
1097int
1098recvfrom(td, uap)
1099 struct thread *td;
1100 register struct recvfrom_args /* {
1101 int s;
1102 caddr_t buf;
1103 size_t len;
1104 int flags;
1105 struct sockaddr * __restrict from;
1106 socklen_t * __restrict fromlenaddr;
1107 } */ *uap;
1108{
1109 struct msghdr msg;
1110 struct iovec aiov;
1111 int error;
1112
1113 if (uap->fromlenaddr) {
1114 error = copyin(uap->fromlenaddr,
1115 &msg.msg_namelen, sizeof (msg.msg_namelen));
1116 if (error)
1117 goto done2;
1118 } else {
1119 msg.msg_namelen = 0;
1120 }
1121 msg.msg_name = uap->from;
1122 msg.msg_iov = &aiov;
1123 msg.msg_iovlen = 1;
1124 aiov.iov_base = uap->buf;
1125 aiov.iov_len = uap->len;
1126 msg.msg_control = 0;
1127 msg.msg_flags = uap->flags;
1128 error = recvit(td, uap->s, &msg, uap->fromlenaddr);
1129done2:
1130 return(error);
1131}
1132
1133#ifdef COMPAT_OLDSOCK
1134/*
1135 * MPSAFE
1136 */
1137int
1138orecvfrom(td, uap)
1139 struct thread *td;
1140 struct recvfrom_args *uap;
1141{
1142
1143 uap->flags |= MSG_COMPAT;
1144 return (recvfrom(td, uap));
1145}
1146#endif
1147
1148
1149#ifdef COMPAT_OLDSOCK
1150/*
1151 * MPSAFE
1152 */
1153int
1154orecv(td, uap)
1155 struct thread *td;
1156 register struct orecv_args /* {
1157 int s;
1158 caddr_t buf;
1159 int len;
1160 int flags;
1161 } */ *uap;
1162{
1163 struct msghdr msg;
1164 struct iovec aiov;
1165 int error;
1166
1167 msg.msg_name = 0;
1168 msg.msg_namelen = 0;
1169 msg.msg_iov = &aiov;
1170 msg.msg_iovlen = 1;
1171 aiov.iov_base = uap->buf;
1172 aiov.iov_len = uap->len;
1173 msg.msg_control = 0;
1174 msg.msg_flags = uap->flags;
1175 error = recvit(td, uap->s, &msg, NULL);
1176 return (error);
1177}
1178
1179/*
1180 * Old recvmsg. This code takes advantage of the fact that the old msghdr
1181 * overlays the new one, missing only the flags, and with the (old) access
1182 * rights where the control fields are now.
1183 *
1184 * MPSAFE
1185 */
1186int
1187orecvmsg(td, uap)
1188 struct thread *td;
1189 register struct orecvmsg_args /* {
1190 int s;
1191 struct omsghdr *msg;
1192 int flags;
1193 } */ *uap;
1194{
1195 struct msghdr msg;
1196 struct iovec aiov[UIO_SMALLIOV], *iov;
1197 int error;
1198
1199 error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
1200 if (error)
1201 return (error);
1202
1203 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1204 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
1205 error = EMSGSIZE;
1206 goto done2;
1207 }
1208 MALLOC(iov, struct iovec *,
1209 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1210 M_WAITOK);
1211 } else {
1212 iov = aiov;
1213 }
1214 msg.msg_flags = uap->flags | MSG_COMPAT;
1215 error = copyin(msg.msg_iov, iov,
1216 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1217 if (error)
1218 goto done;
1219 msg.msg_iov = iov;
1220 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
1221
1222 if (msg.msg_controllen && error == 0)
1223 error = copyout(&msg.msg_controllen,
1224 &uap->msg->msg_accrightslen, sizeof (int));
1225done:
1226 if (iov != aiov)
1227 FREE(iov, M_IOV);
1228done2:
1229 return (error);
1230}
1231#endif
1232
1233/*
1234 * MPSAFE
1235 */
1236int
1237recvmsg(td, uap)
1238 struct thread *td;
1239 register struct recvmsg_args /* {
1240 int s;
1241 struct msghdr *msg;
1242 int flags;
1243 } */ *uap;
1244{
1245 struct msghdr msg;
1246 struct iovec aiov[UIO_SMALLIOV], *uiov, *iov;
1247 register int error;
1248
1249 error = copyin(uap->msg, &msg, sizeof (msg));
1250 if (error)
1251 goto done2;
1252 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1253 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
1254 error = EMSGSIZE;
1255 goto done2;
1256 }
1257 MALLOC(iov, struct iovec *,
1258 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1259 M_WAITOK);
1260 } else {
1261 iov = aiov;
1262 }
1263#ifdef COMPAT_OLDSOCK
1264 msg.msg_flags = uap->flags &~ MSG_COMPAT;
1265#else
1266 msg.msg_flags = uap->flags;
1267#endif
1268 uiov = msg.msg_iov;
1269 msg.msg_iov = iov;
1270 error = copyin(uiov, iov,
1271 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1272 if (error)
1273 goto done;
1274 error = recvit(td, uap->s, &msg, NULL);
1275 if (!error) {
1276 msg.msg_iov = uiov;
1277 error = copyout(&msg, uap->msg, sizeof(msg));
1278 }
1279done:
1280 if (iov != aiov)
1281 FREE(iov, M_IOV);
1282done2:
1283 return (error);
1284}
1285
1286/*
1287 * MPSAFE
1288 */
1289/* ARGSUSED */
1290int
1291shutdown(td, uap)
1292 struct thread *td;
1293 register struct shutdown_args /* {
1294 int s;
1295 int how;
1296 } */ *uap;
1297{
1298 struct socket *so;
1299 int error;
1300
1301 NET_LOCK_GIANT();
1302 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
1303 error = soshutdown(so, uap->how);
1304 fputsock(so);
1305 }
1306 NET_UNLOCK_GIANT();
1307 return(error);
1308}
1309
1310/*
1311 * MPSAFE
1312 */
1313/* ARGSUSED */
1314int
1315setsockopt(td, uap)
1316 struct thread *td;
1317 register struct setsockopt_args /* {
1318 int s;
1319 int level;
1320 int name;
1321 caddr_t val;
1322 int valsize;
1323 } */ *uap;
1324{
1325 struct socket *so;
1326 struct sockopt sopt;
1327 int error;
1328
1329 if (uap->val == 0 && uap->valsize != 0)
1330 return (EFAULT);
1331 if (uap->valsize < 0)
1332 return (EINVAL);
1333
1334 NET_LOCK_GIANT();
1335 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
1336 sopt.sopt_dir = SOPT_SET;
1337 sopt.sopt_level = uap->level;
1338 sopt.sopt_name = uap->name;
1339 sopt.sopt_val = uap->val;
1340 sopt.sopt_valsize = uap->valsize;
1341 sopt.sopt_td = td;
1342 error = sosetopt(so, &sopt);
1343 fputsock(so);
1344 }
1345 NET_UNLOCK_GIANT();
1346 return(error);
1347}
1348
1349/*
1350 * MPSAFE
1351 */
1352/* ARGSUSED */
1353int
1354getsockopt(td, uap)
1355 struct thread *td;
1356 register struct getsockopt_args /* {
1357 int s;
1358 int level;
1359 int name;
1360 void * __restrict val;
1361 socklen_t * __restrict avalsize;
1362 } */ *uap;
1363{
1364 socklen_t valsize;
1365 int error;
1366 struct socket *so;
1367 struct sockopt sopt;
1368
1369 NET_LOCK_GIANT();
1370 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
1371 goto done2;
1372 if (uap->val) {
1373 error = copyin(uap->avalsize, &valsize, sizeof (valsize));
1374 if (error)
1375 goto done1;
1376 if (valsize < 0) {
1377 error = EINVAL;
1378 goto done1;
1379 }
1380 } else {
1381 valsize = 0;
1382 }
1383
1384 sopt.sopt_dir = SOPT_GET;
1385 sopt.sopt_level = uap->level;
1386 sopt.sopt_name = uap->name;
1387 sopt.sopt_val = uap->val;
1388 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
1389 sopt.sopt_td = td;
1390
1391 error = sogetopt(so, &sopt);
1392 if (error == 0) {
1393 valsize = sopt.sopt_valsize;
1394 error = copyout(&valsize, uap->avalsize, sizeof (valsize));
1395 }
1396done1:
1397 fputsock(so);
1398done2:
1399 NET_UNLOCK_GIANT();
1400 return (error);
1401}
1402
1403/*
1404 * getsockname1() - Get socket name.
1405 *
1406 * MPSAFE
1407 */
1408/* ARGSUSED */
1409static int
1410getsockname1(td, uap, compat)
1411 struct thread *td;
1412 register struct getsockname_args /* {
1413 int fdes;
1414 struct sockaddr * __restrict asa;
1415 socklen_t * __restrict alen;
1416 } */ *uap;
1417 int compat;
1418{
1419 struct socket *so;
1420 struct sockaddr *sa;
1421 socklen_t len;
1422 int error;
1423
1424 NET_LOCK_GIANT();
1425 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
1426 goto done2;
1427 error = copyin(uap->alen, &len, sizeof (len));
1428 if (error)
1429 goto done1;
1430 if (len < 0) {
1431 error = EINVAL;
1432 goto done1;
1433 }
1434 sa = 0;
1435 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1436 if (error)
1437 goto bad;
1438 if (sa == 0) {
1439 len = 0;
1440 goto gotnothing;
1441 }
1442
1443 len = MIN(len, sa->sa_len);
1444#ifdef COMPAT_OLDSOCK
1445 if (compat)
1446 ((struct osockaddr *)sa)->sa_family = sa->sa_family;
1447#endif
1448 error = copyout(sa, uap->asa, (u_int)len);
1449 if (error == 0)
1450gotnothing:
1451 error = copyout(&len, uap->alen, sizeof (len));
1452bad:
1453 if (sa)
1454 FREE(sa, M_SONAME);
1455done1:
1456 fputsock(so);
1457done2:
1458 NET_UNLOCK_GIANT();
1459 return (error);
1460}
1461
/*
 * getsockname(2): getsockname1() with the compat flag clear.
 *
 * MPSAFE
 */
int
getsockname(struct thread *td, struct getsockname_args *uap)
{
	int error;

	error = getsockname1(td, uap, 0);
	return (error);
}
1473
1474#ifdef COMPAT_OLDSOCK
/*
 * Old getsockname(2): getsockname1() with the compat flag set.
 *
 * MPSAFE
 */
int
ogetsockname(struct thread *td, struct getsockname_args *uap)
{
	int error;

	error = getsockname1(td, uap, 1);
	return (error);
}
1486#endif /* COMPAT_OLDSOCK */
1487
1488/*
1489 * getpeername1() - Get name of peer for connected socket.
1490 *
1491 * MPSAFE
1492 */
1493/* ARGSUSED */
1494static int
1495getpeername1(td, uap, compat)
1496 struct thread *td;
1497 register struct getpeername_args /* {
1498 int fdes;
1499 struct sockaddr * __restrict asa;
1500 socklen_t * __restrict alen;
1501 } */ *uap;
1502 int compat;
1503{
1504 struct socket *so;
1505 struct sockaddr *sa;
1506 socklen_t len;
1507 int error;
1508
1509 NET_LOCK_GIANT();
1510 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
1511 goto done2;
1512 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1513 error = ENOTCONN;
1514 goto done1;
1515 }
1516 error = copyin(uap->alen, &len, sizeof (len));
1517 if (error)
1518 goto done1;
1519 if (len < 0) {
1520 error = EINVAL;
1521 goto done1;
1522 }
1523 sa = 0;
1524 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1525 if (error)
1526 goto bad;
1527 if (sa == 0) {
1528 len = 0;
1529 goto gotnothing;
1530 }
1531 len = MIN(len, sa->sa_len);
1532#ifdef COMPAT_OLDSOCK
1533 if (compat)
1534 ((struct osockaddr *)sa)->sa_family =
1535 sa->sa_family;
1536#endif
1537 error = copyout(sa, uap->asa, (u_int)len);
1538 if (error)
1539 goto bad;
1540gotnothing:
1541 error = copyout(&len, uap->alen, sizeof (len));
1542bad:
1543 if (sa)
1544 FREE(sa, M_SONAME);
1545done1:
1546 fputsock(so);
1547done2:
1548 NET_UNLOCK_GIANT();
1549 return (error);
1550}
1551
/*
 * getpeername(2): getpeername1() with the compat flag clear.
 *
 * MPSAFE
 */
int
getpeername(struct thread *td, struct getpeername_args *uap)
{
	int error;

	error = getpeername1(td, uap, 0);
	return (error);
}
1563
1564#ifdef COMPAT_OLDSOCK
/*
 * Old getpeername(2): getpeername1() with the compat flag set.
 *
 * MPSAFE
 */
int
ogetpeername(struct thread *td, struct ogetpeername_args *uap)
{
	struct getpeername_args *args;

	/* XXX uap should have type `getpeername_args *' to begin with. */
	args = (struct getpeername_args *)uap;
	return (getpeername1(td, args, 1));
}
1577#endif /* COMPAT_OLDSOCK */
1578
1579int
1580sockargs(mp, buf, buflen, type)
1581 struct mbuf **mp;
1582 caddr_t buf;
1583 int buflen, type;
1584{
1585 register struct sockaddr *sa;
1586 register struct mbuf *m;
1587 int error;
1588
1589 if ((u_int)buflen > MLEN) {
1590#ifdef COMPAT_OLDSOCK
1591 if (type == MT_SONAME && (u_int)buflen <= 112)
1592 buflen = MLEN; /* unix domain compat. hack */
1593 else
1594#endif
1595 return (EINVAL);
1596 }
1597 m = m_get(M_TRYWAIT, type);
1598 if (m == NULL)
1599 return (ENOBUFS);
1600 m->m_len = buflen;
1601 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1602 if (error)
1603 (void) m_free(m);
1604 else {
1605 *mp = m;
1606 if (type == MT_SONAME) {
1607 sa = mtod(m, struct sockaddr *);
1608
1609#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1610 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1611 sa->sa_family = sa->sa_len;
1612#endif
1613 sa->sa_len = buflen;
1614 }
1615 }
1616 return (error);
1617}
1618
1619int
1620getsockaddr(namp, uaddr, len)
1621 struct sockaddr **namp;
1622 caddr_t uaddr;
1623 size_t len;
1624{
1625 struct sockaddr *sa;
1626 int error;
1627
1628 if (len > SOCK_MAXADDRLEN)
1629 return (ENAMETOOLONG);
1630 if (len < offsetof(struct sockaddr, sa_data[0]))
1631 return (EINVAL);
1632 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1633 error = copyin(uaddr, sa, len);
1634 if (error) {
1635 FREE(sa, M_SONAME);
1636 } else {
1637#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1638 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1639 sa->sa_family = sa->sa_len;
1640#endif
1641 sa->sa_len = len;
1642 *namp = sa;
1643 }
1644 return (error);
1645}
1646
1647/*
1648 * Detach mapped page and release resources back to the system.
1649 */
1650void
1651sf_buf_mext(void *addr, void *args)
1652{
1653 vm_page_t m;
1654
1655 m = sf_buf_page(args);
1656 sf_buf_free(args);
1657 vm_page_lock_queues();
1658 vm_page_unwire(m, 0);
1659 /*
1660 * Check for the object going away on us. This can
1661 * happen since we don't hold a reference to it.
1662 * If so, we're responsible for freeing the page.
1663 */
1664 if (m->wire_count == 0 && m->object == NULL)
1665 vm_page_free(m);
1666 vm_page_unlock_queues();
1667}
1668
/*
 * sendfile(2)
 *
 * MPSAFE
 *
 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 *	 struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 *
 * Send a file specified by 'fd' and starting at 'offset' to a socket
 * specified by 's'. Send only 'nbytes' of the file or until EOF if
 * nbytes == 0. Optionally add a header and/or trailer to the socket
 * output. If specified, write the total number of bytes sent into *sbytes.
 *
 * This entry point calls do_sendfile() with the compat argument set to 0.
 */
int
sendfile(struct thread *td, struct sendfile_args *uap)
{
	int error;

	error = do_sendfile(td, uap, 0);
	return (error);
}
1689
1690#ifdef COMPAT_FREEBSD4
1691int
1692freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
1693{
1694 struct sendfile_args args;
1695
1696 args.fd = uap->fd;
1697 args.s = uap->s;
1698 args.offset = uap->offset;
1699 args.nbytes = uap->nbytes;
1700 args.hdtr = uap->hdtr;
1701 args.sbytes = uap->sbytes;
1702 args.flags = uap->flags;
1703
1704 return (do_sendfile(td, &args, 1));
1705}
1706#endif /* COMPAT_FREEBSD4 */
1707
1708static int
1709do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
1710{
1711 struct vnode *vp;
1712 struct vm_object *obj;
1713 struct socket *so = NULL;
1714 struct mbuf *m, *m_header = NULL;
1715 struct sf_buf *sf;
1716 struct vm_page *pg;
1717 struct writev_args nuap;
1718 struct sf_hdtr hdtr;
1719 struct uio hdr_uio;
1720 off_t off, xfsize, hdtr_size, sbytes = 0;
1721 int error, s, headersize = 0, headersent = 0;
1722 struct iovec *hdr_iov = NULL;
1723
1724 mtx_lock(&Giant);
1725
1726 hdtr_size = 0;
1727
1728 /*
1729 * The descriptor must be a regular file and have a backing VM object.
1730 */
1731 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
1732 goto done;
1733 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1734 if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) {
1735 error = EINVAL;
1736 VOP_UNLOCK(vp, 0, td);
1737 goto done;
1738 }
1739 VOP_UNLOCK(vp, 0, td);
1740 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
1741 goto done;
1742 if (so->so_type != SOCK_STREAM) {
1743 error = EINVAL;
1744 goto done;
1745 }
1746 if ((so->so_state & SS_ISCONNECTED) == 0) {
1747 error = ENOTCONN;
1748 goto done;
1749 }
1750 if (uap->offset < 0) {
1751 error = EINVAL;
1752 goto done;
1753 }
1754
1755#ifdef MAC
1756 error = mac_check_socket_send(td->td_ucred, so);
1757 if (error)
1758 goto done;
1759#endif
1760
1761 /*
1762 * If specified, get the pointer to the sf_hdtr struct for
1763 * any headers/trailers.
1764 */
1765 if (uap->hdtr != NULL) {
1766 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1767 if (error)
1768 goto done;
1769 /*
1770 * Send any headers.
1771 */
1772 if (hdtr.headers != NULL) {
1773 hdr_uio.uio_td = td;
1774 hdr_uio.uio_rw = UIO_WRITE;
1775 error = uiofromiov(hdtr.headers, hdtr.hdr_cnt,
1776 &hdr_uio);
1777 if (error)
1778 goto done;
1779 /* Cache hdr_iov, m_uiotombuf may change it. */
1780 hdr_iov = hdr_uio.uio_iov;
1781 if (hdr_uio.uio_resid > 0) {
1782 m_header = m_uiotombuf(&hdr_uio, M_DONTWAIT, 0);
1783 if (m_header == NULL)
1784 goto done;
1785 headersize = m_header->m_pkthdr.len;
1786 if (compat)
1787 sbytes += headersize;
1788 }
1789 }
1790 }
1791
1792 /*
1793 * Protect against multiple writers to the socket.
1794 */
1795 (void) sblock(&so->so_snd, M_WAITOK);
1796
1797 /*
1798 * Loop through the pages in the file, starting with the requested
1799 * offset. Get a file page (do I/O if necessary), map the file page
1800 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1801 * it on the socket.
1802 */
1803 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
1804 vm_pindex_t pindex;
1805 vm_offset_t pgoff;
1806
1807 pindex = OFF_TO_IDX(off);
1808 VM_OBJECT_LOCK(obj);
1809retry_lookup:
1810 /*
1811 * Calculate the amount to transfer. Not to exceed a page,
1812 * the EOF, or the passed in nbytes.
1813 */
1814 xfsize = obj->un_pager.vnp.vnp_size - off;
1815 VM_OBJECT_UNLOCK(obj);
1816 if (xfsize > PAGE_SIZE)
1817 xfsize = PAGE_SIZE;
1818 pgoff = (vm_offset_t)(off & PAGE_MASK);
1819 if (PAGE_SIZE - pgoff < xfsize)
1820 xfsize = PAGE_SIZE - pgoff;
1821 if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
1822 xfsize = uap->nbytes - sbytes;
1823 if (xfsize <= 0) {
1824 if (m_header != NULL) {
1825 m = m_header;
1826 m_header = NULL;
1827 goto retry_space;
1828 } else
1829 break;
1830 }
1831 /*
1832 * Optimize the non-blocking case by looking at the socket space
1833 * before going to the extra work of constituting the sf_buf.
1834 */
1835 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
1836 if (so->so_state & SS_CANTSENDMORE)
1837 error = EPIPE;
1838 else
1839 error = EAGAIN;
1840 sbunlock(&so->so_snd);
1841 goto done;
1842 }
1843 VM_OBJECT_LOCK(obj);
1844 /*
1845 * Attempt to look up the page.
1846 *
1847 * Allocate if not found
1848 *
1849 * Wait and loop if busy.
1850 */
1851 pg = vm_page_lookup(obj, pindex);
1852
1853 if (pg == NULL) {
1854 pg = vm_page_alloc(obj, pindex,
1855 VM_ALLOC_NORMAL | VM_ALLOC_WIRED);
1856 if (pg == NULL) {
1857 VM_OBJECT_UNLOCK(obj);
1858 VM_WAIT;
1859 VM_OBJECT_LOCK(obj);
1860 goto retry_lookup;
1861 }
1862 vm_page_lock_queues();
1863 vm_page_wakeup(pg);
1864 } else {
1865 vm_page_lock_queues();
1866 if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy"))
1867 goto retry_lookup;
1868 /*
1869 * Wire the page so it does not get ripped out from
1870 * under us.
1871 */
1872 vm_page_wire(pg);
1873 }
1874
1875 /*
1876 * If page is not valid for what we need, initiate I/O
1877 */
1878
1879 if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) {
1880 VM_OBJECT_UNLOCK(obj);
1881 } else if (uap->flags & SF_NODISKIO) {
1882 error = EBUSY;
1883 } else {
1884 int bsize, resid;
1885
1886 /*
1887 * Ensure that our page is still around when the I/O
1888 * completes.
1889 */
1890 vm_page_io_start(pg);
1891 vm_page_unlock_queues();
1892 VM_OBJECT_UNLOCK(obj);
1893
1894 /*
1895 * Get the page from backing store.
1896 */
1897 bsize = vp->v_mount->mnt_stat.f_iosize;
1898 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td);
1899 /*
1900 * XXXMAC: Because we don't have fp->f_cred here,
1901 * we pass in NOCRED. This is probably wrong, but
1902 * is consistent with our original implementation.
1903 */
1904 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
1905 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
1906 IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
1907 td->td_ucred, NOCRED, &resid, td);
1908 VOP_UNLOCK(vp, 0, td);
1909 if (error)
1910 VM_OBJECT_LOCK(obj);
1911 vm_page_lock_queues();
1912 vm_page_io_finish(pg);
1913 mbstat.sf_iocnt++;
1914 }
1915
1916 if (error) {
1917 vm_page_unwire(pg, 0);
1918 /*
1919 * See if anyone else might know about this page.
1920 * If not and it is not valid, then free it.
1921 */
1922 if (pg->wire_count == 0 && pg->valid == 0 &&
1923 pg->busy == 0 && !(pg->flags & PG_BUSY) &&
1924 pg->hold_count == 0) {
1925 vm_page_busy(pg);
1926 vm_page_free(pg);
1927 }
1928 vm_page_unlock_queues();
1929 VM_OBJECT_UNLOCK(obj);
1930 sbunlock(&so->so_snd);
1931 goto done;
1932 }
1933 vm_page_unlock_queues();
1934
1935 /*
1936 * Get a sendfile buf. We usually wait as long as necessary,
1937 * but this wait can be interrupted.
1938 */
1939 if ((sf = sf_buf_alloc(pg, PCATCH)) == NULL) {
1940 mbstat.sf_allocfail++;
1941 vm_page_lock_queues();
1942 vm_page_unwire(pg, 0);
1943 if (pg->wire_count == 0 && pg->object == NULL)
1944 vm_page_free(pg);
1945 vm_page_unlock_queues();
1946 sbunlock(&so->so_snd);
1947 error = EINTR;
1948 goto done;
1949 }
1950
1951 /*
1952 * Get an mbuf header and set it up as having external storage.
1953 */
1954 if (m_header)
1955 MGET(m, M_TRYWAIT, MT_DATA);
1956 else
1957 MGETHDR(m, M_TRYWAIT, MT_DATA);
1958 if (m == NULL) {
1959 error = ENOBUFS;
1960 sf_buf_mext((void *)sf_buf_kva(sf), sf);
1961 sbunlock(&so->so_snd);
1962 goto done;
1963 }
1964 /*
1965 * Setup external storage for mbuf.
1966 */
1967 MEXTADD(m, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, sf, M_RDONLY,
1968 EXT_SFBUF);
1969 m->m_data = (char *)sf_buf_kva(sf) + pgoff;
1970 m->m_pkthdr.len = m->m_len = xfsize;
1971
1972 if (m_header) {
1973 m_cat(m_header, m);
1974 m = m_header;
1975 m_header = NULL;
1976 m_fixhdr(m);
1977 }
1978
1979 /*
1980 * Add the buffer to the socket buffer chain.
1981 */
1982 s = splnet();
1983retry_space:
1984 /*
1985 * Make sure that the socket is still able to take more data.
1986 * CANTSENDMORE being true usually means that the connection
1987 * was closed. so_error is true when an error was sensed after
1988 * a previous send.
1989 * The state is checked after the page mapping and buffer
1990 * allocation above since those operations may block and make
1991 * any socket checks stale. From this point forward, nothing
1992 * blocks before the pru_send (or more accurately, any blocking
1993 * results in a loop back to here to re-check).
1994 */
1995 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
1996 if (so->so_state & SS_CANTSENDMORE) {
1997 error = EPIPE;
1998 } else {
1999 error = so->so_error;
2000 so->so_error = 0;
2001 }
2002 m_freem(m);
2003 sbunlock(&so->so_snd);
2004 splx(s);
2005 goto done;
2006 }
2007 /*
2008 * Wait for socket space to become available. We do this just
2009 * after checking the connection state above in order to avoid
2010 * a race condition with sbwait().
2011 */
2012 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
2013 if (so->so_state & SS_NBIO) {
2014 m_freem(m);
2015 sbunlock(&so->so_snd);
2016 splx(s);
2017 error = EAGAIN;
2018 goto done;
2019 }
2020 error = sbwait(&so->so_snd);
2021 /*
2022 * An error from sbwait usually indicates that we've
2023 * been interrupted by a signal. Bail out to done, which
2024 * reports sbytes (if requested) and returns the error.
2025 */
2026 if (error) {
2027 m_freem(m);
2028 sbunlock(&so->so_snd);
2029 splx(s);
2030 goto done;
2031 }
2032 goto retry_space;
2033 }
2034 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td);
2035 splx(s);
2036 if (error) {
2037 sbunlock(&so->so_snd);
2038 goto done;
2039 }
2040 headersent = 1;
2041 }
2042 sbunlock(&so->so_snd);
2043
2044 /*
2045 * Send trailers. Wimp out and use writev(2).
2046 */
2047 if (uap->hdtr != NULL && hdtr.trailers != NULL) {
2048 nuap.fd = uap->s;
2049 nuap.iovp = hdtr.trailers;
2050 nuap.iovcnt = hdtr.trl_cnt;
2051 error = writev(td, &nuap);
2052 if (error)
2053 goto done;
2054 if (compat)
2055 sbytes += td->td_retval[0];
2056 else
2057 hdtr_size += td->td_retval[0];
2058 }
2059
2060done:
2061 if (headersent) {
2062 if (!compat)
2063 hdtr_size += headersize;
2064 } else {
2065 if (compat)
2066 sbytes -= headersize;
2067 }
2068 /*
2069 * If there was no error we have to clear td->td_retval[0]
2070 * because it may have been set by writev.
2071 */
2072 if (error == 0) {
2073 td->td_retval[0] = 0;
2074 }
2075 if (uap->sbytes != NULL) {
2076 if (!compat)
2077 sbytes += hdtr_size;
2078 copyout(&sbytes, uap->sbytes, sizeof(off_t));
2079 }
2080 if (vp)
2081 vrele(vp);
2082 if (so)
2083 fputsock(so);
2084 if (hdr_iov)
2085 FREE(hdr_iov, M_IOV);
2086 if (m_header)
2087 m_freem(m_header);
2088
2089 mtx_unlock(&Giant);
2090
2091 if (error == ERESTART)
2092 error = EINTR;
2093
2094 return (error);
2095}