Deleted Added
full compact
kern_sendfile.c (130211) kern_sendfile.c (130344)
1/*
2 * Copyright (c) 1982, 1986, 1989, 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * sendfile(2) and related extensions:
6 * Copyright (c) 1998, David Greenman. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
33 */
34
35#include <sys/cdefs.h>
1/*
2 * Copyright (c) 1982, 1986, 1989, 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * sendfile(2) and related extensions:
6 * Copyright (c) 1998, David Greenman. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: head/sys/kern/uipc_syscalls.c 130211 2004-06-07 21:45:44Z rwatson $");
36__FBSDID("$FreeBSD: head/sys/kern/uipc_syscalls.c 130344 2004-06-11 11:16:26Z phk $");
37
38#include "opt_compat.h"
39#include "opt_ktrace.h"
40#include "opt_mac.h"
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/kernel.h>
45#include <sys/lock.h>
46#include <sys/mac.h>
47#include <sys/mutex.h>
48#include <sys/sysproto.h>
49#include <sys/malloc.h>
50#include <sys/filedesc.h>
51#include <sys/event.h>
52#include <sys/proc.h>
53#include <sys/fcntl.h>
54#include <sys/file.h>
55#include <sys/filio.h>
56#include <sys/mount.h>
57#include <sys/mbuf.h>
58#include <sys/protosw.h>
59#include <sys/sf_buf.h>
60#include <sys/socket.h>
61#include <sys/socketvar.h>
62#include <sys/signalvar.h>
63#include <sys/syscallsubr.h>
64#include <sys/sysctl.h>
65#include <sys/uio.h>
66#include <sys/vnode.h>
67#ifdef KTRACE
68#include <sys/ktrace.h>
69#endif
70
71#include <vm/vm.h>
72#include <vm/vm_object.h>
73#include <vm/vm_page.h>
74#include <vm/vm_pageout.h>
75#include <vm/vm_kern.h>
76#include <vm/vm_extern.h>
77
78static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
79static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);
80
81static int accept1(struct thread *td, struct accept_args *uap, int compat);
82static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
83static int getsockname1(struct thread *td, struct getsockname_args *uap,
84 int compat);
85static int getpeername1(struct thread *td, struct getpeername_args *uap,
86 int compat);
87
88/*
89 * NSFBUFS-related variables and associated sysctls
90 */
91int nsfbufs;
92int nsfbufspeak;
93int nsfbufsused;
94
95SYSCTL_DECL(_kern_ipc);
96SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
97 "Maximum number of sendfile(2) sf_bufs available");
98SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
99 "Number of sendfile(2) sf_bufs at peak usage");
100SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
101 "Number of sendfile(2) sf_bufs in use");
102
103/*
104 * System call interface to the socket abstraction.
105 */
37
38#include "opt_compat.h"
39#include "opt_ktrace.h"
40#include "opt_mac.h"
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/kernel.h>
45#include <sys/lock.h>
46#include <sys/mac.h>
47#include <sys/mutex.h>
48#include <sys/sysproto.h>
49#include <sys/malloc.h>
50#include <sys/filedesc.h>
51#include <sys/event.h>
52#include <sys/proc.h>
53#include <sys/fcntl.h>
54#include <sys/file.h>
55#include <sys/filio.h>
56#include <sys/mount.h>
57#include <sys/mbuf.h>
58#include <sys/protosw.h>
59#include <sys/sf_buf.h>
60#include <sys/socket.h>
61#include <sys/socketvar.h>
62#include <sys/signalvar.h>
63#include <sys/syscallsubr.h>
64#include <sys/sysctl.h>
65#include <sys/uio.h>
66#include <sys/vnode.h>
67#ifdef KTRACE
68#include <sys/ktrace.h>
69#endif
70
71#include <vm/vm.h>
72#include <vm/vm_object.h>
73#include <vm/vm_page.h>
74#include <vm/vm_pageout.h>
75#include <vm/vm_kern.h>
76#include <vm/vm_extern.h>
77
/*
 * Forward declarations for file-local helpers.  sendit(), recvit() and
 * accept1() are defined further down; the remaining helpers are only
 * declared in this part of the file.
 */
78static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
79static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);
80
81static int accept1(struct thread *td, struct accept_args *uap, int compat);
82static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
83static int getsockname1(struct thread *td, struct getsockname_args *uap,
84 int compat);
85static int getpeername1(struct thread *td, struct getpeername_args *uap,
86 int compat);
87
88/*
89 * NSFBUFS-related variables and associated sysctls
 *
 * Each counter below is published under the _kern_ipc sysctl node with
 * the description string given in its SYSCTL_INT() declaration.
90 */
91int nsfbufs;		/* maximum number of sendfile(2) sf_bufs available */
92int nsfbufspeak;	/* number of sendfile(2) sf_bufs at peak usage */
93int nsfbufsused;	/* number of sendfile(2) sf_bufs in use */
94
95SYSCTL_DECL(_kern_ipc);
96SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
97 "Maximum number of sendfile(2) sf_bufs available");
98SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
99 "Number of sendfile(2) sf_bufs at peak usage");
100SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
101 "Number of sendfile(2) sf_bufs in use");
102
103/*
104 * System call interface to the socket abstraction.
105 */
/*
 * COMPAT_OLDSOCK gates the old-style socket entry points in this file
 * (oaccept, osend, osendmsg, orecv, orecvfrom, orecvmsg) and the
 * MSG_COMPAT handling in sendit()/recvit().  It is enabled by COMPAT_43.
 */
#if defined(COMPAT_43)
#define COMPAT_OLDSOCK
#endif
109
110/*
111 * MPSAFE
112 */
113int
114socket(td, uap)
115 struct thread *td;
116 register struct socket_args /* {
117 int domain;
118 int type;
119 int protocol;
120 } */ *uap;
121{
122 struct filedesc *fdp;
123 struct socket *so;
124 struct file *fp;
125 int fd, error;
126
127 fdp = td->td_proc->p_fd;
128 error = falloc(td, &fp, &fd);
129 if (error)
130 return (error);
131 /* An extra reference on `fp' has been held for us by falloc(). */
132 NET_LOCK_GIANT();
133 error = socreate(uap->domain, &so, uap->type, uap->protocol,
134 td->td_ucred, td);
135 NET_UNLOCK_GIANT();
136 FILEDESC_LOCK(fdp);
137 if (error) {
138 if (fdp->fd_ofiles[fd] == fp) {
139 fdp->fd_ofiles[fd] = NULL;
140 fdunused(fdp, fd);
141 FILEDESC_UNLOCK(fdp);
142 fdrop(fp, td);
143 } else {
144 FILEDESC_UNLOCK(fdp);
145 }
146 } else {
147 fp->f_data = so; /* already has ref count */
148 fp->f_flag = FREAD|FWRITE;
149 fp->f_ops = &socketops;
150 fp->f_type = DTYPE_SOCKET;
151 FILEDESC_UNLOCK(fdp);
152 td->td_retval[0] = fd;
153 }
154 fdrop(fp, td);
155 return (error);
156}
157
158/*
159 * MPSAFE
160 */
161/* ARGSUSED */
162int
163bind(td, uap)
164 struct thread *td;
165 register struct bind_args /* {
166 int s;
167 caddr_t name;
168 int namelen;
169 } */ *uap;
170{
171 struct sockaddr *sa;
172 int error;
173
174 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
175 return (error);
176
177 return (kern_bind(td, uap->s, sa));
178}
179
180int
181kern_bind(td, fd, sa)
182 struct thread *td;
183 int fd;
184 struct sockaddr *sa;
185{
186 struct socket *so;
187 int error;
188
189 NET_LOCK_GIANT();
190 if ((error = fgetsock(td, fd, &so, NULL)) != 0)
191 goto done2;
192#ifdef MAC
193 error = mac_check_socket_bind(td->td_ucred, so, sa);
194 if (error)
195 goto done1;
196#endif
197 error = sobind(so, sa, td);
198#ifdef MAC
199done1:
200#endif
201 fputsock(so);
202done2:
203 NET_UNLOCK_GIANT();
204 FREE(sa, M_SONAME);
205 return (error);
206}
207
208/*
209 * MPSAFE
210 */
211/* ARGSUSED */
212int
213listen(td, uap)
214 struct thread *td;
215 register struct listen_args /* {
216 int s;
217 int backlog;
218 } */ *uap;
219{
220 struct socket *so;
221 int error;
222
223 NET_LOCK_GIANT();
224 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
225#ifdef MAC
226 error = mac_check_socket_listen(td->td_ucred, so);
227 if (error)
228 goto done;
229#endif
230 error = solisten(so, uap->backlog, td);
231#ifdef MAC
232done:
233#endif
234 fputsock(so);
235 }
236 NET_UNLOCK_GIANT();
237 return(error);
238}
239
240/*
241 * accept1()
242 * MPSAFE
243 */
244static int
245accept1(td, uap, compat)
246 struct thread *td;
247 register struct accept_args /* {
248 int s;
249 struct sockaddr * __restrict name;
250 socklen_t * __restrict anamelen;
251 } */ *uap;
252 int compat;
253{
254 struct filedesc *fdp;
255 struct file *nfp = NULL;
256 struct sockaddr *sa = NULL;
257 socklen_t namelen;
258 int error;
259 struct socket *head, *so;
260 int fd;
261 u_int fflag;
262 pid_t pgid;
263 int tmp;
264
265 fdp = td->td_proc->p_fd;
266 if (uap->name) {
267 error = copyin(uap->anamelen, &namelen, sizeof (namelen));
268 if(error)
269 return (error);
270 if (namelen < 0)
271 return (EINVAL);
272 }
273 NET_LOCK_GIANT();
274 error = fgetsock(td, uap->s, &head, &fflag);
275 if (error)
276 goto done2;
277 if ((head->so_options & SO_ACCEPTCONN) == 0) {
278 error = EINVAL;
279 goto done;
280 }
281 error = falloc(td, &nfp, &fd);
282 if (error)
283 goto done;
284 ACCEPT_LOCK();
285 if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
286 ACCEPT_UNLOCK();
287 error = EWOULDBLOCK;
288 goto noconnection;
289 }
290 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
291 if (head->so_state & SS_CANTRCVMORE) {
292 head->so_error = ECONNABORTED;
293 break;
294 }
295 error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
296 "accept", 0);
297 if (error) {
298 ACCEPT_UNLOCK();
299 goto noconnection;
300 }
301 }
302 if (head->so_error) {
303 error = head->so_error;
304 head->so_error = 0;
305 ACCEPT_UNLOCK();
306 goto noconnection;
307 }
308 so = TAILQ_FIRST(&head->so_comp);
309 KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
310 KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
311
312 soref(so); /* file descriptor reference */
313
314 TAILQ_REMOVE(&head->so_comp, so, so_list);
315 head->so_qlen--;
316 so->so_qstate &= ~SQ_COMP;
317 so->so_head = NULL;
318
319 ACCEPT_UNLOCK();
320
321 /* An extra reference on `nfp' has been held for us by falloc(). */
322 td->td_retval[0] = fd;
323
324 /* connection has been removed from the listen queue */
325 KNOTE(&head->so_rcv.sb_sel.si_note, 0);
326
327 pgid = fgetown(&head->so_sigio);
328 if (pgid != 0)
329 fsetown(pgid, &so->so_sigio);
330
331 FILE_LOCK(nfp);
332 nfp->f_data = so; /* nfp has ref count from falloc */
333 nfp->f_flag = fflag;
334 nfp->f_ops = &socketops;
335 nfp->f_type = DTYPE_SOCKET;
336 FILE_UNLOCK(nfp);
337 /* Sync socket nonblocking/async state with file flags */
338 tmp = fflag & FNONBLOCK;
339 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
340 tmp = fflag & FASYNC;
341 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
342 sa = 0;
343 error = soaccept(so, &sa);
344 if (error) {
345 /*
346 * return a namelen of zero for older code which might
347 * ignore the return value from accept.
348 */
349 if (uap->name != NULL) {
350 namelen = 0;
351 (void) copyout(&namelen,
352 uap->anamelen, sizeof(*uap->anamelen));
353 }
354 goto noconnection;
355 }
356 if (sa == NULL) {
357 namelen = 0;
358 if (uap->name)
359 goto gotnoname;
360 error = 0;
361 goto done;
362 }
363 if (uap->name) {
364 /* check sa_len before it is destroyed */
365 if (namelen > sa->sa_len)
366 namelen = sa->sa_len;
367#ifdef COMPAT_OLDSOCK
368 if (compat)
369 ((struct osockaddr *)sa)->sa_family =
370 sa->sa_family;
371#endif
372 error = copyout(sa, uap->name, (u_int)namelen);
373 if (!error)
374gotnoname:
375 error = copyout(&namelen,
376 uap->anamelen, sizeof (*uap->anamelen));
377 }
378noconnection:
379 if (sa)
380 FREE(sa, M_SONAME);
381
382 /*
383 * close the new descriptor, assuming someone hasn't ripped it
384 * out from under us.
385 */
386 if (error) {
387 FILEDESC_LOCK(fdp);
388 if (fdp->fd_ofiles[fd] == nfp) {
389 fdp->fd_ofiles[fd] = NULL;
390 fdunused(fdp, fd);
391 FILEDESC_UNLOCK(fdp);
392 fdrop(nfp, td);
393 } else {
394 FILEDESC_UNLOCK(fdp);
395 }
396 }
397
398 /*
399 * Release explicitly held references before returning.
400 */
401done:
402 if (nfp != NULL)
403 fdrop(nfp, td);
404 fputsock(head);
405done2:
406 NET_UNLOCK_GIANT();
407 return (error);
408}
409
/*
 * accept(2): thin wrapper that runs accept1() with compat disabled.
 *
 * MPSAFE (accept1() is MPSAFE)
 */
int
accept(struct thread *td, struct accept_args *uap)
{
	const int compat = 0;

	return (accept1(td, uap, compat));
}
421
#ifdef COMPAT_OLDSOCK
/*
 * Old-style accept: thin wrapper that runs accept1() with compat
 * enabled.
 *
 * MPSAFE (accept1() is MPSAFE)
 */
int
oaccept(struct thread *td, struct accept_args *uap)
{
	const int compat = 1;

	return (accept1(td, uap, compat));
}
#endif /* COMPAT_OLDSOCK */
435
436/*
437 * MPSAFE
438 */
439/* ARGSUSED */
440int
441connect(td, uap)
442 struct thread *td;
443 register struct connect_args /* {
444 int s;
445 caddr_t name;
446 int namelen;
447 } */ *uap;
448{
449 struct sockaddr *sa;
450 int error;
451
452 error = getsockaddr(&sa, uap->name, uap->namelen);
453 if (error)
454 return (error);
455
456 return (kern_connect(td, uap->s, sa));
457}
458
459
460int
461kern_connect(td, fd, sa)
462 struct thread *td;
463 int fd;
464 struct sockaddr *sa;
465{
466 struct socket *so;
467 int error, s;
468 int interrupted = 0;
469
470 NET_LOCK_GIANT();
471 if ((error = fgetsock(td, fd, &so, NULL)) != 0)
472 goto done2;
473 if (so->so_state & SS_ISCONNECTING) {
474 error = EALREADY;
475 goto done1;
476 }
477#ifdef MAC
478 error = mac_check_socket_connect(td->td_ucred, so, sa);
479 if (error)
480 goto bad;
481#endif
482 error = soconnect(so, sa, td);
483 if (error)
484 goto bad;
485 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
486 error = EINPROGRESS;
487 goto done1;
488 }
489 s = splnet();
490 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
491 error = tsleep(&so->so_timeo, PSOCK | PCATCH, "connec", 0);
492 if (error) {
493 if (error == EINTR || error == ERESTART)
494 interrupted = 1;
495 break;
496 }
497 }
498 if (error == 0) {
499 error = so->so_error;
500 so->so_error = 0;
501 }
502 splx(s);
503bad:
504 if (!interrupted)
505 so->so_state &= ~SS_ISCONNECTING;
506 if (error == ERESTART)
507 error = EINTR;
508done1:
509 fputsock(so);
510done2:
511 NET_UNLOCK_GIANT();
512 FREE(sa, M_SONAME);
513 return (error);
514}
515
516/*
517 * MPSAFE
518 */
519int
520socketpair(td, uap)
521 struct thread *td;
522 register struct socketpair_args /* {
523 int domain;
524 int type;
525 int protocol;
526 int *rsv;
527 } */ *uap;
528{
529 register struct filedesc *fdp = td->td_proc->p_fd;
530 struct file *fp1, *fp2;
531 struct socket *so1, *so2;
532 int fd, error, sv[2];
533
534 NET_LOCK_GIANT();
535 error = socreate(uap->domain, &so1, uap->type, uap->protocol,
536 td->td_ucred, td);
537 if (error)
538 goto done2;
539 error = socreate(uap->domain, &so2, uap->type, uap->protocol,
540 td->td_ucred, td);
541 if (error)
542 goto free1;
543 /* On success extra reference to `fp1' and 'fp2' is set by falloc. */
544 error = falloc(td, &fp1, &fd);
545 if (error)
546 goto free2;
547 sv[0] = fd;
548 fp1->f_data = so1; /* so1 already has ref count */
549 error = falloc(td, &fp2, &fd);
550 if (error)
551 goto free3;
552 fp2->f_data = so2; /* so2 already has ref count */
553 sv[1] = fd;
554 error = soconnect2(so1, so2);
555 if (error)
556 goto free4;
557 if (uap->type == SOCK_DGRAM) {
558 /*
559 * Datagram socket connection is asymmetric.
560 */
561 error = soconnect2(so2, so1);
562 if (error)
563 goto free4;
564 }
565 FILE_LOCK(fp1);
566 fp1->f_flag = FREAD|FWRITE;
567 fp1->f_ops = &socketops;
568 fp1->f_type = DTYPE_SOCKET;
569 FILE_UNLOCK(fp1);
570 FILE_LOCK(fp2);
571 fp2->f_flag = FREAD|FWRITE;
572 fp2->f_ops = &socketops;
573 fp2->f_type = DTYPE_SOCKET;
574 FILE_UNLOCK(fp2);
575 error = copyout(sv, uap->rsv, 2 * sizeof (int));
576 fdrop(fp1, td);
577 fdrop(fp2, td);
578 goto done2;
579free4:
580 FILEDESC_LOCK(fdp);
581 if (fdp->fd_ofiles[sv[1]] == fp2) {
582 fdp->fd_ofiles[sv[1]] = NULL;
583 fdunused(fdp, sv[1]);
584 FILEDESC_UNLOCK(fdp);
585 fdrop(fp2, td);
586 } else {
587 FILEDESC_UNLOCK(fdp);
588 }
589 fdrop(fp2, td);
590free3:
591 FILEDESC_LOCK(fdp);
592 if (fdp->fd_ofiles[sv[0]] == fp1) {
593 fdp->fd_ofiles[sv[0]] = NULL;
594 fdunused(fdp, sv[0]);
595 FILEDESC_UNLOCK(fdp);
596 fdrop(fp1, td);
597 } else {
598 FILEDESC_UNLOCK(fdp);
599 }
600 fdrop(fp1, td);
601free2:
602 (void)soclose(so2);
603free1:
604 (void)soclose(so1);
605done2:
606 NET_UNLOCK_GIANT();
607 return (error);
608}
609
610static int
611sendit(td, s, mp, flags)
612 register struct thread *td;
613 int s;
614 register struct msghdr *mp;
615 int flags;
616{
617 struct mbuf *control;
618 struct sockaddr *to;
619 int error;
620
621 if (mp->msg_name != NULL) {
622 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
623 if (error) {
624 to = NULL;
625 goto bad;
626 }
627 mp->msg_name = to;
628 } else {
629 to = NULL;
630 }
631
632 if (mp->msg_control) {
633 if (mp->msg_controllen < sizeof(struct cmsghdr)
634#ifdef COMPAT_OLDSOCK
635 && mp->msg_flags != MSG_COMPAT
636#endif
637 ) {
638 error = EINVAL;
639 goto bad;
640 }
641 error = sockargs(&control, mp->msg_control,
642 mp->msg_controllen, MT_CONTROL);
643 if (error)
644 goto bad;
645#ifdef COMPAT_OLDSOCK
646 if (mp->msg_flags == MSG_COMPAT) {
647 register struct cmsghdr *cm;
648
649 M_PREPEND(control, sizeof(*cm), M_TRYWAIT);
650 if (control == 0) {
651 error = ENOBUFS;
652 goto bad;
653 } else {
654 cm = mtod(control, struct cmsghdr *);
655 cm->cmsg_len = control->m_len;
656 cm->cmsg_level = SOL_SOCKET;
657 cm->cmsg_type = SCM_RIGHTS;
658 }
659 }
660#endif
661 } else {
662 control = NULL;
663 }
664
665 error = kern_sendit(td, s, mp, flags, control);
666
667bad:
668 if (to)
669 FREE(to, M_SONAME);
670 return (error);
671}
672
673int
674kern_sendit(td, s, mp, flags, control)
675 struct thread *td;
676 int s;
677 struct msghdr *mp;
678 int flags;
679 struct mbuf *control;
680{
681 struct uio auio;
682 struct iovec *iov;
683 struct socket *so;
684 int i;
685 int len, error;
686#ifdef KTRACE
687 struct iovec *ktriov = NULL;
688 struct uio ktruio;
689 int iovlen;
690#endif
691
692 NET_LOCK_GIANT();
693 if ((error = fgetsock(td, s, &so, NULL)) != 0)
694 goto bad2;
695
696#ifdef MAC
697 error = mac_check_socket_send(td->td_ucred, so);
698 if (error)
699 goto bad;
700#endif
701
702 auio.uio_iov = mp->msg_iov;
703 auio.uio_iovcnt = mp->msg_iovlen;
704 auio.uio_segflg = UIO_USERSPACE;
705 auio.uio_rw = UIO_WRITE;
706 auio.uio_td = td;
707 auio.uio_offset = 0; /* XXX */
708 auio.uio_resid = 0;
709 iov = mp->msg_iov;
710 for (i = 0; i < mp->msg_iovlen; i++, iov++) {
711 if ((auio.uio_resid += iov->iov_len) < 0) {
712 error = EINVAL;
713 goto bad;
714 }
715 }
716#ifdef KTRACE
717 if (KTRPOINT(td, KTR_GENIO)) {
718 iovlen = auio.uio_iovcnt * sizeof (struct iovec);
719 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
720 bcopy(auio.uio_iov, ktriov, iovlen);
721 ktruio = auio;
722 }
723#endif
724 len = auio.uio_resid;
725 error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio,
726 0, control, flags, td);
727 if (error) {
728 if (auio.uio_resid != len && (error == ERESTART ||
729 error == EINTR || error == EWOULDBLOCK))
730 error = 0;
731 /* Generation of SIGPIPE can be controlled per socket */
732 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE)) {
733 PROC_LOCK(td->td_proc);
734 psignal(td->td_proc, SIGPIPE);
735 PROC_UNLOCK(td->td_proc);
736 }
737 }
738 if (error == 0)
739 td->td_retval[0] = len - auio.uio_resid;
740#ifdef KTRACE
741 if (ktriov != NULL) {
742 if (error == 0) {
743 ktruio.uio_iov = ktriov;
744 ktruio.uio_resid = td->td_retval[0];
745 ktrgenio(s, UIO_WRITE, &ktruio, error);
746 }
747 FREE(ktriov, M_TEMP);
748 }
749#endif
750bad:
751 fputsock(so);
752bad2:
753 NET_UNLOCK_GIANT();
754 return (error);
755}
756
757/*
758 * MPSAFE
759 */
760int
761sendto(td, uap)
762 struct thread *td;
763 register struct sendto_args /* {
764 int s;
765 caddr_t buf;
766 size_t len;
767 int flags;
768 caddr_t to;
769 int tolen;
770 } */ *uap;
771{
772 struct msghdr msg;
773 struct iovec aiov;
774 int error;
775
776 msg.msg_name = uap->to;
777 msg.msg_namelen = uap->tolen;
778 msg.msg_iov = &aiov;
779 msg.msg_iovlen = 1;
780 msg.msg_control = 0;
781#ifdef COMPAT_OLDSOCK
782 msg.msg_flags = 0;
783#endif
784 aiov.iov_base = uap->buf;
785 aiov.iov_len = uap->len;
786 error = sendit(td, uap->s, &msg, uap->flags);
787 return (error);
788}
789
790#ifdef COMPAT_OLDSOCK
791/*
792 * MPSAFE
793 */
794int
795osend(td, uap)
796 struct thread *td;
797 register struct osend_args /* {
798 int s;
799 caddr_t buf;
800 int len;
801 int flags;
802 } */ *uap;
803{
804 struct msghdr msg;
805 struct iovec aiov;
806 int error;
807
808 msg.msg_name = 0;
809 msg.msg_namelen = 0;
810 msg.msg_iov = &aiov;
811 msg.msg_iovlen = 1;
812 aiov.iov_base = uap->buf;
813 aiov.iov_len = uap->len;
814 msg.msg_control = 0;
815 msg.msg_flags = 0;
816 error = sendit(td, uap->s, &msg, uap->flags);
817 return (error);
818}
819
820/*
821 * MPSAFE
822 */
823int
824osendmsg(td, uap)
825 struct thread *td;
826 register struct osendmsg_args /* {
827 int s;
828 caddr_t msg;
829 int flags;
830 } */ *uap;
831{
832 struct msghdr msg;
833 struct iovec aiov[UIO_SMALLIOV], *iov;
834 int error;
835
836 error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
837 if (error)
838 goto done2;
839 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
840 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
841 error = EMSGSIZE;
842 goto done2;
843 }
844 MALLOC(iov, struct iovec *,
845 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
846 M_WAITOK);
847 } else {
848 iov = aiov;
849 }
850 error = copyin(msg.msg_iov, iov,
851 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
852 if (error)
853 goto done;
854 msg.msg_flags = MSG_COMPAT;
855 msg.msg_iov = iov;
856 error = sendit(td, uap->s, &msg, uap->flags);
857done:
858 if (iov != aiov)
859 FREE(iov, M_IOV);
860done2:
861 return (error);
862}
863#endif
864
865/*
866 * MPSAFE
867 */
868int
869sendmsg(td, uap)
870 struct thread *td;
871 register struct sendmsg_args /* {
872 int s;
873 caddr_t msg;
874 int flags;
875 } */ *uap;
876{
877 struct msghdr msg;
878 struct iovec aiov[UIO_SMALLIOV], *iov;
879 int error;
880
881 error = copyin(uap->msg, &msg, sizeof (msg));
882 if (error)
883 goto done2;
884 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
885 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
886 error = EMSGSIZE;
887 goto done2;
888 }
889 MALLOC(iov, struct iovec *,
890 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
891 M_WAITOK);
892 } else {
893 iov = aiov;
894 }
895 if (msg.msg_iovlen &&
896 (error = copyin(msg.msg_iov, iov,
897 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)))))
898 goto done;
899 msg.msg_iov = iov;
900#ifdef COMPAT_OLDSOCK
901 msg.msg_flags = 0;
902#endif
903 error = sendit(td, uap->s, &msg, uap->flags);
904done:
905 if (iov != aiov)
906 FREE(iov, M_IOV);
907done2:
908 return (error);
909}
910
911static int
912recvit(td, s, mp, namelenp)
913 register struct thread *td;
914 int s;
915 register struct msghdr *mp;
916 void *namelenp;
917{
918 struct uio auio;
919 register struct iovec *iov;
920 register int i;
921 socklen_t len;
922 int error;
923 struct mbuf *m, *control = 0;
924 caddr_t ctlbuf;
925 struct socket *so;
926 struct sockaddr *fromsa = 0;
927#ifdef KTRACE
928 struct iovec *ktriov = NULL;
929 struct uio ktruio;
930 int iovlen;
931#endif
932
933 NET_LOCK_GIANT();
934 if ((error = fgetsock(td, s, &so, NULL)) != 0) {
935 NET_UNLOCK_GIANT();
936 return (error);
937 }
938
939#ifdef MAC
940 error = mac_check_socket_receive(td->td_ucred, so);
941 if (error) {
942 fputsock(so);
943 NET_UNLOCK_GIANT();
944 return (error);
945 }
946#endif
947
948 auio.uio_iov = mp->msg_iov;
949 auio.uio_iovcnt = mp->msg_iovlen;
950 auio.uio_segflg = UIO_USERSPACE;
951 auio.uio_rw = UIO_READ;
952 auio.uio_td = td;
953 auio.uio_offset = 0; /* XXX */
954 auio.uio_resid = 0;
955 iov = mp->msg_iov;
956 for (i = 0; i < mp->msg_iovlen; i++, iov++) {
957 if ((auio.uio_resid += iov->iov_len) < 0) {
958 fputsock(so);
959 NET_UNLOCK_GIANT();
960 return (EINVAL);
961 }
962 }
963#ifdef KTRACE
964 if (KTRPOINT(td, KTR_GENIO)) {
965 iovlen = auio.uio_iovcnt * sizeof (struct iovec);
966 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
967 bcopy(auio.uio_iov, ktriov, iovlen);
968 ktruio = auio;
969 }
970#endif
971 len = auio.uio_resid;
972 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
973 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
974 &mp->msg_flags);
975 if (error) {
976 if (auio.uio_resid != (int)len && (error == ERESTART ||
977 error == EINTR || error == EWOULDBLOCK))
978 error = 0;
979 }
980#ifdef KTRACE
981 if (ktriov != NULL) {
982 if (error == 0) {
983 ktruio.uio_iov = ktriov;
984 ktruio.uio_resid = (int)len - auio.uio_resid;
985 ktrgenio(s, UIO_READ, &ktruio, error);
986 }
987 FREE(ktriov, M_TEMP);
988 }
989#endif
990 if (error)
991 goto out;
992 td->td_retval[0] = (int)len - auio.uio_resid;
993 if (mp->msg_name) {
994 len = mp->msg_namelen;
995 if (len <= 0 || fromsa == 0)
996 len = 0;
997 else {
998 /* save sa_len before it is destroyed by MSG_COMPAT */
999 len = MIN(len, fromsa->sa_len);
1000#ifdef COMPAT_OLDSOCK
1001 if (mp->msg_flags & MSG_COMPAT)
1002 ((struct osockaddr *)fromsa)->sa_family =
1003 fromsa->sa_family;
1004#endif
1005 error = copyout(fromsa, mp->msg_name, (unsigned)len);
1006 if (error)
1007 goto out;
1008 }
1009 mp->msg_namelen = len;
1010 if (namelenp &&
1011 (error = copyout(&len, namelenp, sizeof (socklen_t)))) {
1012#ifdef COMPAT_OLDSOCK
1013 if (mp->msg_flags & MSG_COMPAT)
1014 error = 0; /* old recvfrom didn't check */
1015 else
1016#endif
1017 goto out;
1018 }
1019 }
1020 if (mp->msg_control) {
1021#ifdef COMPAT_OLDSOCK
1022 /*
1023 * We assume that old recvmsg calls won't receive access
1024 * rights and other control info, esp. as control info
1025 * is always optional and those options didn't exist in 4.3.
1026 * If we receive rights, trim the cmsghdr; anything else
1027 * is tossed.
1028 */
1029 if (control && mp->msg_flags & MSG_COMPAT) {
1030 if (mtod(control, struct cmsghdr *)->cmsg_level !=
1031 SOL_SOCKET ||
1032 mtod(control, struct cmsghdr *)->cmsg_type !=
1033 SCM_RIGHTS) {
1034 mp->msg_controllen = 0;
1035 goto out;
1036 }
1037 control->m_len -= sizeof (struct cmsghdr);
1038 control->m_data += sizeof (struct cmsghdr);
1039 }
1040#endif
1041 len = mp->msg_controllen;
1042 m = control;
1043 mp->msg_controllen = 0;
1044 ctlbuf = mp->msg_control;
1045
1046 while (m && len > 0) {
1047 unsigned int tocopy;
1048
1049 if (len >= m->m_len)
1050 tocopy = m->m_len;
1051 else {
1052 mp->msg_flags |= MSG_CTRUNC;
1053 tocopy = len;
1054 }
1055
1056 if ((error = copyout(mtod(m, caddr_t),
1057 ctlbuf, tocopy)) != 0)
1058 goto out;
1059
1060 ctlbuf += tocopy;
1061 len -= tocopy;
1062 m = m->m_next;
1063 }
1064 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
1065 }
1066out:
1067 fputsock(so);
1068 NET_UNLOCK_GIANT();
1069 if (fromsa)
1070 FREE(fromsa, M_SONAME);
1071 if (control)
1072 m_freem(control);
1073 return (error);
1074}
1075
1076/*
1077 * MPSAFE
1078 */
1079int
1080recvfrom(td, uap)
1081 struct thread *td;
1082 register struct recvfrom_args /* {
1083 int s;
1084 caddr_t buf;
1085 size_t len;
1086 int flags;
1087 struct sockaddr * __restrict from;
1088 socklen_t * __restrict fromlenaddr;
1089 } */ *uap;
1090{
1091 struct msghdr msg;
1092 struct iovec aiov;
1093 int error;
1094
1095 if (uap->fromlenaddr) {
1096 error = copyin(uap->fromlenaddr,
1097 &msg.msg_namelen, sizeof (msg.msg_namelen));
1098 if (error)
1099 goto done2;
1100 } else {
1101 msg.msg_namelen = 0;
1102 }
1103 msg.msg_name = uap->from;
1104 msg.msg_iov = &aiov;
1105 msg.msg_iovlen = 1;
1106 aiov.iov_base = uap->buf;
1107 aiov.iov_len = uap->len;
1108 msg.msg_control = 0;
1109 msg.msg_flags = uap->flags;
1110 error = recvit(td, uap->s, &msg, uap->fromlenaddr);
1111done2:
1112 return(error);
1113}
1114
#ifdef COMPAT_OLDSOCK
/*
 * Old-style recvfrom: mark the request MSG_COMPAT in the caller's
 * argument structure and let recvfrom() do the work.
 *
 * MPSAFE
 */
int
orecvfrom(struct thread *td, struct recvfrom_args *uap)
{

	uap->flags = uap->flags | MSG_COMPAT;
	return (recvfrom(td, uap));
}
#endif
1129
1130
1131#ifdef COMPAT_OLDSOCK
1132/*
1133 * MPSAFE
1134 */
1135int
1136orecv(td, uap)
1137 struct thread *td;
1138 register struct orecv_args /* {
1139 int s;
1140 caddr_t buf;
1141 int len;
1142 int flags;
1143 } */ *uap;
1144{
1145 struct msghdr msg;
1146 struct iovec aiov;
1147 int error;
1148
1149 msg.msg_name = 0;
1150 msg.msg_namelen = 0;
1151 msg.msg_iov = &aiov;
1152 msg.msg_iovlen = 1;
1153 aiov.iov_base = uap->buf;
1154 aiov.iov_len = uap->len;
1155 msg.msg_control = 0;
1156 msg.msg_flags = uap->flags;
1157 error = recvit(td, uap->s, &msg, NULL);
1158 return (error);
1159}
1160
1161/*
1162 * Old recvmsg. This code takes advantage of the fact that the old msghdr
1163 * overlays the new one, missing only the flags, and with the (old) access
1164 * rights where the control fields are now.
1165 *
1166 * MPSAFE
1167 */
1168int
1169orecvmsg(td, uap)
1170 struct thread *td;
1171 register struct orecvmsg_args /* {
1172 int s;
1173 struct omsghdr *msg;
1174 int flags;
1175 } */ *uap;
1176{
1177 struct msghdr msg;
1178 struct iovec aiov[UIO_SMALLIOV], *iov;
1179 int error;
1180
1181 error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
1182 if (error)
1183 return (error);
1184
1185 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1186 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
1187 error = EMSGSIZE;
1188 goto done2;
1189 }
1190 MALLOC(iov, struct iovec *,
1191 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1192 M_WAITOK);
1193 } else {
1194 iov = aiov;
1195 }
1196 msg.msg_flags = uap->flags | MSG_COMPAT;
1197 error = copyin(msg.msg_iov, iov,
1198 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1199 if (error)
1200 goto done;
1201 msg.msg_iov = iov;
1202 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
1203
1204 if (msg.msg_controllen && error == 0)
1205 error = copyout(&msg.msg_controllen,
1206 &uap->msg->msg_accrightslen, sizeof (int));
1207done:
1208 if (iov != aiov)
1209 FREE(iov, M_IOV);
1210done2:
1211 return (error);
1212}
1213#endif
1214
1215/*
1216 * MPSAFE
1217 */
1218int
1219recvmsg(td, uap)
1220 struct thread *td;
1221 register struct recvmsg_args /* {
1222 int s;
1223 struct msghdr *msg;
1224 int flags;
1225 } */ *uap;
1226{
1227 struct msghdr msg;
1228 struct iovec aiov[UIO_SMALLIOV], *uiov, *iov;
1229 register int error;
1230
1231 error = copyin(uap->msg, &msg, sizeof (msg));
1232 if (error)
1233 goto done2;
1234 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1235 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
1236 error = EMSGSIZE;
1237 goto done2;
1238 }
1239 MALLOC(iov, struct iovec *,
1240 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1241 M_WAITOK);
1242 } else {
1243 iov = aiov;
1244 }
1245#ifdef COMPAT_OLDSOCK
1246 msg.msg_flags = uap->flags &~ MSG_COMPAT;
1247#else
1248 msg.msg_flags = uap->flags;
1249#endif
1250 uiov = msg.msg_iov;
1251 msg.msg_iov = iov;
1252 error = copyin(uiov, iov,
1253 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1254 if (error)
1255 goto done;
1256 error = recvit(td, uap->s, &msg, NULL);
1257 if (!error) {
1258 msg.msg_iov = uiov;
1259 error = copyout(&msg, uap->msg, sizeof(msg));
1260 }
1261done:
1262 if (iov != aiov)
1263 FREE(iov, M_IOV);
1264done2:
1265 return (error);
1266}
1267
1268/*
1269 * MPSAFE
1270 */
1271/* ARGSUSED */
1272int
1273shutdown(td, uap)
1274 struct thread *td;
1275 register struct shutdown_args /* {
1276 int s;
1277 int how;
1278 } */ *uap;
1279{
1280 struct socket *so;
1281 int error;
1282
1283 NET_LOCK_GIANT();
1284 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
1285 error = soshutdown(so, uap->how);
1286 fputsock(so);
1287 }
1288 NET_UNLOCK_GIANT();
1289 return(error);
1290}
1291
1292/*
1293 * MPSAFE
1294 */
1295/* ARGSUSED */
1296int
1297setsockopt(td, uap)
1298 struct thread *td;
1299 register struct setsockopt_args /* {
1300 int s;
1301 int level;
1302 int name;
1303 caddr_t val;
1304 int valsize;
1305 } */ *uap;
1306{
1307 struct socket *so;
1308 struct sockopt sopt;
1309 int error;
1310
1311 if (uap->val == 0 && uap->valsize != 0)
1312 return (EFAULT);
1313 if (uap->valsize < 0)
1314 return (EINVAL);
1315
1316 NET_LOCK_GIANT();
1317 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
1318 sopt.sopt_dir = SOPT_SET;
1319 sopt.sopt_level = uap->level;
1320 sopt.sopt_name = uap->name;
1321 sopt.sopt_val = uap->val;
1322 sopt.sopt_valsize = uap->valsize;
1323 sopt.sopt_td = td;
1324 error = sosetopt(so, &sopt);
1325 fputsock(so);
1326 }
1327 NET_UNLOCK_GIANT();
1328 return(error);
1329}
1330
1331/*
1332 * MPSAFE
1333 */
1334/* ARGSUSED */
1335int
1336getsockopt(td, uap)
1337 struct thread *td;
1338 register struct getsockopt_args /* {
1339 int s;
1340 int level;
1341 int name;
1342 void * __restrict val;
1343 socklen_t * __restrict avalsize;
1344 } */ *uap;
1345{
1346 socklen_t valsize;
1347 int error;
1348 struct socket *so;
1349 struct sockopt sopt;
1350
1351 NET_LOCK_GIANT();
1352 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
1353 goto done2;
1354 if (uap->val) {
1355 error = copyin(uap->avalsize, &valsize, sizeof (valsize));
1356 if (error)
1357 goto done1;
1358 if (valsize < 0) {
1359 error = EINVAL;
1360 goto done1;
1361 }
1362 } else {
1363 valsize = 0;
1364 }
1365
1366 sopt.sopt_dir = SOPT_GET;
1367 sopt.sopt_level = uap->level;
1368 sopt.sopt_name = uap->name;
1369 sopt.sopt_val = uap->val;
1370 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
1371 sopt.sopt_td = td;
1372
1373 error = sogetopt(so, &sopt);
1374 if (error == 0) {
1375 valsize = sopt.sopt_valsize;
1376 error = copyout(&valsize, uap->avalsize, sizeof (valsize));
1377 }
1378done1:
1379 fputsock(so);
1380done2:
1381 NET_UNLOCK_GIANT();
1382 return (error);
1383}
1384
1385/*
1386 * getsockname1() - Get socket name.
1387 *
1388 * MPSAFE
1389 */
1390/* ARGSUSED */
1391static int
1392getsockname1(td, uap, compat)
1393 struct thread *td;
1394 register struct getsockname_args /* {
1395 int fdes;
1396 struct sockaddr * __restrict asa;
1397 socklen_t * __restrict alen;
1398 } */ *uap;
1399 int compat;
1400{
1401 struct socket *so;
1402 struct sockaddr *sa;
1403 socklen_t len;
1404 int error;
1405
1406 NET_LOCK_GIANT();
1407 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
1408 goto done2;
1409 error = copyin(uap->alen, &len, sizeof (len));
1410 if (error)
1411 goto done1;
1412 if (len < 0) {
1413 error = EINVAL;
1414 goto done1;
1415 }
1416 sa = 0;
1417 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1418 if (error)
1419 goto bad;
1420 if (sa == 0) {
1421 len = 0;
1422 goto gotnothing;
1423 }
1424
1425 len = MIN(len, sa->sa_len);
1426#ifdef COMPAT_OLDSOCK
1427 if (compat)
1428 ((struct osockaddr *)sa)->sa_family = sa->sa_family;
1429#endif
1430 error = copyout(sa, uap->asa, (u_int)len);
1431 if (error == 0)
1432gotnothing:
1433 error = copyout(&len, uap->alen, sizeof (len));
1434bad:
1435 if (sa)
1436 FREE(sa, M_SONAME);
1437done1:
1438 fputsock(so);
1439done2:
1440 NET_UNLOCK_GIANT();
1441 return (error);
1442}
1443
/*
 * getsockname() - getsockname1() with the compat flag cleared.
 *
 * MPSAFE
 */
int
getsockname(struct thread *td, struct getsockname_args *uap)
{
	int error;

	error = getsockname1(td, uap, 0);
	return (error);
}
1455
1456#ifdef COMPAT_OLDSOCK
/*
 * ogetsockname() - getsockname1() with the compat flag set.
 *
 * MPSAFE
 */
int
ogetsockname(struct thread *td, struct getsockname_args *uap)
{
	int error;

	error = getsockname1(td, uap, 1);
	return (error);
}
1468#endif /* COMPAT_OLDSOCK */
1469
1470/*
1471 * getpeername1() - Get name of peer for connected socket.
1472 *
1473 * MPSAFE
1474 */
1475/* ARGSUSED */
1476static int
1477getpeername1(td, uap, compat)
1478 struct thread *td;
1479 register struct getpeername_args /* {
1480 int fdes;
1481 struct sockaddr * __restrict asa;
1482 socklen_t * __restrict alen;
1483 } */ *uap;
1484 int compat;
1485{
1486 struct socket *so;
1487 struct sockaddr *sa;
1488 socklen_t len;
1489 int error;
1490
1491 NET_LOCK_GIANT();
1492 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
1493 goto done2;
1494 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1495 error = ENOTCONN;
1496 goto done1;
1497 }
1498 error = copyin(uap->alen, &len, sizeof (len));
1499 if (error)
1500 goto done1;
1501 if (len < 0) {
1502 error = EINVAL;
1503 goto done1;
1504 }
1505 sa = 0;
1506 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1507 if (error)
1508 goto bad;
1509 if (sa == 0) {
1510 len = 0;
1511 goto gotnothing;
1512 }
1513 len = MIN(len, sa->sa_len);
1514#ifdef COMPAT_OLDSOCK
1515 if (compat)
1516 ((struct osockaddr *)sa)->sa_family =
1517 sa->sa_family;
1518#endif
1519 error = copyout(sa, uap->asa, (u_int)len);
1520 if (error)
1521 goto bad;
1522gotnothing:
1523 error = copyout(&len, uap->alen, sizeof (len));
1524bad:
1525 if (sa)
1526 FREE(sa, M_SONAME);
1527done1:
1528 fputsock(so);
1529done2:
1530 NET_UNLOCK_GIANT();
1531 return (error);
1532}
1533
/*
 * getpeername() - getpeername1() with the compat flag cleared.
 *
 * MPSAFE
 */
int
getpeername(struct thread *td, struct getpeername_args *uap)
{
	int error;

	error = getpeername1(td, uap, 0);
	return (error);
}
1545
1546#ifdef COMPAT_OLDSOCK
/*
 * ogetpeername() - getpeername1() with the compat flag set.
 *
 * MPSAFE
 */
int
ogetpeername(struct thread *td, struct ogetpeername_args *uap)
{
	struct getpeername_args *args;

	/* XXX uap should have type `getpeername_args *' to begin with. */
	args = (struct getpeername_args *)uap;
	return (getpeername1(td, args, 1));
}
1559#endif /* COMPAT_OLDSOCK */
1560
1561int
1562sockargs(mp, buf, buflen, type)
1563 struct mbuf **mp;
1564 caddr_t buf;
1565 int buflen, type;
1566{
1567 register struct sockaddr *sa;
1568 register struct mbuf *m;
1569 int error;
1570
1571 if ((u_int)buflen > MLEN) {
1572#ifdef COMPAT_OLDSOCK
1573 if (type == MT_SONAME && (u_int)buflen <= 112)
1574 buflen = MLEN; /* unix domain compat. hack */
1575 else
1576#endif
1577 if ((u_int)buflen > MCLBYTES)
1578 return (EINVAL);
1579 }
1580 m = m_get(M_TRYWAIT, type);
1581 if (m == NULL)
1582 return (ENOBUFS);
1583 if ((u_int)buflen > MLEN) {
1584 MCLGET(m, M_TRYWAIT);
1585 if ((m->m_flags & M_EXT) == 0) {
1586 m_free(m);
1587 return (ENOBUFS);
1588 }
1589 }
1590 m->m_len = buflen;
1591 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1592 if (error)
1593 (void) m_free(m);
1594 else {
1595 *mp = m;
1596 if (type == MT_SONAME) {
1597 sa = mtod(m, struct sockaddr *);
1598
1599#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1600 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1601 sa->sa_family = sa->sa_len;
1602#endif
1603 sa->sa_len = buflen;
1604 }
1605 }
1606 return (error);
1607}
1608
1609int
1610getsockaddr(namp, uaddr, len)
1611 struct sockaddr **namp;
1612 caddr_t uaddr;
1613 size_t len;
1614{
1615 struct sockaddr *sa;
1616 int error;
1617
1618 if (len > SOCK_MAXADDRLEN)
1619 return (ENAMETOOLONG);
1620 if (len < offsetof(struct sockaddr, sa_data[0]))
1621 return (EINVAL);
1622 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1623 error = copyin(uaddr, sa, len);
1624 if (error) {
1625 FREE(sa, M_SONAME);
1626 } else {
1627#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1628 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1629 sa->sa_family = sa->sa_len;
1630#endif
1631 sa->sa_len = len;
1632 *namp = sa;
1633 }
1634 return (error);
1635}
1636
1637/*
1638 * Detach mapped page and release resources back to the system.
1639 */
1640void
1641sf_buf_mext(void *addr, void *args)
1642{
1643 vm_page_t m;
1644
1645 m = sf_buf_page(args);
1646 sf_buf_free(args);
1647 vm_page_lock_queues();
1648 vm_page_unwire(m, 0);
1649 /*
1650 * Check for the object going away on us. This can
1651 * happen since we don't hold a reference to it.
1652 * If so, we're responsible for freeing the page.
1653 */
1654 if (m->wire_count == 0 && m->object == NULL)
1655 vm_page_free(m);
1656 vm_page_unlock_queues();
1657}
1658
/*
 * sendfile(2)
 *
 * MPSAFE
 *
 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 *	 struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 *
 * Transmit the file referred to by 'fd', beginning at 'offset', over the
 * socket 's'.  At most 'nbytes' of the file are sent; nbytes == 0 means
 * "until EOF".  A header and/or trailer may optionally be added to the
 * socket output.  If 'sbytes' is given, the total number of bytes sent
 * is stored into *sbytes.
 *
 * This entry point calls do_sendfile() with the compat flag cleared.
 */
int
sendfile(struct thread *td, struct sendfile_args *uap)
{
	int error;

	error = do_sendfile(td, uap, 0);
	return (error);
}
1679
1680#ifdef COMPAT_FREEBSD4
1681int
1682freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
1683{
1684 struct sendfile_args args;
1685
1686 args.fd = uap->fd;
1687 args.s = uap->s;
1688 args.offset = uap->offset;
1689 args.nbytes = uap->nbytes;
1690 args.hdtr = uap->hdtr;
1691 args.sbytes = uap->sbytes;
1692 args.flags = uap->flags;
1693
1694 return (do_sendfile(td, &args, 1));
1695}
1696#endif /* COMPAT_FREEBSD4 */
1697
1698static int
1699do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
1700{
1701 struct vnode *vp;
1702 struct vm_object *obj;
1703 struct socket *so = NULL;
1704 struct mbuf *m, *m_header = NULL;
1705 struct sf_buf *sf;
1706 struct vm_page *pg;
1707 struct writev_args nuap;
1708 struct sf_hdtr hdtr;
1709 struct uio hdr_uio;
1710 off_t off, xfsize, hdtr_size, sbytes = 0;
1711 int error, s, headersize = 0, headersent = 0;
1712 struct iovec *hdr_iov = NULL;
1713
1714 mtx_lock(&Giant);
1715
1716 hdtr_size = 0;
1717
1718 /*
1719 * The descriptor must be a regular file and have a backing VM object.
1720 */
1721 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
1722 goto done;
1723 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1724 if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) {
1725 error = EINVAL;
1726 VOP_UNLOCK(vp, 0, td);
1727 goto done;
1728 }
1729 VOP_UNLOCK(vp, 0, td);
1730 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
1731 goto done;
1732 if (so->so_type != SOCK_STREAM) {
1733 error = EINVAL;
1734 goto done;
1735 }
1736 if ((so->so_state & SS_ISCONNECTED) == 0) {
1737 error = ENOTCONN;
1738 goto done;
1739 }
1740 if (uap->offset < 0) {
1741 error = EINVAL;
1742 goto done;
1743 }
1744
1745#ifdef MAC
1746 error = mac_check_socket_send(td->td_ucred, so);
1747 if (error)
1748 goto done;
1749#endif
1750
1751 /*
1752 * If specified, get the pointer to the sf_hdtr struct for
1753 * any headers/trailers.
1754 */
1755 if (uap->hdtr != NULL) {
1756 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1757 if (error)
1758 goto done;
1759 /*
1760 * Send any headers.
1761 */
1762 if (hdtr.headers != NULL) {
1763 hdr_uio.uio_td = td;
1764 hdr_uio.uio_rw = UIO_WRITE;
1765 error = uiofromiov(hdtr.headers, hdtr.hdr_cnt,
1766 &hdr_uio);
1767 if (error)
1768 goto done;
1769 /* Cache hdr_iov, m_uiotombuf may change it. */
1770 hdr_iov = hdr_uio.uio_iov;
1771 if (hdr_uio.uio_resid > 0) {
1772 m_header = m_uiotombuf(&hdr_uio, M_DONTWAIT, 0);
1773 if (m_header == NULL)
1774 goto done;
1775 headersize = m_header->m_pkthdr.len;
1776 if (compat)
1777 sbytes += headersize;
1778 }
1779 }
1780 }
1781
1782 /*
1783 * Protect against multiple writers to the socket.
1784 */
1785 (void) sblock(&so->so_snd, M_WAITOK);
1786
1787 /*
1788 * Loop through the pages in the file, starting with the requested
1789 * offset. Get a file page (do I/O if necessary), map the file page
1790 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1791 * it on the socket.
1792 */
1793 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
1794 vm_pindex_t pindex;
1795 vm_offset_t pgoff;
1796
1797 pindex = OFF_TO_IDX(off);
1798 VM_OBJECT_LOCK(obj);
1799retry_lookup:
1800 /*
1801 * Calculate the amount to transfer. Not to exceed a page,
1802 * the EOF, or the passed in nbytes.
1803 */
1804 xfsize = obj->un_pager.vnp.vnp_size - off;
1805 VM_OBJECT_UNLOCK(obj);
1806 if (xfsize > PAGE_SIZE)
1807 xfsize = PAGE_SIZE;
1808 pgoff = (vm_offset_t)(off & PAGE_MASK);
1809 if (PAGE_SIZE - pgoff < xfsize)
1810 xfsize = PAGE_SIZE - pgoff;
1811 if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
1812 xfsize = uap->nbytes - sbytes;
1813 if (xfsize <= 0) {
1814 if (m_header != NULL) {
1815 m = m_header;
1816 m_header = NULL;
1817 goto retry_space;
1818 } else
1819 break;
1820 }
1821 /*
1822 * Optimize the non-blocking case by looking at the socket space
1823 * before going to the extra work of constituting the sf_buf.
1824 */
1825 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
1826 if (so->so_state & SS_CANTSENDMORE)
1827 error = EPIPE;
1828 else
1829 error = EAGAIN;
1830 sbunlock(&so->so_snd);
1831 goto done;
1832 }
1833 VM_OBJECT_LOCK(obj);
1834 /*
1835 * Attempt to look up the page.
1836 *
1837 * Allocate if not found
1838 *
1839 * Wait and loop if busy.
1840 */
1841 pg = vm_page_lookup(obj, pindex);
1842
1843 if (pg == NULL) {
1844 pg = vm_page_alloc(obj, pindex,
1845 VM_ALLOC_NORMAL | VM_ALLOC_WIRED);
1846 if (pg == NULL) {
1847 VM_OBJECT_UNLOCK(obj);
1848 VM_WAIT;
1849 VM_OBJECT_LOCK(obj);
1850 goto retry_lookup;
1851 }
1852 vm_page_lock_queues();
1853 vm_page_wakeup(pg);
1854 } else {
1855 vm_page_lock_queues();
1856 if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy"))
1857 goto retry_lookup;
1858 /*
1859 * Wire the page so it does not get ripped out from
1860 * under us.
1861 */
1862 vm_page_wire(pg);
1863 }
1864
1865 /*
1866 * If page is not valid for what we need, initiate I/O
1867 */
1868
1869 if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) {
1870 VM_OBJECT_UNLOCK(obj);
1871 } else if (uap->flags & SF_NODISKIO) {
1872 error = EBUSY;
1873 } else {
1874 int bsize, resid;
1875
1876 /*
1877 * Ensure that our page is still around when the I/O
1878 * completes.
1879 */
1880 vm_page_io_start(pg);
1881 vm_page_unlock_queues();
1882 VM_OBJECT_UNLOCK(obj);
1883
1884 /*
1885 * Get the page from backing store.
1886 */
1887 bsize = vp->v_mount->mnt_stat.f_iosize;
1888 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td);
1889 /*
1890 * XXXMAC: Because we don't have fp->f_cred here,
1891 * we pass in NOCRED. This is probably wrong, but
1892 * is consistent with our original implementation.
1893 */
1894 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
1895 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
1896 IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
1897 td->td_ucred, NOCRED, &resid, td);
1898 VOP_UNLOCK(vp, 0, td);
1899 if (error)
1900 VM_OBJECT_LOCK(obj);
1901 vm_page_lock_queues();
1902 vm_page_io_finish(pg);
1903 mbstat.sf_iocnt++;
1904 }
1905
1906 if (error) {
1907 vm_page_unwire(pg, 0);
1908 /*
1909 * See if anyone else might know about this page.
1910 * If not and it is not valid, then free it.
1911 */
1912 if (pg->wire_count == 0 && pg->valid == 0 &&
1913 pg->busy == 0 && !(pg->flags & PG_BUSY) &&
1914 pg->hold_count == 0) {
1915 vm_page_busy(pg);
1916 vm_page_free(pg);
1917 }
1918 vm_page_unlock_queues();
1919 VM_OBJECT_UNLOCK(obj);
1920 sbunlock(&so->so_snd);
1921 goto done;
1922 }
1923 vm_page_unlock_queues();
1924
1925 /*
1926 * Get a sendfile buf. We usually wait as long as necessary,
1927 * but this wait can be interrupted.
1928 */
1929 if ((sf = sf_buf_alloc(pg, PCATCH)) == NULL) {
1930 mbstat.sf_allocfail++;
1931 vm_page_lock_queues();
1932 vm_page_unwire(pg, 0);
1933 if (pg->wire_count == 0 && pg->object == NULL)
1934 vm_page_free(pg);
1935 vm_page_unlock_queues();
1936 sbunlock(&so->so_snd);
1937 error = EINTR;
1938 goto done;
1939 }
1940
1941 /*
1942 * Get an mbuf header and set it up as having external storage.
1943 */
1944 if (m_header)
1945 MGET(m, M_TRYWAIT, MT_DATA);
1946 else
1947 MGETHDR(m, M_TRYWAIT, MT_DATA);
1948 if (m == NULL) {
1949 error = ENOBUFS;
1950 sf_buf_mext((void *)sf_buf_kva(sf), sf);
1951 sbunlock(&so->so_snd);
1952 goto done;
1953 }
1954 /*
1955 * Setup external storage for mbuf.
1956 */
1957 MEXTADD(m, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, sf, M_RDONLY,
1958 EXT_SFBUF);
1959 m->m_data = (char *)sf_buf_kva(sf) + pgoff;
1960 m->m_pkthdr.len = m->m_len = xfsize;
1961
1962 if (m_header) {
1963 m_cat(m_header, m);
1964 m = m_header;
1965 m_header = NULL;
1966 m_fixhdr(m);
1967 }
1968
1969 /*
1970 * Add the buffer to the socket buffer chain.
1971 */
1972 s = splnet();
1973retry_space:
1974 /*
1975 * Make sure that the socket is still able to take more data.
1976 * CANTSENDMORE being true usually means that the connection
1977 * was closed. so_error is true when an error was sensed after
1978 * a previous send.
1979 * The state is checked after the page mapping and buffer
1980 * allocation above since those operations may block and make
1981 * any socket checks stale. From this point forward, nothing
1982 * blocks before the pru_send (or more accurately, any blocking
1983 * results in a loop back to here to re-check).
1984 */
1985 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
1986 if (so->so_state & SS_CANTSENDMORE) {
1987 error = EPIPE;
1988 } else {
1989 error = so->so_error;
1990 so->so_error = 0;
1991 }
1992 m_freem(m);
1993 sbunlock(&so->so_snd);
1994 splx(s);
1995 goto done;
1996 }
1997 /*
1998 * Wait for socket space to become available. We do this just
1999 * after checking the connection state above in order to avoid
2000 * a race condition with sbwait().
2001 */
2002 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
2003 if (so->so_state & SS_NBIO) {
2004 m_freem(m);
2005 sbunlock(&so->so_snd);
2006 splx(s);
2007 error = EAGAIN;
2008 goto done;
2009 }
2010 error = sbwait(&so->so_snd);
2011 /*
2012 * An error from sbwait usually indicates that we've
2013 * been interrupted by a signal. If we've sent anything
2014 * then return bytes sent, otherwise return the error.
2015 */
2016 if (error) {
2017 m_freem(m);
2018 sbunlock(&so->so_snd);
2019 splx(s);
2020 goto done;
2021 }
2022 goto retry_space;
2023 }
2024 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td);
2025 splx(s);
2026 if (error) {
2027 sbunlock(&so->so_snd);
2028 goto done;
2029 }
2030 headersent = 1;
2031 }
2032 sbunlock(&so->so_snd);
2033
2034 /*
2035 * Send trailers. Wimp out and use writev(2).
2036 */
2037 if (uap->hdtr != NULL && hdtr.trailers != NULL) {
2038 nuap.fd = uap->s;
2039 nuap.iovp = hdtr.trailers;
2040 nuap.iovcnt = hdtr.trl_cnt;
2041 error = writev(td, &nuap);
2042 if (error)
2043 goto done;
2044 if (compat)
2045 sbytes += td->td_retval[0];
2046 else
2047 hdtr_size += td->td_retval[0];
2048 }
2049
2050done:
2051 if (headersent) {
2052 if (!compat)
2053 hdtr_size += headersize;
2054 } else {
2055 if (compat)
2056 sbytes -= headersize;
2057 }
2058 /*
2059 * If there was no error we have to clear td->td_retval[0]
2060 * because it may have been set by writev.
2061 */
2062 if (error == 0) {
2063 td->td_retval[0] = 0;
2064 }
2065 if (uap->sbytes != NULL) {
2066 if (!compat)
2067 sbytes += hdtr_size;
2068 copyout(&sbytes, uap->sbytes, sizeof(off_t));
2069 }
2070 if (vp)
2071 vrele(vp);
2072 if (so)
2073 fputsock(so);
2074 if (hdr_iov)
2075 FREE(hdr_iov, M_IOV);
2076 if (m_header)
2077 m_freem(m_header);
2078
2079 mtx_unlock(&Giant);
2080
2081 if (error == ERESTART)
2082 error = EINTR;
2083
2084 return (error);
2085}
107#define COMPAT_OLDSOCK
108#endif
109
110/*
111 * MPSAFE
112 */
113int
114socket(td, uap)
115 struct thread *td;
116 register struct socket_args /* {
117 int domain;
118 int type;
119 int protocol;
120 } */ *uap;
121{
122 struct filedesc *fdp;
123 struct socket *so;
124 struct file *fp;
125 int fd, error;
126
127 fdp = td->td_proc->p_fd;
128 error = falloc(td, &fp, &fd);
129 if (error)
130 return (error);
131 /* An extra reference on `fp' has been held for us by falloc(). */
132 NET_LOCK_GIANT();
133 error = socreate(uap->domain, &so, uap->type, uap->protocol,
134 td->td_ucred, td);
135 NET_UNLOCK_GIANT();
136 FILEDESC_LOCK(fdp);
137 if (error) {
138 if (fdp->fd_ofiles[fd] == fp) {
139 fdp->fd_ofiles[fd] = NULL;
140 fdunused(fdp, fd);
141 FILEDESC_UNLOCK(fdp);
142 fdrop(fp, td);
143 } else {
144 FILEDESC_UNLOCK(fdp);
145 }
146 } else {
147 fp->f_data = so; /* already has ref count */
148 fp->f_flag = FREAD|FWRITE;
149 fp->f_ops = &socketops;
150 fp->f_type = DTYPE_SOCKET;
151 FILEDESC_UNLOCK(fdp);
152 td->td_retval[0] = fd;
153 }
154 fdrop(fp, td);
155 return (error);
156}
157
158/*
159 * MPSAFE
160 */
161/* ARGSUSED */
162int
163bind(td, uap)
164 struct thread *td;
165 register struct bind_args /* {
166 int s;
167 caddr_t name;
168 int namelen;
169 } */ *uap;
170{
171 struct sockaddr *sa;
172 int error;
173
174 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
175 return (error);
176
177 return (kern_bind(td, uap->s, sa));
178}
179
180int
181kern_bind(td, fd, sa)
182 struct thread *td;
183 int fd;
184 struct sockaddr *sa;
185{
186 struct socket *so;
187 int error;
188
189 NET_LOCK_GIANT();
190 if ((error = fgetsock(td, fd, &so, NULL)) != 0)
191 goto done2;
192#ifdef MAC
193 error = mac_check_socket_bind(td->td_ucred, so, sa);
194 if (error)
195 goto done1;
196#endif
197 error = sobind(so, sa, td);
198#ifdef MAC
199done1:
200#endif
201 fputsock(so);
202done2:
203 NET_UNLOCK_GIANT();
204 FREE(sa, M_SONAME);
205 return (error);
206}
207
208/*
209 * MPSAFE
210 */
211/* ARGSUSED */
212int
213listen(td, uap)
214 struct thread *td;
215 register struct listen_args /* {
216 int s;
217 int backlog;
218 } */ *uap;
219{
220 struct socket *so;
221 int error;
222
223 NET_LOCK_GIANT();
224 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
225#ifdef MAC
226 error = mac_check_socket_listen(td->td_ucred, so);
227 if (error)
228 goto done;
229#endif
230 error = solisten(so, uap->backlog, td);
231#ifdef MAC
232done:
233#endif
234 fputsock(so);
235 }
236 NET_UNLOCK_GIANT();
237 return(error);
238}
239
240/*
241 * accept1()
242 * MPSAFE
243 */
244static int
245accept1(td, uap, compat)
246 struct thread *td;
247 register struct accept_args /* {
248 int s;
249 struct sockaddr * __restrict name;
250 socklen_t * __restrict anamelen;
251 } */ *uap;
252 int compat;
253{
254 struct filedesc *fdp;
255 struct file *nfp = NULL;
256 struct sockaddr *sa = NULL;
257 socklen_t namelen;
258 int error;
259 struct socket *head, *so;
260 int fd;
261 u_int fflag;
262 pid_t pgid;
263 int tmp;
264
265 fdp = td->td_proc->p_fd;
266 if (uap->name) {
267 error = copyin(uap->anamelen, &namelen, sizeof (namelen));
268 if(error)
269 return (error);
270 if (namelen < 0)
271 return (EINVAL);
272 }
273 NET_LOCK_GIANT();
274 error = fgetsock(td, uap->s, &head, &fflag);
275 if (error)
276 goto done2;
277 if ((head->so_options & SO_ACCEPTCONN) == 0) {
278 error = EINVAL;
279 goto done;
280 }
281 error = falloc(td, &nfp, &fd);
282 if (error)
283 goto done;
284 ACCEPT_LOCK();
285 if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
286 ACCEPT_UNLOCK();
287 error = EWOULDBLOCK;
288 goto noconnection;
289 }
290 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
291 if (head->so_state & SS_CANTRCVMORE) {
292 head->so_error = ECONNABORTED;
293 break;
294 }
295 error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
296 "accept", 0);
297 if (error) {
298 ACCEPT_UNLOCK();
299 goto noconnection;
300 }
301 }
302 if (head->so_error) {
303 error = head->so_error;
304 head->so_error = 0;
305 ACCEPT_UNLOCK();
306 goto noconnection;
307 }
308 so = TAILQ_FIRST(&head->so_comp);
309 KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
310 KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
311
312 soref(so); /* file descriptor reference */
313
314 TAILQ_REMOVE(&head->so_comp, so, so_list);
315 head->so_qlen--;
316 so->so_qstate &= ~SQ_COMP;
317 so->so_head = NULL;
318
319 ACCEPT_UNLOCK();
320
321 /* An extra reference on `nfp' has been held for us by falloc(). */
322 td->td_retval[0] = fd;
323
324 /* connection has been removed from the listen queue */
325 KNOTE(&head->so_rcv.sb_sel.si_note, 0);
326
327 pgid = fgetown(&head->so_sigio);
328 if (pgid != 0)
329 fsetown(pgid, &so->so_sigio);
330
331 FILE_LOCK(nfp);
332 nfp->f_data = so; /* nfp has ref count from falloc */
333 nfp->f_flag = fflag;
334 nfp->f_ops = &socketops;
335 nfp->f_type = DTYPE_SOCKET;
336 FILE_UNLOCK(nfp);
337 /* Sync socket nonblocking/async state with file flags */
338 tmp = fflag & FNONBLOCK;
339 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
340 tmp = fflag & FASYNC;
341 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
342 sa = 0;
343 error = soaccept(so, &sa);
344 if (error) {
345 /*
346 * return a namelen of zero for older code which might
347 * ignore the return value from accept.
348 */
349 if (uap->name != NULL) {
350 namelen = 0;
351 (void) copyout(&namelen,
352 uap->anamelen, sizeof(*uap->anamelen));
353 }
354 goto noconnection;
355 }
356 if (sa == NULL) {
357 namelen = 0;
358 if (uap->name)
359 goto gotnoname;
360 error = 0;
361 goto done;
362 }
363 if (uap->name) {
364 /* check sa_len before it is destroyed */
365 if (namelen > sa->sa_len)
366 namelen = sa->sa_len;
367#ifdef COMPAT_OLDSOCK
368 if (compat)
369 ((struct osockaddr *)sa)->sa_family =
370 sa->sa_family;
371#endif
372 error = copyout(sa, uap->name, (u_int)namelen);
373 if (!error)
374gotnoname:
375 error = copyout(&namelen,
376 uap->anamelen, sizeof (*uap->anamelen));
377 }
378noconnection:
379 if (sa)
380 FREE(sa, M_SONAME);
381
382 /*
383 * close the new descriptor, assuming someone hasn't ripped it
384 * out from under us.
385 */
386 if (error) {
387 FILEDESC_LOCK(fdp);
388 if (fdp->fd_ofiles[fd] == nfp) {
389 fdp->fd_ofiles[fd] = NULL;
390 fdunused(fdp, fd);
391 FILEDESC_UNLOCK(fdp);
392 fdrop(nfp, td);
393 } else {
394 FILEDESC_UNLOCK(fdp);
395 }
396 }
397
398 /*
399 * Release explicitly held references before returning.
400 */
401done:
402 if (nfp != NULL)
403 fdrop(nfp, td);
404 fputsock(head);
405done2:
406 NET_UNLOCK_GIANT();
407 return (error);
408}
409
/*
 * accept() - accept1() with the compat flag cleared.
 *
 * MPSAFE (accept1() is MPSAFE)
 */
int
accept(struct thread *td, struct accept_args *uap)
{
	int error;

	error = accept1(td, uap, 0);
	return (error);
}
421
422#ifdef COMPAT_OLDSOCK
/*
 * oaccept() - accept1() with the compat flag set.
 *
 * MPSAFE (accept1() is MPSAFE)
 */
int
oaccept(struct thread *td, struct accept_args *uap)
{
	int error;

	error = accept1(td, uap, 1);
	return (error);
}
434#endif /* COMPAT_OLDSOCK */
435
436/*
437 * MPSAFE
438 */
439/* ARGSUSED */
440int
441connect(td, uap)
442 struct thread *td;
443 register struct connect_args /* {
444 int s;
445 caddr_t name;
446 int namelen;
447 } */ *uap;
448{
449 struct sockaddr *sa;
450 int error;
451
452 error = getsockaddr(&sa, uap->name, uap->namelen);
453 if (error)
454 return (error);
455
456 return (kern_connect(td, uap->s, sa));
457}
458
459
460int
461kern_connect(td, fd, sa)
462 struct thread *td;
463 int fd;
464 struct sockaddr *sa;
465{
466 struct socket *so;
467 int error, s;
468 int interrupted = 0;
469
470 NET_LOCK_GIANT();
471 if ((error = fgetsock(td, fd, &so, NULL)) != 0)
472 goto done2;
473 if (so->so_state & SS_ISCONNECTING) {
474 error = EALREADY;
475 goto done1;
476 }
477#ifdef MAC
478 error = mac_check_socket_connect(td->td_ucred, so, sa);
479 if (error)
480 goto bad;
481#endif
482 error = soconnect(so, sa, td);
483 if (error)
484 goto bad;
485 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
486 error = EINPROGRESS;
487 goto done1;
488 }
489 s = splnet();
490 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
491 error = tsleep(&so->so_timeo, PSOCK | PCATCH, "connec", 0);
492 if (error) {
493 if (error == EINTR || error == ERESTART)
494 interrupted = 1;
495 break;
496 }
497 }
498 if (error == 0) {
499 error = so->so_error;
500 so->so_error = 0;
501 }
502 splx(s);
503bad:
504 if (!interrupted)
505 so->so_state &= ~SS_ISCONNECTING;
506 if (error == ERESTART)
507 error = EINTR;
508done1:
509 fputsock(so);
510done2:
511 NET_UNLOCK_GIANT();
512 FREE(sa, M_SONAME);
513 return (error);
514}
515
516/*
517 * MPSAFE
518 */
519int
520socketpair(td, uap)
521 struct thread *td;
522 register struct socketpair_args /* {
523 int domain;
524 int type;
525 int protocol;
526 int *rsv;
527 } */ *uap;
528{
529 register struct filedesc *fdp = td->td_proc->p_fd;
530 struct file *fp1, *fp2;
531 struct socket *so1, *so2;
532 int fd, error, sv[2];
533
534 NET_LOCK_GIANT();
535 error = socreate(uap->domain, &so1, uap->type, uap->protocol,
536 td->td_ucred, td);
537 if (error)
538 goto done2;
539 error = socreate(uap->domain, &so2, uap->type, uap->protocol,
540 td->td_ucred, td);
541 if (error)
542 goto free1;
543 /* On success extra reference to `fp1' and 'fp2' is set by falloc. */
544 error = falloc(td, &fp1, &fd);
545 if (error)
546 goto free2;
547 sv[0] = fd;
548 fp1->f_data = so1; /* so1 already has ref count */
549 error = falloc(td, &fp2, &fd);
550 if (error)
551 goto free3;
552 fp2->f_data = so2; /* so2 already has ref count */
553 sv[1] = fd;
554 error = soconnect2(so1, so2);
555 if (error)
556 goto free4;
557 if (uap->type == SOCK_DGRAM) {
558 /*
559 * Datagram socket connection is asymmetric.
560 */
561 error = soconnect2(so2, so1);
562 if (error)
563 goto free4;
564 }
565 FILE_LOCK(fp1);
566 fp1->f_flag = FREAD|FWRITE;
567 fp1->f_ops = &socketops;
568 fp1->f_type = DTYPE_SOCKET;
569 FILE_UNLOCK(fp1);
570 FILE_LOCK(fp2);
571 fp2->f_flag = FREAD|FWRITE;
572 fp2->f_ops = &socketops;
573 fp2->f_type = DTYPE_SOCKET;
574 FILE_UNLOCK(fp2);
575 error = copyout(sv, uap->rsv, 2 * sizeof (int));
576 fdrop(fp1, td);
577 fdrop(fp2, td);
578 goto done2;
579free4:
580 FILEDESC_LOCK(fdp);
581 if (fdp->fd_ofiles[sv[1]] == fp2) {
582 fdp->fd_ofiles[sv[1]] = NULL;
583 fdunused(fdp, sv[1]);
584 FILEDESC_UNLOCK(fdp);
585 fdrop(fp2, td);
586 } else {
587 FILEDESC_UNLOCK(fdp);
588 }
589 fdrop(fp2, td);
590free3:
591 FILEDESC_LOCK(fdp);
592 if (fdp->fd_ofiles[sv[0]] == fp1) {
593 fdp->fd_ofiles[sv[0]] = NULL;
594 fdunused(fdp, sv[0]);
595 FILEDESC_UNLOCK(fdp);
596 fdrop(fp1, td);
597 } else {
598 FILEDESC_UNLOCK(fdp);
599 }
600 fdrop(fp1, td);
601free2:
602 (void)soclose(so2);
603free1:
604 (void)soclose(so1);
605done2:
606 NET_UNLOCK_GIANT();
607 return (error);
608}
609
610static int
611sendit(td, s, mp, flags)
612 register struct thread *td;
613 int s;
614 register struct msghdr *mp;
615 int flags;
616{
617 struct mbuf *control;
618 struct sockaddr *to;
619 int error;
620
621 if (mp->msg_name != NULL) {
622 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
623 if (error) {
624 to = NULL;
625 goto bad;
626 }
627 mp->msg_name = to;
628 } else {
629 to = NULL;
630 }
631
632 if (mp->msg_control) {
633 if (mp->msg_controllen < sizeof(struct cmsghdr)
634#ifdef COMPAT_OLDSOCK
635 && mp->msg_flags != MSG_COMPAT
636#endif
637 ) {
638 error = EINVAL;
639 goto bad;
640 }
641 error = sockargs(&control, mp->msg_control,
642 mp->msg_controllen, MT_CONTROL);
643 if (error)
644 goto bad;
645#ifdef COMPAT_OLDSOCK
646 if (mp->msg_flags == MSG_COMPAT) {
647 register struct cmsghdr *cm;
648
649 M_PREPEND(control, sizeof(*cm), M_TRYWAIT);
650 if (control == 0) {
651 error = ENOBUFS;
652 goto bad;
653 } else {
654 cm = mtod(control, struct cmsghdr *);
655 cm->cmsg_len = control->m_len;
656 cm->cmsg_level = SOL_SOCKET;
657 cm->cmsg_type = SCM_RIGHTS;
658 }
659 }
660#endif
661 } else {
662 control = NULL;
663 }
664
665 error = kern_sendit(td, s, mp, flags, control);
666
667bad:
668 if (to)
669 FREE(to, M_SONAME);
670 return (error);
671}
672
673int
674kern_sendit(td, s, mp, flags, control)
675 struct thread *td;
676 int s;
677 struct msghdr *mp;
678 int flags;
679 struct mbuf *control;
680{
681 struct uio auio;
682 struct iovec *iov;
683 struct socket *so;
684 int i;
685 int len, error;
686#ifdef KTRACE
687 struct iovec *ktriov = NULL;
688 struct uio ktruio;
689 int iovlen;
690#endif
691
692 NET_LOCK_GIANT();
693 if ((error = fgetsock(td, s, &so, NULL)) != 0)
694 goto bad2;
695
696#ifdef MAC
697 error = mac_check_socket_send(td->td_ucred, so);
698 if (error)
699 goto bad;
700#endif
701
702 auio.uio_iov = mp->msg_iov;
703 auio.uio_iovcnt = mp->msg_iovlen;
704 auio.uio_segflg = UIO_USERSPACE;
705 auio.uio_rw = UIO_WRITE;
706 auio.uio_td = td;
707 auio.uio_offset = 0; /* XXX */
708 auio.uio_resid = 0;
709 iov = mp->msg_iov;
710 for (i = 0; i < mp->msg_iovlen; i++, iov++) {
711 if ((auio.uio_resid += iov->iov_len) < 0) {
712 error = EINVAL;
713 goto bad;
714 }
715 }
716#ifdef KTRACE
717 if (KTRPOINT(td, KTR_GENIO)) {
718 iovlen = auio.uio_iovcnt * sizeof (struct iovec);
719 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
720 bcopy(auio.uio_iov, ktriov, iovlen);
721 ktruio = auio;
722 }
723#endif
724 len = auio.uio_resid;
725 error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio,
726 0, control, flags, td);
727 if (error) {
728 if (auio.uio_resid != len && (error == ERESTART ||
729 error == EINTR || error == EWOULDBLOCK))
730 error = 0;
731 /* Generation of SIGPIPE can be controlled per socket */
732 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE)) {
733 PROC_LOCK(td->td_proc);
734 psignal(td->td_proc, SIGPIPE);
735 PROC_UNLOCK(td->td_proc);
736 }
737 }
738 if (error == 0)
739 td->td_retval[0] = len - auio.uio_resid;
740#ifdef KTRACE
741 if (ktriov != NULL) {
742 if (error == 0) {
743 ktruio.uio_iov = ktriov;
744 ktruio.uio_resid = td->td_retval[0];
745 ktrgenio(s, UIO_WRITE, &ktruio, error);
746 }
747 FREE(ktriov, M_TEMP);
748 }
749#endif
750bad:
751 fputsock(so);
752bad2:
753 NET_UNLOCK_GIANT();
754 return (error);
755}
756
757/*
758 * MPSAFE
759 */
760int
761sendto(td, uap)
762 struct thread *td;
763 register struct sendto_args /* {
764 int s;
765 caddr_t buf;
766 size_t len;
767 int flags;
768 caddr_t to;
769 int tolen;
770 } */ *uap;
771{
772 struct msghdr msg;
773 struct iovec aiov;
774 int error;
775
776 msg.msg_name = uap->to;
777 msg.msg_namelen = uap->tolen;
778 msg.msg_iov = &aiov;
779 msg.msg_iovlen = 1;
780 msg.msg_control = 0;
781#ifdef COMPAT_OLDSOCK
782 msg.msg_flags = 0;
783#endif
784 aiov.iov_base = uap->buf;
785 aiov.iov_len = uap->len;
786 error = sendit(td, uap->s, &msg, uap->flags);
787 return (error);
788}
789
790#ifdef COMPAT_OLDSOCK
791/*
792 * MPSAFE
793 */
794int
795osend(td, uap)
796 struct thread *td;
797 register struct osend_args /* {
798 int s;
799 caddr_t buf;
800 int len;
801 int flags;
802 } */ *uap;
803{
804 struct msghdr msg;
805 struct iovec aiov;
806 int error;
807
808 msg.msg_name = 0;
809 msg.msg_namelen = 0;
810 msg.msg_iov = &aiov;
811 msg.msg_iovlen = 1;
812 aiov.iov_base = uap->buf;
813 aiov.iov_len = uap->len;
814 msg.msg_control = 0;
815 msg.msg_flags = 0;
816 error = sendit(td, uap->s, &msg, uap->flags);
817 return (error);
818}
819
820/*
821 * MPSAFE
822 */
823int
824osendmsg(td, uap)
825 struct thread *td;
826 register struct osendmsg_args /* {
827 int s;
828 caddr_t msg;
829 int flags;
830 } */ *uap;
831{
832 struct msghdr msg;
833 struct iovec aiov[UIO_SMALLIOV], *iov;
834 int error;
835
836 error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
837 if (error)
838 goto done2;
839 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
840 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
841 error = EMSGSIZE;
842 goto done2;
843 }
844 MALLOC(iov, struct iovec *,
845 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
846 M_WAITOK);
847 } else {
848 iov = aiov;
849 }
850 error = copyin(msg.msg_iov, iov,
851 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
852 if (error)
853 goto done;
854 msg.msg_flags = MSG_COMPAT;
855 msg.msg_iov = iov;
856 error = sendit(td, uap->s, &msg, uap->flags);
857done:
858 if (iov != aiov)
859 FREE(iov, M_IOV);
860done2:
861 return (error);
862}
863#endif
864
865/*
866 * MPSAFE
867 */
868int
869sendmsg(td, uap)
870 struct thread *td;
871 register struct sendmsg_args /* {
872 int s;
873 caddr_t msg;
874 int flags;
875 } */ *uap;
876{
877 struct msghdr msg;
878 struct iovec aiov[UIO_SMALLIOV], *iov;
879 int error;
880
881 error = copyin(uap->msg, &msg, sizeof (msg));
882 if (error)
883 goto done2;
884 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
885 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
886 error = EMSGSIZE;
887 goto done2;
888 }
889 MALLOC(iov, struct iovec *,
890 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
891 M_WAITOK);
892 } else {
893 iov = aiov;
894 }
895 if (msg.msg_iovlen &&
896 (error = copyin(msg.msg_iov, iov,
897 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)))))
898 goto done;
899 msg.msg_iov = iov;
900#ifdef COMPAT_OLDSOCK
901 msg.msg_flags = 0;
902#endif
903 error = sendit(td, uap->s, &msg, uap->flags);
904done:
905 if (iov != aiov)
906 FREE(iov, M_IOV);
907done2:
908 return (error);
909}
910
911static int
912recvit(td, s, mp, namelenp)
913 register struct thread *td;
914 int s;
915 register struct msghdr *mp;
916 void *namelenp;
917{
918 struct uio auio;
919 register struct iovec *iov;
920 register int i;
921 socklen_t len;
922 int error;
923 struct mbuf *m, *control = 0;
924 caddr_t ctlbuf;
925 struct socket *so;
926 struct sockaddr *fromsa = 0;
927#ifdef KTRACE
928 struct iovec *ktriov = NULL;
929 struct uio ktruio;
930 int iovlen;
931#endif
932
933 NET_LOCK_GIANT();
934 if ((error = fgetsock(td, s, &so, NULL)) != 0) {
935 NET_UNLOCK_GIANT();
936 return (error);
937 }
938
939#ifdef MAC
940 error = mac_check_socket_receive(td->td_ucred, so);
941 if (error) {
942 fputsock(so);
943 NET_UNLOCK_GIANT();
944 return (error);
945 }
946#endif
947
948 auio.uio_iov = mp->msg_iov;
949 auio.uio_iovcnt = mp->msg_iovlen;
950 auio.uio_segflg = UIO_USERSPACE;
951 auio.uio_rw = UIO_READ;
952 auio.uio_td = td;
953 auio.uio_offset = 0; /* XXX */
954 auio.uio_resid = 0;
955 iov = mp->msg_iov;
956 for (i = 0; i < mp->msg_iovlen; i++, iov++) {
957 if ((auio.uio_resid += iov->iov_len) < 0) {
958 fputsock(so);
959 NET_UNLOCK_GIANT();
960 return (EINVAL);
961 }
962 }
963#ifdef KTRACE
964 if (KTRPOINT(td, KTR_GENIO)) {
965 iovlen = auio.uio_iovcnt * sizeof (struct iovec);
966 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
967 bcopy(auio.uio_iov, ktriov, iovlen);
968 ktruio = auio;
969 }
970#endif
971 len = auio.uio_resid;
972 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
973 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
974 &mp->msg_flags);
975 if (error) {
976 if (auio.uio_resid != (int)len && (error == ERESTART ||
977 error == EINTR || error == EWOULDBLOCK))
978 error = 0;
979 }
980#ifdef KTRACE
981 if (ktriov != NULL) {
982 if (error == 0) {
983 ktruio.uio_iov = ktriov;
984 ktruio.uio_resid = (int)len - auio.uio_resid;
985 ktrgenio(s, UIO_READ, &ktruio, error);
986 }
987 FREE(ktriov, M_TEMP);
988 }
989#endif
990 if (error)
991 goto out;
992 td->td_retval[0] = (int)len - auio.uio_resid;
993 if (mp->msg_name) {
994 len = mp->msg_namelen;
995 if (len <= 0 || fromsa == 0)
996 len = 0;
997 else {
998 /* save sa_len before it is destroyed by MSG_COMPAT */
999 len = MIN(len, fromsa->sa_len);
1000#ifdef COMPAT_OLDSOCK
1001 if (mp->msg_flags & MSG_COMPAT)
1002 ((struct osockaddr *)fromsa)->sa_family =
1003 fromsa->sa_family;
1004#endif
1005 error = copyout(fromsa, mp->msg_name, (unsigned)len);
1006 if (error)
1007 goto out;
1008 }
1009 mp->msg_namelen = len;
1010 if (namelenp &&
1011 (error = copyout(&len, namelenp, sizeof (socklen_t)))) {
1012#ifdef COMPAT_OLDSOCK
1013 if (mp->msg_flags & MSG_COMPAT)
1014 error = 0; /* old recvfrom didn't check */
1015 else
1016#endif
1017 goto out;
1018 }
1019 }
1020 if (mp->msg_control) {
1021#ifdef COMPAT_OLDSOCK
1022 /*
1023 * We assume that old recvmsg calls won't receive access
1024 * rights and other control info, esp. as control info
1025 * is always optional and those options didn't exist in 4.3.
1026 * If we receive rights, trim the cmsghdr; anything else
1027 * is tossed.
1028 */
1029 if (control && mp->msg_flags & MSG_COMPAT) {
1030 if (mtod(control, struct cmsghdr *)->cmsg_level !=
1031 SOL_SOCKET ||
1032 mtod(control, struct cmsghdr *)->cmsg_type !=
1033 SCM_RIGHTS) {
1034 mp->msg_controllen = 0;
1035 goto out;
1036 }
1037 control->m_len -= sizeof (struct cmsghdr);
1038 control->m_data += sizeof (struct cmsghdr);
1039 }
1040#endif
1041 len = mp->msg_controllen;
1042 m = control;
1043 mp->msg_controllen = 0;
1044 ctlbuf = mp->msg_control;
1045
1046 while (m && len > 0) {
1047 unsigned int tocopy;
1048
1049 if (len >= m->m_len)
1050 tocopy = m->m_len;
1051 else {
1052 mp->msg_flags |= MSG_CTRUNC;
1053 tocopy = len;
1054 }
1055
1056 if ((error = copyout(mtod(m, caddr_t),
1057 ctlbuf, tocopy)) != 0)
1058 goto out;
1059
1060 ctlbuf += tocopy;
1061 len -= tocopy;
1062 m = m->m_next;
1063 }
1064 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
1065 }
1066out:
1067 fputsock(so);
1068 NET_UNLOCK_GIANT();
1069 if (fromsa)
1070 FREE(fromsa, M_SONAME);
1071 if (control)
1072 m_freem(control);
1073 return (error);
1074}
1075
1076/*
1077 * MPSAFE
1078 */
1079int
1080recvfrom(td, uap)
1081 struct thread *td;
1082 register struct recvfrom_args /* {
1083 int s;
1084 caddr_t buf;
1085 size_t len;
1086 int flags;
1087 struct sockaddr * __restrict from;
1088 socklen_t * __restrict fromlenaddr;
1089 } */ *uap;
1090{
1091 struct msghdr msg;
1092 struct iovec aiov;
1093 int error;
1094
1095 if (uap->fromlenaddr) {
1096 error = copyin(uap->fromlenaddr,
1097 &msg.msg_namelen, sizeof (msg.msg_namelen));
1098 if (error)
1099 goto done2;
1100 } else {
1101 msg.msg_namelen = 0;
1102 }
1103 msg.msg_name = uap->from;
1104 msg.msg_iov = &aiov;
1105 msg.msg_iovlen = 1;
1106 aiov.iov_base = uap->buf;
1107 aiov.iov_len = uap->len;
1108 msg.msg_control = 0;
1109 msg.msg_flags = uap->flags;
1110 error = recvit(td, uap->s, &msg, uap->fromlenaddr);
1111done2:
1112 return(error);
1113}
1114
#ifdef COMPAT_OLDSOCK
/*
 * Old recvfrom(2) entry point: identical to recvfrom() except that
 * MSG_COMPAT is forced on in the caller's flags first.
 *
 * MPSAFE
 */
int
orecvfrom(td, uap)
	struct thread *td;
	struct recvfrom_args *uap;
{

	uap->flags = uap->flags | MSG_COMPAT;
	return (recvfrom(td, uap));
}
#endif
1129
1130
1131#ifdef COMPAT_OLDSOCK
1132/*
1133 * MPSAFE
1134 */
1135int
1136orecv(td, uap)
1137 struct thread *td;
1138 register struct orecv_args /* {
1139 int s;
1140 caddr_t buf;
1141 int len;
1142 int flags;
1143 } */ *uap;
1144{
1145 struct msghdr msg;
1146 struct iovec aiov;
1147 int error;
1148
1149 msg.msg_name = 0;
1150 msg.msg_namelen = 0;
1151 msg.msg_iov = &aiov;
1152 msg.msg_iovlen = 1;
1153 aiov.iov_base = uap->buf;
1154 aiov.iov_len = uap->len;
1155 msg.msg_control = 0;
1156 msg.msg_flags = uap->flags;
1157 error = recvit(td, uap->s, &msg, NULL);
1158 return (error);
1159}
1160
1161/*
1162 * Old recvmsg. This code takes advantage of the fact that the old msghdr
1163 * overlays the new one, missing only the flags, and with the (old) access
1164 * rights where the control fields are now.
1165 *
1166 * MPSAFE
1167 */
1168int
1169orecvmsg(td, uap)
1170 struct thread *td;
1171 register struct orecvmsg_args /* {
1172 int s;
1173 struct omsghdr *msg;
1174 int flags;
1175 } */ *uap;
1176{
1177 struct msghdr msg;
1178 struct iovec aiov[UIO_SMALLIOV], *iov;
1179 int error;
1180
1181 error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
1182 if (error)
1183 return (error);
1184
1185 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1186 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
1187 error = EMSGSIZE;
1188 goto done2;
1189 }
1190 MALLOC(iov, struct iovec *,
1191 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1192 M_WAITOK);
1193 } else {
1194 iov = aiov;
1195 }
1196 msg.msg_flags = uap->flags | MSG_COMPAT;
1197 error = copyin(msg.msg_iov, iov,
1198 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1199 if (error)
1200 goto done;
1201 msg.msg_iov = iov;
1202 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
1203
1204 if (msg.msg_controllen && error == 0)
1205 error = copyout(&msg.msg_controllen,
1206 &uap->msg->msg_accrightslen, sizeof (int));
1207done:
1208 if (iov != aiov)
1209 FREE(iov, M_IOV);
1210done2:
1211 return (error);
1212}
1213#endif
1214
1215/*
1216 * MPSAFE
1217 */
1218int
1219recvmsg(td, uap)
1220 struct thread *td;
1221 register struct recvmsg_args /* {
1222 int s;
1223 struct msghdr *msg;
1224 int flags;
1225 } */ *uap;
1226{
1227 struct msghdr msg;
1228 struct iovec aiov[UIO_SMALLIOV], *uiov, *iov;
1229 register int error;
1230
1231 error = copyin(uap->msg, &msg, sizeof (msg));
1232 if (error)
1233 goto done2;
1234 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1235 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
1236 error = EMSGSIZE;
1237 goto done2;
1238 }
1239 MALLOC(iov, struct iovec *,
1240 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1241 M_WAITOK);
1242 } else {
1243 iov = aiov;
1244 }
1245#ifdef COMPAT_OLDSOCK
1246 msg.msg_flags = uap->flags &~ MSG_COMPAT;
1247#else
1248 msg.msg_flags = uap->flags;
1249#endif
1250 uiov = msg.msg_iov;
1251 msg.msg_iov = iov;
1252 error = copyin(uiov, iov,
1253 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1254 if (error)
1255 goto done;
1256 error = recvit(td, uap->s, &msg, NULL);
1257 if (!error) {
1258 msg.msg_iov = uiov;
1259 error = copyout(&msg, uap->msg, sizeof(msg));
1260 }
1261done:
1262 if (iov != aiov)
1263 FREE(iov, M_IOV);
1264done2:
1265 return (error);
1266}
1267
1268/*
1269 * MPSAFE
1270 */
1271/* ARGSUSED */
1272int
1273shutdown(td, uap)
1274 struct thread *td;
1275 register struct shutdown_args /* {
1276 int s;
1277 int how;
1278 } */ *uap;
1279{
1280 struct socket *so;
1281 int error;
1282
1283 NET_LOCK_GIANT();
1284 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
1285 error = soshutdown(so, uap->how);
1286 fputsock(so);
1287 }
1288 NET_UNLOCK_GIANT();
1289 return(error);
1290}
1291
1292/*
1293 * MPSAFE
1294 */
1295/* ARGSUSED */
1296int
1297setsockopt(td, uap)
1298 struct thread *td;
1299 register struct setsockopt_args /* {
1300 int s;
1301 int level;
1302 int name;
1303 caddr_t val;
1304 int valsize;
1305 } */ *uap;
1306{
1307 struct socket *so;
1308 struct sockopt sopt;
1309 int error;
1310
1311 if (uap->val == 0 && uap->valsize != 0)
1312 return (EFAULT);
1313 if (uap->valsize < 0)
1314 return (EINVAL);
1315
1316 NET_LOCK_GIANT();
1317 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
1318 sopt.sopt_dir = SOPT_SET;
1319 sopt.sopt_level = uap->level;
1320 sopt.sopt_name = uap->name;
1321 sopt.sopt_val = uap->val;
1322 sopt.sopt_valsize = uap->valsize;
1323 sopt.sopt_td = td;
1324 error = sosetopt(so, &sopt);
1325 fputsock(so);
1326 }
1327 NET_UNLOCK_GIANT();
1328 return(error);
1329}
1330
1331/*
1332 * MPSAFE
1333 */
1334/* ARGSUSED */
1335int
1336getsockopt(td, uap)
1337 struct thread *td;
1338 register struct getsockopt_args /* {
1339 int s;
1340 int level;
1341 int name;
1342 void * __restrict val;
1343 socklen_t * __restrict avalsize;
1344 } */ *uap;
1345{
1346 socklen_t valsize;
1347 int error;
1348 struct socket *so;
1349 struct sockopt sopt;
1350
1351 NET_LOCK_GIANT();
1352 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
1353 goto done2;
1354 if (uap->val) {
1355 error = copyin(uap->avalsize, &valsize, sizeof (valsize));
1356 if (error)
1357 goto done1;
1358 if (valsize < 0) {
1359 error = EINVAL;
1360 goto done1;
1361 }
1362 } else {
1363 valsize = 0;
1364 }
1365
1366 sopt.sopt_dir = SOPT_GET;
1367 sopt.sopt_level = uap->level;
1368 sopt.sopt_name = uap->name;
1369 sopt.sopt_val = uap->val;
1370 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
1371 sopt.sopt_td = td;
1372
1373 error = sogetopt(so, &sopt);
1374 if (error == 0) {
1375 valsize = sopt.sopt_valsize;
1376 error = copyout(&valsize, uap->avalsize, sizeof (valsize));
1377 }
1378done1:
1379 fputsock(so);
1380done2:
1381 NET_UNLOCK_GIANT();
1382 return (error);
1383}
1384
1385/*
1386 * getsockname1() - Get socket name.
1387 *
1388 * MPSAFE
1389 */
1390/* ARGSUSED */
1391static int
1392getsockname1(td, uap, compat)
1393 struct thread *td;
1394 register struct getsockname_args /* {
1395 int fdes;
1396 struct sockaddr * __restrict asa;
1397 socklen_t * __restrict alen;
1398 } */ *uap;
1399 int compat;
1400{
1401 struct socket *so;
1402 struct sockaddr *sa;
1403 socklen_t len;
1404 int error;
1405
1406 NET_LOCK_GIANT();
1407 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
1408 goto done2;
1409 error = copyin(uap->alen, &len, sizeof (len));
1410 if (error)
1411 goto done1;
1412 if (len < 0) {
1413 error = EINVAL;
1414 goto done1;
1415 }
1416 sa = 0;
1417 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1418 if (error)
1419 goto bad;
1420 if (sa == 0) {
1421 len = 0;
1422 goto gotnothing;
1423 }
1424
1425 len = MIN(len, sa->sa_len);
1426#ifdef COMPAT_OLDSOCK
1427 if (compat)
1428 ((struct osockaddr *)sa)->sa_family = sa->sa_family;
1429#endif
1430 error = copyout(sa, uap->asa, (u_int)len);
1431 if (error == 0)
1432gotnothing:
1433 error = copyout(&len, uap->alen, sizeof (len));
1434bad:
1435 if (sa)
1436 FREE(sa, M_SONAME);
1437done1:
1438 fputsock(so);
1439done2:
1440 NET_UNLOCK_GIANT();
1441 return (error);
1442}
1443
/*
 * getsockname(2): getsockname1() without the old-sockaddr conversion.
 *
 * MPSAFE
 */
int
getsockname(td, uap)
	struct thread *td;
	struct getsockname_args *uap;
{
	int error;

	error = getsockname1(td, uap, 0);
	return (error);
}
1455
#ifdef COMPAT_OLDSOCK
/*
 * Old getsockname(2) entry point: getsockname1() with the old-sockaddr
 * conversion enabled.
 *
 * MPSAFE
 */
int
ogetsockname(td, uap)
	struct thread *td;
	struct getsockname_args *uap;
{
	int error;

	error = getsockname1(td, uap, 1);
	return (error);
}
#endif /* COMPAT_OLDSOCK */
1469
1470/*
1471 * getpeername1() - Get name of peer for connected socket.
1472 *
1473 * MPSAFE
1474 */
1475/* ARGSUSED */
1476static int
1477getpeername1(td, uap, compat)
1478 struct thread *td;
1479 register struct getpeername_args /* {
1480 int fdes;
1481 struct sockaddr * __restrict asa;
1482 socklen_t * __restrict alen;
1483 } */ *uap;
1484 int compat;
1485{
1486 struct socket *so;
1487 struct sockaddr *sa;
1488 socklen_t len;
1489 int error;
1490
1491 NET_LOCK_GIANT();
1492 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
1493 goto done2;
1494 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1495 error = ENOTCONN;
1496 goto done1;
1497 }
1498 error = copyin(uap->alen, &len, sizeof (len));
1499 if (error)
1500 goto done1;
1501 if (len < 0) {
1502 error = EINVAL;
1503 goto done1;
1504 }
1505 sa = 0;
1506 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1507 if (error)
1508 goto bad;
1509 if (sa == 0) {
1510 len = 0;
1511 goto gotnothing;
1512 }
1513 len = MIN(len, sa->sa_len);
1514#ifdef COMPAT_OLDSOCK
1515 if (compat)
1516 ((struct osockaddr *)sa)->sa_family =
1517 sa->sa_family;
1518#endif
1519 error = copyout(sa, uap->asa, (u_int)len);
1520 if (error)
1521 goto bad;
1522gotnothing:
1523 error = copyout(&len, uap->alen, sizeof (len));
1524bad:
1525 if (sa)
1526 FREE(sa, M_SONAME);
1527done1:
1528 fputsock(so);
1529done2:
1530 NET_UNLOCK_GIANT();
1531 return (error);
1532}
1533
/*
 * getpeername(2): getpeername1() without the old-sockaddr conversion.
 *
 * MPSAFE
 */
int
getpeername(td, uap)
	struct thread *td;
	struct getpeername_args *uap;
{
	int error;

	error = getpeername1(td, uap, 0);
	return (error);
}
1545
#ifdef COMPAT_OLDSOCK
/*
 * Old getpeername(2) entry point: getpeername1() with the old-sockaddr
 * conversion enabled.
 *
 * MPSAFE
 */
int
ogetpeername(td, uap)
	struct thread *td;
	struct ogetpeername_args *uap;
{
	struct getpeername_args *args;

	/* XXX uap should have type `getpeername_args *' to begin with. */
	args = (struct getpeername_args *)uap;
	return (getpeername1(td, args, 1));
}
#endif /* COMPAT_OLDSOCK */
1560
1561int
1562sockargs(mp, buf, buflen, type)
1563 struct mbuf **mp;
1564 caddr_t buf;
1565 int buflen, type;
1566{
1567 register struct sockaddr *sa;
1568 register struct mbuf *m;
1569 int error;
1570
1571 if ((u_int)buflen > MLEN) {
1572#ifdef COMPAT_OLDSOCK
1573 if (type == MT_SONAME && (u_int)buflen <= 112)
1574 buflen = MLEN; /* unix domain compat. hack */
1575 else
1576#endif
1577 if ((u_int)buflen > MCLBYTES)
1578 return (EINVAL);
1579 }
1580 m = m_get(M_TRYWAIT, type);
1581 if (m == NULL)
1582 return (ENOBUFS);
1583 if ((u_int)buflen > MLEN) {
1584 MCLGET(m, M_TRYWAIT);
1585 if ((m->m_flags & M_EXT) == 0) {
1586 m_free(m);
1587 return (ENOBUFS);
1588 }
1589 }
1590 m->m_len = buflen;
1591 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1592 if (error)
1593 (void) m_free(m);
1594 else {
1595 *mp = m;
1596 if (type == MT_SONAME) {
1597 sa = mtod(m, struct sockaddr *);
1598
1599#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1600 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1601 sa->sa_family = sa->sa_len;
1602#endif
1603 sa->sa_len = buflen;
1604 }
1605 }
1606 return (error);
1607}
1608
1609int
1610getsockaddr(namp, uaddr, len)
1611 struct sockaddr **namp;
1612 caddr_t uaddr;
1613 size_t len;
1614{
1615 struct sockaddr *sa;
1616 int error;
1617
1618 if (len > SOCK_MAXADDRLEN)
1619 return (ENAMETOOLONG);
1620 if (len < offsetof(struct sockaddr, sa_data[0]))
1621 return (EINVAL);
1622 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1623 error = copyin(uaddr, sa, len);
1624 if (error) {
1625 FREE(sa, M_SONAME);
1626 } else {
1627#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1628 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1629 sa->sa_family = sa->sa_len;
1630#endif
1631 sa->sa_len = len;
1632 *namp = sa;
1633 }
1634 return (error);
1635}
1636
1637/*
1638 * Detach mapped page and release resources back to the system.
1639 */
1640void
1641sf_buf_mext(void *addr, void *args)
1642{
1643 vm_page_t m;
1644
1645 m = sf_buf_page(args);
1646 sf_buf_free(args);
1647 vm_page_lock_queues();
1648 vm_page_unwire(m, 0);
1649 /*
1650 * Check for the object going away on us. This can
1651 * happen since we don't hold a reference to it.
1652 * If so, we're responsible for freeing the page.
1653 */
1654 if (m->wire_count == 0 && m->object == NULL)
1655 vm_page_free(m);
1656 vm_page_unlock_queues();
1657}
1658
/*
 * sendfile(2)
 *
 * MPSAFE
 *
 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 *	 struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 *
 * Send the file specified by 'fd', starting at 'offset', to the socket
 * specified by 's'.  Only 'nbytes' of the file are sent, or everything up
 * to EOF when nbytes == 0.  A header and/or trailer may optionally be
 * added to the socket output.  If 'sbytes' is specified, the total number
 * of bytes sent is written into *sbytes.
 *
 * This entry point simply calls do_sendfile() with compat == 0.
 */
int
sendfile(struct thread *td, struct sendfile_args *uap)
{
	int error;

	error = do_sendfile(td, uap, 0);
	return (error);
}
1679
#ifdef COMPAT_FREEBSD4
/*
 * FreeBSD 4 compatible sendfile(2) entry point: repackage the old argument
 * structure field by field as a struct sendfile_args and call
 * do_sendfile() with compat == 1.
 */
int
freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
{
	struct sendfile_args args;
	int error;

	args.fd = uap->fd;
	args.s = uap->s;
	args.offset = uap->offset;
	args.nbytes = uap->nbytes;
	args.hdtr = uap->hdtr;
	args.sbytes = uap->sbytes;
	args.flags = uap->flags;

	error = do_sendfile(td, &args, 1);
	return (error);
}
#endif /* COMPAT_FREEBSD4 */
1697
1698static int
1699do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
1700{
1701 struct vnode *vp;
1702 struct vm_object *obj;
1703 struct socket *so = NULL;
1704 struct mbuf *m, *m_header = NULL;
1705 struct sf_buf *sf;
1706 struct vm_page *pg;
1707 struct writev_args nuap;
1708 struct sf_hdtr hdtr;
1709 struct uio hdr_uio;
1710 off_t off, xfsize, hdtr_size, sbytes = 0;
1711 int error, s, headersize = 0, headersent = 0;
1712 struct iovec *hdr_iov = NULL;
1713
1714 mtx_lock(&Giant);
1715
1716 hdtr_size = 0;
1717
1718 /*
1719 * The descriptor must be a regular file and have a backing VM object.
1720 */
1721 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
1722 goto done;
1723 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1724 if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) {
1725 error = EINVAL;
1726 VOP_UNLOCK(vp, 0, td);
1727 goto done;
1728 }
1729 VOP_UNLOCK(vp, 0, td);
1730 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
1731 goto done;
1732 if (so->so_type != SOCK_STREAM) {
1733 error = EINVAL;
1734 goto done;
1735 }
1736 if ((so->so_state & SS_ISCONNECTED) == 0) {
1737 error = ENOTCONN;
1738 goto done;
1739 }
1740 if (uap->offset < 0) {
1741 error = EINVAL;
1742 goto done;
1743 }
1744
1745#ifdef MAC
1746 error = mac_check_socket_send(td->td_ucred, so);
1747 if (error)
1748 goto done;
1749#endif
1750
1751 /*
1752 * If specified, get the pointer to the sf_hdtr struct for
1753 * any headers/trailers.
1754 */
1755 if (uap->hdtr != NULL) {
1756 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1757 if (error)
1758 goto done;
1759 /*
1760 * Send any headers.
1761 */
1762 if (hdtr.headers != NULL) {
1763 hdr_uio.uio_td = td;
1764 hdr_uio.uio_rw = UIO_WRITE;
1765 error = uiofromiov(hdtr.headers, hdtr.hdr_cnt,
1766 &hdr_uio);
1767 if (error)
1768 goto done;
1769 /* Cache hdr_iov, m_uiotombuf may change it. */
1770 hdr_iov = hdr_uio.uio_iov;
1771 if (hdr_uio.uio_resid > 0) {
1772 m_header = m_uiotombuf(&hdr_uio, M_DONTWAIT, 0);
1773 if (m_header == NULL)
1774 goto done;
1775 headersize = m_header->m_pkthdr.len;
1776 if (compat)
1777 sbytes += headersize;
1778 }
1779 }
1780 }
1781
1782 /*
1783 * Protect against multiple writers to the socket.
1784 */
1785 (void) sblock(&so->so_snd, M_WAITOK);
1786
1787 /*
1788 * Loop through the pages in the file, starting with the requested
1789 * offset. Get a file page (do I/O if necessary), map the file page
1790 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1791 * it on the socket.
1792 */
1793 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
1794 vm_pindex_t pindex;
1795 vm_offset_t pgoff;
1796
1797 pindex = OFF_TO_IDX(off);
1798 VM_OBJECT_LOCK(obj);
1799retry_lookup:
1800 /*
1801 * Calculate the amount to transfer. Not to exceed a page,
1802 * the EOF, or the passed in nbytes.
1803 */
1804 xfsize = obj->un_pager.vnp.vnp_size - off;
1805 VM_OBJECT_UNLOCK(obj);
1806 if (xfsize > PAGE_SIZE)
1807 xfsize = PAGE_SIZE;
1808 pgoff = (vm_offset_t)(off & PAGE_MASK);
1809 if (PAGE_SIZE - pgoff < xfsize)
1810 xfsize = PAGE_SIZE - pgoff;
1811 if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
1812 xfsize = uap->nbytes - sbytes;
1813 if (xfsize <= 0) {
1814 if (m_header != NULL) {
1815 m = m_header;
1816 m_header = NULL;
1817 goto retry_space;
1818 } else
1819 break;
1820 }
1821 /*
1822 * Optimize the non-blocking case by looking at the socket space
1823 * before going to the extra work of constituting the sf_buf.
1824 */
1825 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
1826 if (so->so_state & SS_CANTSENDMORE)
1827 error = EPIPE;
1828 else
1829 error = EAGAIN;
1830 sbunlock(&so->so_snd);
1831 goto done;
1832 }
1833 VM_OBJECT_LOCK(obj);
1834 /*
1835 * Attempt to look up the page.
1836 *
1837 * Allocate if not found
1838 *
1839 * Wait and loop if busy.
1840 */
1841 pg = vm_page_lookup(obj, pindex);
1842
1843 if (pg == NULL) {
1844 pg = vm_page_alloc(obj, pindex,
1845 VM_ALLOC_NORMAL | VM_ALLOC_WIRED);
1846 if (pg == NULL) {
1847 VM_OBJECT_UNLOCK(obj);
1848 VM_WAIT;
1849 VM_OBJECT_LOCK(obj);
1850 goto retry_lookup;
1851 }
1852 vm_page_lock_queues();
1853 vm_page_wakeup(pg);
1854 } else {
1855 vm_page_lock_queues();
1856 if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy"))
1857 goto retry_lookup;
1858 /*
1859 * Wire the page so it does not get ripped out from
1860 * under us.
1861 */
1862 vm_page_wire(pg);
1863 }
1864
1865 /*
1866 * If page is not valid for what we need, initiate I/O
1867 */
1868
1869 if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) {
1870 VM_OBJECT_UNLOCK(obj);
1871 } else if (uap->flags & SF_NODISKIO) {
1872 error = EBUSY;
1873 } else {
1874 int bsize, resid;
1875
1876 /*
1877 * Ensure that our page is still around when the I/O
1878 * completes.
1879 */
1880 vm_page_io_start(pg);
1881 vm_page_unlock_queues();
1882 VM_OBJECT_UNLOCK(obj);
1883
1884 /*
1885 * Get the page from backing store.
1886 */
1887 bsize = vp->v_mount->mnt_stat.f_iosize;
1888 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td);
1889 /*
1890 * XXXMAC: Because we don't have fp->f_cred here,
1891 * we pass in NOCRED. This is probably wrong, but
1892 * is consistent with our original implementation.
1893 */
1894 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
1895 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
1896 IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
1897 td->td_ucred, NOCRED, &resid, td);
1898 VOP_UNLOCK(vp, 0, td);
1899 if (error)
1900 VM_OBJECT_LOCK(obj);
1901 vm_page_lock_queues();
1902 vm_page_io_finish(pg);
1903 mbstat.sf_iocnt++;
1904 }
1905
1906 if (error) {
1907 vm_page_unwire(pg, 0);
1908 /*
1909 * See if anyone else might know about this page.
1910 * If not and it is not valid, then free it.
1911 */
1912 if (pg->wire_count == 0 && pg->valid == 0 &&
1913 pg->busy == 0 && !(pg->flags & PG_BUSY) &&
1914 pg->hold_count == 0) {
1915 vm_page_busy(pg);
1916 vm_page_free(pg);
1917 }
1918 vm_page_unlock_queues();
1919 VM_OBJECT_UNLOCK(obj);
1920 sbunlock(&so->so_snd);
1921 goto done;
1922 }
1923 vm_page_unlock_queues();
1924
1925 /*
1926 * Get a sendfile buf. We usually wait as long as necessary,
1927 * but this wait can be interrupted.
1928 */
1929 if ((sf = sf_buf_alloc(pg, PCATCH)) == NULL) {
1930 mbstat.sf_allocfail++;
1931 vm_page_lock_queues();
1932 vm_page_unwire(pg, 0);
1933 if (pg->wire_count == 0 && pg->object == NULL)
1934 vm_page_free(pg);
1935 vm_page_unlock_queues();
1936 sbunlock(&so->so_snd);
1937 error = EINTR;
1938 goto done;
1939 }
1940
1941 /*
1942 * Get an mbuf header and set it up as having external storage.
1943 */
1944 if (m_header)
1945 MGET(m, M_TRYWAIT, MT_DATA);
1946 else
1947 MGETHDR(m, M_TRYWAIT, MT_DATA);
1948 if (m == NULL) {
1949 error = ENOBUFS;
1950 sf_buf_mext((void *)sf_buf_kva(sf), sf);
1951 sbunlock(&so->so_snd);
1952 goto done;
1953 }
1954 /*
1955 * Setup external storage for mbuf.
1956 */
1957 MEXTADD(m, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, sf, M_RDONLY,
1958 EXT_SFBUF);
1959 m->m_data = (char *)sf_buf_kva(sf) + pgoff;
1960 m->m_pkthdr.len = m->m_len = xfsize;
1961
1962 if (m_header) {
1963 m_cat(m_header, m);
1964 m = m_header;
1965 m_header = NULL;
1966 m_fixhdr(m);
1967 }
1968
1969 /*
1970 * Add the buffer to the socket buffer chain.
1971 */
1972 s = splnet();
1973retry_space:
1974 /*
1975 * Make sure that the socket is still able to take more data.
1976 * CANTSENDMORE being true usually means that the connection
1977 * was closed. so_error is true when an error was sensed after
1978 * a previous send.
1979 * The state is checked after the page mapping and buffer
1980 * allocation above since those operations may block and make
1981 * any socket checks stale. From this point forward, nothing
1982 * blocks before the pru_send (or more accurately, any blocking
1983 * results in a loop back to here to re-check).
1984 */
1985 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
1986 if (so->so_state & SS_CANTSENDMORE) {
1987 error = EPIPE;
1988 } else {
1989 error = so->so_error;
1990 so->so_error = 0;
1991 }
1992 m_freem(m);
1993 sbunlock(&so->so_snd);
1994 splx(s);
1995 goto done;
1996 }
1997 /*
1998 * Wait for socket space to become available. We do this just
1999 * after checking the connection state above in order to avoid
2000 * a race condition with sbwait().
2001 */
2002 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
2003 if (so->so_state & SS_NBIO) {
2004 m_freem(m);
2005 sbunlock(&so->so_snd);
2006 splx(s);
2007 error = EAGAIN;
2008 goto done;
2009 }
2010 error = sbwait(&so->so_snd);
2011 /*
2012 * An error from sbwait usually indicates that we've
2013 * been interrupted by a signal. If we've sent anything
2014 * then return bytes sent, otherwise return the error.
2015 */
2016 if (error) {
2017 m_freem(m);
2018 sbunlock(&so->so_snd);
2019 splx(s);
2020 goto done;
2021 }
2022 goto retry_space;
2023 }
2024 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td);
2025 splx(s);
2026 if (error) {
2027 sbunlock(&so->so_snd);
2028 goto done;
2029 }
2030 headersent = 1;
2031 }
2032 sbunlock(&so->so_snd);
2033
2034 /*
2035 * Send trailers. Wimp out and use writev(2).
2036 */
2037 if (uap->hdtr != NULL && hdtr.trailers != NULL) {
2038 nuap.fd = uap->s;
2039 nuap.iovp = hdtr.trailers;
2040 nuap.iovcnt = hdtr.trl_cnt;
2041 error = writev(td, &nuap);
2042 if (error)
2043 goto done;
2044 if (compat)
2045 sbytes += td->td_retval[0];
2046 else
2047 hdtr_size += td->td_retval[0];
2048 }
2049
2050done:
2051 if (headersent) {
2052 if (!compat)
2053 hdtr_size += headersize;
2054 } else {
2055 if (compat)
2056 sbytes -= headersize;
2057 }
2058 /*
2059 * If there was no error we have to clear td->td_retval[0]
2060 * because it may have been set by writev.
2061 */
2062 if (error == 0) {
2063 td->td_retval[0] = 0;
2064 }
2065 if (uap->sbytes != NULL) {
2066 if (!compat)
2067 sbytes += hdtr_size;
2068 copyout(&sbytes, uap->sbytes, sizeof(off_t));
2069 }
2070 if (vp)
2071 vrele(vp);
2072 if (so)
2073 fputsock(so);
2074 if (hdr_iov)
2075 FREE(hdr_iov, M_IOV);
2076 if (m_header)
2077 m_freem(m_header);
2078
2079 mtx_unlock(&Giant);
2080
2081 if (error == ERESTART)
2082 error = EINTR;
2083
2084 return (error);
2085}