sctp_syscalls.c (86487) sctp_syscalls.c (88739)
1/*
2 * Copyright (c) 1982, 1986, 1989, 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * sendfile(2) and related extensions:
6 * Copyright (c) 1998, David Greenman. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
37 * $FreeBSD: head/sys/kern/uipc_syscalls.c 86487 2001-11-17 03:07:11Z dillon $
37 * $FreeBSD: head/sys/kern/uipc_syscalls.c 88739 2001-12-31 17:45:16Z rwatson $
38 */
39
40#include "opt_compat.h"
41#include "opt_ktrace.h"
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/kernel.h>
46#include <sys/lock.h>
47#include <sys/mutex.h>
48#include <sys/sysproto.h>
49#include <sys/malloc.h>
50#include <sys/filedesc.h>
51#include <sys/event.h>
52#include <sys/proc.h>
53#include <sys/fcntl.h>
54#include <sys/file.h>
55#include <sys/lock.h>
56#include <sys/mount.h>
57#include <sys/mbuf.h>
58#include <sys/protosw.h>
59#include <sys/socket.h>
60#include <sys/socketvar.h>
61#include <sys/signalvar.h>
62#include <sys/uio.h>
63#include <sys/vnode.h>
64#ifdef KTRACE
65#include <sys/ktrace.h>
66#endif
67
68#include <vm/vm.h>
69#include <vm/vm_object.h>
70#include <vm/vm_page.h>
71#include <vm/vm_pageout.h>
72#include <vm/vm_kern.h>
73#include <vm/vm_extern.h>
74
75static void sf_buf_init(void *arg);
76SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
77static struct sf_buf *sf_buf_alloc(void);
78static void sf_buf_free(caddr_t addr, void *args);
79
80static int sendit __P((struct thread *td, int s, struct msghdr *mp, int flags));
81static int recvit __P((struct thread *td, int s, struct msghdr *mp,
82 caddr_t namelenp));
83
84static int accept1 __P((struct thread *td, struct accept_args *uap, int compat));
85static int getsockname1 __P((struct thread *td, struct getsockname_args *uap,
86 int compat));
87static int getpeername1 __P((struct thread *td, struct getpeername_args *uap,
88 int compat));
89
90/*
91 * Expanded sf_freelist head. Really an SLIST_HEAD() in disguise, with the
 92 * sf_freelist head protected by the sf_lock mutex.
93 */
94static struct {
95 SLIST_HEAD(, sf_buf) sf_head;
96 struct mtx sf_lock;
97} sf_freelist;
98
99static vm_offset_t sf_base;
100static struct sf_buf *sf_bufs;
101static u_int sf_buf_alloc_want;
102
103/*
104 * System call interface to the socket abstraction.
105 */
106#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
107#define COMPAT_OLDSOCK
108#endif
109
110extern struct fileops socketops;
111
112/*
113 * MPSAFE
114 */
115int
116socket(td, uap)
117 struct thread *td;
118 register struct socket_args /* {
119 int domain;
120 int type;
121 int protocol;
122 } */ *uap;
123{
124 struct filedesc *fdp;
125 struct socket *so;
126 struct file *fp;
127 int fd, error;
128
129 mtx_lock(&Giant);
130 fdp = td->td_proc->p_fd;
131 error = falloc(td, &fp, &fd);
132 if (error)
133 goto done2;
134 fhold(fp);
135 error = socreate(uap->domain, &so, uap->type, uap->protocol, td);
135 error = socreate(uap->domain, &so, uap->type, uap->protocol,
136 td->td_proc->p_ucred, td);
136 if (error) {
137 if (fdp->fd_ofiles[fd] == fp) {
138 fdp->fd_ofiles[fd] = NULL;
139 fdrop(fp, td);
140 }
141 } else {
142 fp->f_data = (caddr_t)so; /* already has ref count */
143 fp->f_flag = FREAD|FWRITE;
144 fp->f_ops = &socketops;
145 fp->f_type = DTYPE_SOCKET;
146 td->td_retval[0] = fd;
147 }
148 fdrop(fp, td);
149done2:
150 mtx_unlock(&Giant);
151 return (error);
152}
153
154/*
155 * MPSAFE
156 */
157/* ARGSUSED */
158int
159bind(td, uap)
160 struct thread *td;
161 register struct bind_args /* {
162 int s;
163 caddr_t name;
164 int namelen;
165 } */ *uap;
166{
167 struct sockaddr *sa;
168 struct socket *sp;
169 int error;
170
171 mtx_lock(&Giant);
172 if ((error = fgetsock(td, uap->s, &sp, NULL)) != 0)
173 goto done2;
174 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
175 goto done1;
176 error = sobind(sp, sa, td);
177 FREE(sa, M_SONAME);
178done1:
179 fputsock(sp);
180done2:
181 mtx_unlock(&Giant);
182 return (error);
183}
184
185/*
186 * MPSAFE
187 */
188/* ARGSUSED */
189int
190listen(td, uap)
191 struct thread *td;
192 register struct listen_args /* {
193 int s;
194 int backlog;
195 } */ *uap;
196{
197 struct socket *sp;
198 int error;
199
200 mtx_lock(&Giant);
201 if ((error = fgetsock(td, uap->s, &sp, NULL)) == 0) {
202 error = solisten(sp, uap->backlog, td);
203 fputsock(sp);
204 }
205 mtx_unlock(&Giant);
206 return(error);
207}
208
209/*
210 * accept1()
211 * MPSAFE
212 */
213static int
214accept1(td, uap, compat)
215 struct thread *td;
216 register struct accept_args /* {
217 int s;
218 caddr_t name;
219 int *anamelen;
220 } */ *uap;
221 int compat;
222{
223 struct filedesc *fdp;
224 struct file *nfp = NULL;
225 struct sockaddr *sa;
226 int namelen, error, s;
227 struct socket *head, *so;
228 int fd;
229 u_int fflag;
230
231 mtx_lock(&Giant);
232 fdp = td->td_proc->p_fd;
233 if (uap->name) {
234 error = copyin((caddr_t)uap->anamelen, (caddr_t)&namelen,
235 sizeof (namelen));
236 if(error)
237 goto done2;
238 }
239 error = fgetsock(td, uap->s, &head, &fflag);
240 if (error)
241 goto done2;
242 s = splnet();
243 if ((head->so_options & SO_ACCEPTCONN) == 0) {
244 splx(s);
245 error = EINVAL;
246 goto done;
247 }
248 if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
249 splx(s);
250 error = EWOULDBLOCK;
251 goto done;
252 }
253 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
254 if (head->so_state & SS_CANTRCVMORE) {
255 head->so_error = ECONNABORTED;
256 break;
257 }
258 error = tsleep((caddr_t)&head->so_timeo, PSOCK | PCATCH,
259 "accept", 0);
260 if (error) {
261 splx(s);
262 goto done;
263 }
264 }
265 if (head->so_error) {
266 error = head->so_error;
267 head->so_error = 0;
268 splx(s);
269 goto done;
270 }
271
272 /*
273 * At this point we know that there is at least one connection
274 * ready to be accepted. Remove it from the queue prior to
275 * allocating the file descriptor for it since falloc() may
276 * block allowing another process to accept the connection
277 * instead.
278 */
279 so = TAILQ_FIRST(&head->so_comp);
280 TAILQ_REMOVE(&head->so_comp, so, so_list);
281 head->so_qlen--;
282
283 error = falloc(td, &nfp, &fd);
284 if (error) {
285 /*
286 * Probably ran out of file descriptors. Put the
287 * unaccepted connection back onto the queue and
288 * do another wakeup so some other process might
289 * have a chance at it.
290 */
291 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list);
292 head->so_qlen++;
293 wakeup_one(&head->so_timeo);
294 splx(s);
295 goto done;
296 }
297 fhold(nfp);
298 td->td_retval[0] = fd;
299
300 /* connection has been removed from the listen queue */
301 KNOTE(&head->so_rcv.sb_sel.si_note, 0);
302
303 so->so_state &= ~SS_COMP;
304 so->so_head = NULL;
305 if (head->so_sigio != NULL)
306 fsetown(fgetown(head->so_sigio), &so->so_sigio);
307
308 soref(so); /* file descriptor reference */
309 nfp->f_data = (caddr_t)so; /* nfp has ref count from falloc */
310 nfp->f_flag = fflag;
311 nfp->f_ops = &socketops;
312 nfp->f_type = DTYPE_SOCKET;
313 sa = 0;
314 error = soaccept(so, &sa);
315 if (error) {
316 /*
317 * return a namelen of zero for older code which might
318 * ignore the return value from accept.
319 */
320 if (uap->name != NULL) {
321 namelen = 0;
322 (void) copyout((caddr_t)&namelen,
323 (caddr_t)uap->anamelen, sizeof(*uap->anamelen));
324 }
325 goto noconnection;
326 }
327 if (sa == NULL) {
328 namelen = 0;
329 if (uap->name)
330 goto gotnoname;
331 splx(s);
332 error = 0;
333 goto done;
334 }
335 if (uap->name) {
336 /* check sa_len before it is destroyed */
337 if (namelen > sa->sa_len)
338 namelen = sa->sa_len;
339#ifdef COMPAT_OLDSOCK
340 if (compat)
341 ((struct osockaddr *)sa)->sa_family =
342 sa->sa_family;
343#endif
344 error = copyout(sa, (caddr_t)uap->name, (u_int)namelen);
345 if (!error)
346gotnoname:
347 error = copyout((caddr_t)&namelen,
348 (caddr_t)uap->anamelen, sizeof (*uap->anamelen));
349 }
350noconnection:
351 if (sa)
352 FREE(sa, M_SONAME);
353
354 /*
355 * close the new descriptor, assuming someone hasn't ripped it
356 * out from under us.
357 */
358 if (error) {
359 if (fdp->fd_ofiles[fd] == nfp) {
360 fdp->fd_ofiles[fd] = NULL;
361 fdrop(nfp, td);
362 }
363 }
364 splx(s);
365
366 /*
367 * Release explicitly held references before returning.
368 */
369done:
370 if (nfp != NULL)
371 fdrop(nfp, td);
372 fputsock(head);
373done2:
374 mtx_unlock(&Giant);
375 return (error);
376}
377
378/*
379 * MPSAFE (accept1() is MPSAFE)
380 */
381int
382accept(td, uap)
383 struct thread *td;
384 struct accept_args *uap;
385{
386
387 return (accept1(td, uap, 0));
388}
389
390#ifdef COMPAT_OLDSOCK
391/*
392 * MPSAFE (accept1() is MPSAFE)
393 */
394int
395oaccept(td, uap)
396 struct thread *td;
397 struct accept_args *uap;
398{
399
400 return (accept1(td, uap, 1));
401}
402#endif /* COMPAT_OLDSOCK */
403
404/*
405 * MPSAFE
406 */
407/* ARGSUSED */
408int
409connect(td, uap)
410 struct thread *td;
411 register struct connect_args /* {
412 int s;
413 caddr_t name;
414 int namelen;
415 } */ *uap;
416{
417 struct socket *so;
418 struct sockaddr *sa;
419 int error, s;
420
421 mtx_lock(&Giant);
422 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
423 goto done2;
424 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
425 error = EALREADY;
426 goto done1;
427 }
428 error = getsockaddr(&sa, uap->name, uap->namelen);
429 if (error)
430 goto done1;
431 error = soconnect(so, sa, td);
432 if (error)
433 goto bad;
434 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
435 FREE(sa, M_SONAME);
436 error = EINPROGRESS;
437 goto done1;
438 }
439 s = splnet();
440 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
441 error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH, "connec", 0);
442 if (error)
443 break;
444 }
445 if (error == 0) {
446 error = so->so_error;
447 so->so_error = 0;
448 }
449 splx(s);
450bad:
451 so->so_state &= ~SS_ISCONNECTING;
452 FREE(sa, M_SONAME);
453 if (error == ERESTART)
454 error = EINTR;
455done1:
456 fputsock(so);
457done2:
458 mtx_unlock(&Giant);
459 return (error);
460}
461
462/*
463 * MPSAFE
464 */
465int
466socketpair(td, uap)
467 struct thread *td;
468 register struct socketpair_args /* {
469 int domain;
470 int type;
471 int protocol;
472 int *rsv;
473 } */ *uap;
474{
475 register struct filedesc *fdp = td->td_proc->p_fd;
476 struct file *fp1, *fp2;
477 struct socket *so1, *so2;
478 int fd, error, sv[2];
479
480 mtx_lock(&Giant);
481 error = socreate(uap->domain, &so1, uap->type, uap->protocol, td);
482 error = socreate(uap->domain, &so1, uap->type, uap->protocol,
483 td->td_proc->p_ucred, td);
482 if (error)
483 goto done2;
484 if (error)
485 goto done2;
484 error = socreate(uap->domain, &so2, uap->type, uap->protocol, td);
486 error = socreate(uap->domain, &so2, uap->type, uap->protocol,
487 td->td_proc->p_ucred, td);
485 if (error)
486 goto free1;
487 error = falloc(td, &fp1, &fd);
488 if (error)
489 goto free2;
490 fhold(fp1);
491 sv[0] = fd;
492 fp1->f_data = (caddr_t)so1; /* so1 already has ref count */
493 error = falloc(td, &fp2, &fd);
494 if (error)
495 goto free3;
496 fhold(fp2);
497 fp2->f_data = (caddr_t)so2; /* so2 already has ref count */
498 sv[1] = fd;
499 error = soconnect2(so1, so2);
500 if (error)
501 goto free4;
502 if (uap->type == SOCK_DGRAM) {
503 /*
504 * Datagram socket connection is asymmetric.
505 */
506 error = soconnect2(so2, so1);
507 if (error)
508 goto free4;
509 }
510 fp1->f_flag = fp2->f_flag = FREAD|FWRITE;
511 fp1->f_ops = fp2->f_ops = &socketops;
512 fp1->f_type = fp2->f_type = DTYPE_SOCKET;
513 error = copyout((caddr_t)sv, (caddr_t)uap->rsv, 2 * sizeof (int));
514 fdrop(fp1, td);
515 fdrop(fp2, td);
516 goto done2;
517free4:
518 if (fdp->fd_ofiles[sv[1]] == fp2) {
519 fdp->fd_ofiles[sv[1]] = NULL;
520 fdrop(fp2, td);
521 }
522 fdrop(fp2, td);
523free3:
524 if (fdp->fd_ofiles[sv[0]] == fp1) {
525 fdp->fd_ofiles[sv[0]] = NULL;
526 fdrop(fp1, td);
527 }
528 fdrop(fp1, td);
529free2:
530 (void)soclose(so2);
531free1:
532 (void)soclose(so1);
533done2:
534 mtx_unlock(&Giant);
535 return (error);
536}
537
538static int
539sendit(td, s, mp, flags)
540 register struct thread *td;
541 int s;
542 register struct msghdr *mp;
543 int flags;
544{
545 struct uio auio;
546 register struct iovec *iov;
547 register int i;
548 struct mbuf *control;
549 struct sockaddr *to = NULL;
550 int len, error;
551 struct socket *so;
552#ifdef KTRACE
553 struct iovec *ktriov = NULL;
554 struct uio ktruio;
555#endif
556
557 if ((error = fgetsock(td, s, &so, NULL)) != 0)
558 return (error);
559 auio.uio_iov = mp->msg_iov;
560 auio.uio_iovcnt = mp->msg_iovlen;
561 auio.uio_segflg = UIO_USERSPACE;
562 auio.uio_rw = UIO_WRITE;
563 auio.uio_td = td;
564 auio.uio_offset = 0; /* XXX */
565 auio.uio_resid = 0;
566 iov = mp->msg_iov;
567 for (i = 0; i < mp->msg_iovlen; i++, iov++) {
568 if ((auio.uio_resid += iov->iov_len) < 0) {
569 error = EINVAL;
570 goto bad;
571 }
572 }
573 if (mp->msg_name) {
574 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
575 if (error)
576 goto bad;
577 }
578 if (mp->msg_control) {
579 if (mp->msg_controllen < sizeof(struct cmsghdr)
580#ifdef COMPAT_OLDSOCK
581 && mp->msg_flags != MSG_COMPAT
582#endif
583 ) {
584 error = EINVAL;
585 goto bad;
586 }
587 error = sockargs(&control, mp->msg_control,
588 mp->msg_controllen, MT_CONTROL);
589 if (error)
590 goto bad;
591#ifdef COMPAT_OLDSOCK
592 if (mp->msg_flags == MSG_COMPAT) {
593 register struct cmsghdr *cm;
594
595 M_PREPEND(control, sizeof(*cm), M_TRYWAIT);
596 if (control == 0) {
597 error = ENOBUFS;
598 goto bad;
599 } else {
600 cm = mtod(control, struct cmsghdr *);
601 cm->cmsg_len = control->m_len;
602 cm->cmsg_level = SOL_SOCKET;
603 cm->cmsg_type = SCM_RIGHTS;
604 }
605 }
606#endif
607 } else {
608 control = 0;
609 }
610#ifdef KTRACE
611 if (KTRPOINT(td->td_proc, KTR_GENIO)) {
612 int iovlen = auio.uio_iovcnt * sizeof (struct iovec);
613
614 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
615 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
616 ktruio = auio;
617 }
618#endif
619 len = auio.uio_resid;
620 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, &auio, 0, control,
621 flags, td);
622 if (error) {
623 if (auio.uio_resid != len && (error == ERESTART ||
624 error == EINTR || error == EWOULDBLOCK))
625 error = 0;
626 if (error == EPIPE) {
627 PROC_LOCK(td->td_proc);
628 psignal(td->td_proc, SIGPIPE);
629 PROC_UNLOCK(td->td_proc);
630 }
631 }
632 if (error == 0)
633 td->td_retval[0] = len - auio.uio_resid;
634#ifdef KTRACE
635 if (ktriov != NULL) {
636 if (error == 0) {
637 ktruio.uio_iov = ktriov;
638 ktruio.uio_resid = td->td_retval[0];
639 ktrgenio(td->td_proc->p_tracep, s, UIO_WRITE, &ktruio, error);
640 }
641 FREE(ktriov, M_TEMP);
642 }
643#endif
644bad:
645 fputsock(so);
646 if (to)
647 FREE(to, M_SONAME);
648 return (error);
649}
650
651/*
652 * MPSAFE
653 */
654int
655sendto(td, uap)
656 struct thread *td;
657 register struct sendto_args /* {
658 int s;
659 caddr_t buf;
660 size_t len;
661 int flags;
662 caddr_t to;
663 int tolen;
664 } */ *uap;
665{
666 struct msghdr msg;
667 struct iovec aiov;
668 int error;
669
670 msg.msg_name = uap->to;
671 msg.msg_namelen = uap->tolen;
672 msg.msg_iov = &aiov;
673 msg.msg_iovlen = 1;
674 msg.msg_control = 0;
675#ifdef COMPAT_OLDSOCK
676 msg.msg_flags = 0;
677#endif
678 aiov.iov_base = uap->buf;
679 aiov.iov_len = uap->len;
680 mtx_lock(&Giant);
681 error = sendit(td, uap->s, &msg, uap->flags);
682 mtx_unlock(&Giant);
683 return (error);
684}
685
686#ifdef COMPAT_OLDSOCK
687/*
688 * MPSAFE
689 */
690int
691osend(td, uap)
692 struct thread *td;
693 register struct osend_args /* {
694 int s;
695 caddr_t buf;
696 int len;
697 int flags;
698 } */ *uap;
699{
700 struct msghdr msg;
701 struct iovec aiov;
702 int error;
703
704 msg.msg_name = 0;
705 msg.msg_namelen = 0;
706 msg.msg_iov = &aiov;
707 msg.msg_iovlen = 1;
708 aiov.iov_base = uap->buf;
709 aiov.iov_len = uap->len;
710 msg.msg_control = 0;
711 msg.msg_flags = 0;
712 mtx_lock(&Giant);
713 error = sendit(td, uap->s, &msg, uap->flags);
714 mtx_unlock(&Giant);
715 return (error);
716}
717
718/*
719 * MPSAFE
720 */
721int
722osendmsg(td, uap)
723 struct thread *td;
724 register struct osendmsg_args /* {
725 int s;
726 caddr_t msg;
727 int flags;
728 } */ *uap;
729{
730 struct msghdr msg;
731 struct iovec aiov[UIO_SMALLIOV], *iov;
732 int error;
733
734 mtx_lock(&Giant);
735 error = copyin(uap->msg, (caddr_t)&msg, sizeof (struct omsghdr));
736 if (error)
737 goto done2;
738 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
739 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
740 error = EMSGSIZE;
741 goto done2;
742 }
743 MALLOC(iov, struct iovec *,
744 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
745 M_WAITOK);
746 } else {
747 iov = aiov;
748 }
749 error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
750 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
751 if (error)
752 goto done;
753 msg.msg_flags = MSG_COMPAT;
754 msg.msg_iov = iov;
755 error = sendit(td, uap->s, &msg, uap->flags);
756done:
757 if (iov != aiov)
758 FREE(iov, M_IOV);
759done2:
760 mtx_unlock(&Giant);
761 return (error);
762}
763#endif
764
765/*
766 * MPSAFE
767 */
768int
769sendmsg(td, uap)
770 struct thread *td;
771 register struct sendmsg_args /* {
772 int s;
773 caddr_t msg;
774 int flags;
775 } */ *uap;
776{
777 struct msghdr msg;
778 struct iovec aiov[UIO_SMALLIOV], *iov;
779 int error;
780
781 mtx_lock(&Giant);
782 error = copyin(uap->msg, (caddr_t)&msg, sizeof (msg));
783 if (error)
784 goto done2;
785 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
786 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
787 error = EMSGSIZE;
788 goto done2;
789 }
790 MALLOC(iov, struct iovec *,
791 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
792 M_WAITOK);
793 } else {
794 iov = aiov;
795 }
796 if (msg.msg_iovlen &&
797 (error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
798 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)))))
799 goto done;
800 msg.msg_iov = iov;
801#ifdef COMPAT_OLDSOCK
802 msg.msg_flags = 0;
803#endif
804 error = sendit(td, uap->s, &msg, uap->flags);
805done:
806 if (iov != aiov)
807 FREE(iov, M_IOV);
808done2:
809 mtx_unlock(&Giant);
810 return (error);
811}
812
813static int
814recvit(td, s, mp, namelenp)
815 register struct thread *td;
816 int s;
817 register struct msghdr *mp;
818 caddr_t namelenp;
819{
820 struct uio auio;
821 register struct iovec *iov;
822 register int i;
823 int len, error;
824 struct mbuf *m, *control = 0;
825 caddr_t ctlbuf;
826 struct socket *so;
827 struct sockaddr *fromsa = 0;
828#ifdef KTRACE
829 struct iovec *ktriov = NULL;
830 struct uio ktruio;
831#endif
832
833 if ((error = fgetsock(td, s, &so, NULL)) != 0)
834 return (error);
835 auio.uio_iov = mp->msg_iov;
836 auio.uio_iovcnt = mp->msg_iovlen;
837 auio.uio_segflg = UIO_USERSPACE;
838 auio.uio_rw = UIO_READ;
839 auio.uio_td = td;
840 auio.uio_offset = 0; /* XXX */
841 auio.uio_resid = 0;
842 iov = mp->msg_iov;
843 for (i = 0; i < mp->msg_iovlen; i++, iov++) {
844 if ((auio.uio_resid += iov->iov_len) < 0) {
845 fputsock(so);
846 return (EINVAL);
847 }
848 }
849#ifdef KTRACE
850 if (KTRPOINT(td->td_proc, KTR_GENIO)) {
851 int iovlen = auio.uio_iovcnt * sizeof (struct iovec);
852
853 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
854 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
855 ktruio = auio;
856 }
857#endif
858 len = auio.uio_resid;
859 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
860 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
861 &mp->msg_flags);
862 if (error) {
863 if (auio.uio_resid != len && (error == ERESTART ||
864 error == EINTR || error == EWOULDBLOCK))
865 error = 0;
866 }
867#ifdef KTRACE
868 if (ktriov != NULL) {
869 if (error == 0) {
870 ktruio.uio_iov = ktriov;
871 ktruio.uio_resid = len - auio.uio_resid;
872 ktrgenio(td->td_proc->p_tracep, s, UIO_READ, &ktruio, error);
873 }
874 FREE(ktriov, M_TEMP);
875 }
876#endif
877 if (error)
878 goto out;
879 td->td_retval[0] = len - auio.uio_resid;
880 if (mp->msg_name) {
881 len = mp->msg_namelen;
882 if (len <= 0 || fromsa == 0)
883 len = 0;
884 else {
885#ifndef MIN
886#define MIN(a,b) ((a)>(b)?(b):(a))
887#endif
888 /* save sa_len before it is destroyed by MSG_COMPAT */
889 len = MIN(len, fromsa->sa_len);
890#ifdef COMPAT_OLDSOCK
891 if (mp->msg_flags & MSG_COMPAT)
892 ((struct osockaddr *)fromsa)->sa_family =
893 fromsa->sa_family;
894#endif
895 error = copyout(fromsa,
896 (caddr_t)mp->msg_name, (unsigned)len);
897 if (error)
898 goto out;
899 }
900 mp->msg_namelen = len;
901 if (namelenp &&
902 (error = copyout((caddr_t)&len, namelenp, sizeof (int)))) {
903#ifdef COMPAT_OLDSOCK
904 if (mp->msg_flags & MSG_COMPAT)
905 error = 0; /* old recvfrom didn't check */
906 else
907#endif
908 goto out;
909 }
910 }
911 if (mp->msg_control) {
912#ifdef COMPAT_OLDSOCK
913 /*
914 * We assume that old recvmsg calls won't receive access
915 * rights and other control info, esp. as control info
916 * is always optional and those options didn't exist in 4.3.
917 * If we receive rights, trim the cmsghdr; anything else
918 * is tossed.
919 */
920 if (control && mp->msg_flags & MSG_COMPAT) {
921 if (mtod(control, struct cmsghdr *)->cmsg_level !=
922 SOL_SOCKET ||
923 mtod(control, struct cmsghdr *)->cmsg_type !=
924 SCM_RIGHTS) {
925 mp->msg_controllen = 0;
926 goto out;
927 }
928 control->m_len -= sizeof (struct cmsghdr);
929 control->m_data += sizeof (struct cmsghdr);
930 }
931#endif
932 len = mp->msg_controllen;
933 m = control;
934 mp->msg_controllen = 0;
935 ctlbuf = (caddr_t) mp->msg_control;
936
937 while (m && len > 0) {
938 unsigned int tocopy;
939
940 if (len >= m->m_len)
941 tocopy = m->m_len;
942 else {
943 mp->msg_flags |= MSG_CTRUNC;
944 tocopy = len;
945 }
946
947 if ((error = copyout((caddr_t)mtod(m, caddr_t),
948 ctlbuf, tocopy)) != 0)
949 goto out;
950
951 ctlbuf += tocopy;
952 len -= tocopy;
953 m = m->m_next;
954 }
955 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
956 }
957out:
958 fputsock(so);
959 if (fromsa)
960 FREE(fromsa, M_SONAME);
961 if (control)
962 m_freem(control);
963 return (error);
964}
965
966/*
967 * MPSAFE
968 */
969int
970recvfrom(td, uap)
971 struct thread *td;
972 register struct recvfrom_args /* {
973 int s;
974 caddr_t buf;
975 size_t len;
976 int flags;
977 caddr_t from;
978 int *fromlenaddr;
979 } */ *uap;
980{
981 struct msghdr msg;
982 struct iovec aiov;
983 int error;
984
985 mtx_lock(&Giant);
986 if (uap->fromlenaddr) {
987 error = copyin((caddr_t)uap->fromlenaddr,
988 (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen));
989 if (error)
990 goto done2;
991 } else {
992 msg.msg_namelen = 0;
993 }
994 msg.msg_name = uap->from;
995 msg.msg_iov = &aiov;
996 msg.msg_iovlen = 1;
997 aiov.iov_base = uap->buf;
998 aiov.iov_len = uap->len;
999 msg.msg_control = 0;
1000 msg.msg_flags = uap->flags;
1001 error = recvit(td, uap->s, &msg, (caddr_t)uap->fromlenaddr);
1002done2:
1003 mtx_unlock(&Giant);
1004 return(error);
1005}
1006
1007#ifdef COMPAT_OLDSOCK
1008/*
1009 * MPSAFE
1010 */
1011int
1012orecvfrom(td, uap)
1013 struct thread *td;
1014 struct recvfrom_args *uap;
1015{
1016
1017 uap->flags |= MSG_COMPAT;
1018 return (recvfrom(td, uap));
1019}
1020#endif
1021
1022
1023#ifdef COMPAT_OLDSOCK
1024/*
1025 * MPSAFE
1026 */
1027int
1028orecv(td, uap)
1029 struct thread *td;
1030 register struct orecv_args /* {
1031 int s;
1032 caddr_t buf;
1033 int len;
1034 int flags;
1035 } */ *uap;
1036{
1037 struct msghdr msg;
1038 struct iovec aiov;
1039 int error;
1040
1041 mtx_lock(&Giant);
1042 msg.msg_name = 0;
1043 msg.msg_namelen = 0;
1044 msg.msg_iov = &aiov;
1045 msg.msg_iovlen = 1;
1046 aiov.iov_base = uap->buf;
1047 aiov.iov_len = uap->len;
1048 msg.msg_control = 0;
1049 msg.msg_flags = uap->flags;
1050 error = recvit(td, uap->s, &msg, (caddr_t)0);
1051 mtx_unlock(&Giant);
1052 return (error);
1053}
1054
1055/*
1056 * Old recvmsg. This code takes advantage of the fact that the old msghdr
1057 * overlays the new one, missing only the flags, and with the (old) access
1058 * rights where the control fields are now.
1059 *
1060 * MPSAFE
1061 */
1062int
1063orecvmsg(td, uap)
1064 struct thread *td;
1065 register struct orecvmsg_args /* {
1066 int s;
1067 struct omsghdr *msg;
1068 int flags;
1069 } */ *uap;
1070{
1071 struct msghdr msg;
1072 struct iovec aiov[UIO_SMALLIOV], *iov;
1073 int error;
1074
1075 error = copyin((caddr_t)uap->msg, (caddr_t)&msg,
1076 sizeof (struct omsghdr));
1077 if (error)
1078 return (error);
1079
1080 mtx_lock(&Giant);
1081 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1082 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
1083 error = EMSGSIZE;
1084 goto done2;
1085 }
1086 MALLOC(iov, struct iovec *,
1087 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1088 M_WAITOK);
1089 } else {
1090 iov = aiov;
1091 }
1092 msg.msg_flags = uap->flags | MSG_COMPAT;
1093 error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
1094 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1095 if (error)
1096 goto done;
1097 msg.msg_iov = iov;
1098 error = recvit(td, uap->s, &msg, (caddr_t)&uap->msg->msg_namelen);
1099
1100 if (msg.msg_controllen && error == 0)
1101 error = copyout((caddr_t)&msg.msg_controllen,
1102 (caddr_t)&uap->msg->msg_accrightslen, sizeof (int));
1103done:
1104 if (iov != aiov)
1105 FREE(iov, M_IOV);
1106done2:
1107 mtx_unlock(&Giant);
1108 return (error);
1109}
1110#endif
1111
1112/*
1113 * MPSAFE
1114 */
1115int
1116recvmsg(td, uap)
1117 struct thread *td;
1118 register struct recvmsg_args /* {
1119 int s;
1120 struct msghdr *msg;
1121 int flags;
1122 } */ *uap;
1123{
1124 struct msghdr msg;
1125 struct iovec aiov[UIO_SMALLIOV], *uiov, *iov;
1126 register int error;
1127
1128 mtx_lock(&Giant);
1129 error = copyin((caddr_t)uap->msg, (caddr_t)&msg, sizeof (msg));
1130 if (error)
1131 goto done2;
1132 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1133 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
1134 error = EMSGSIZE;
1135 goto done2;
1136 }
1137 MALLOC(iov, struct iovec *,
1138 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1139 M_WAITOK);
1140 } else {
1141 iov = aiov;
1142 }
1143#ifdef COMPAT_OLDSOCK
1144 msg.msg_flags = uap->flags &~ MSG_COMPAT;
1145#else
1146 msg.msg_flags = uap->flags;
1147#endif
1148 uiov = msg.msg_iov;
1149 msg.msg_iov = iov;
1150 error = copyin((caddr_t)uiov, (caddr_t)iov,
1151 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1152 if (error)
1153 goto done;
1154 error = recvit(td, uap->s, &msg, (caddr_t)0);
1155 if (!error) {
1156 msg.msg_iov = uiov;
1157 error = copyout((caddr_t)&msg, (caddr_t)uap->msg, sizeof(msg));
1158 }
1159done:
1160 if (iov != aiov)
1161 FREE(iov, M_IOV);
1162done2:
1163 mtx_unlock(&Giant);
1164 return (error);
1165}
1166
1167/*
1168 * MPSAFE
1169 */
1170/* ARGSUSED */
1171int
1172shutdown(td, uap)
1173 struct thread *td;
1174 register struct shutdown_args /* {
1175 int s;
1176 int how;
1177 } */ *uap;
1178{
1179 struct socket *so;
1180 int error;
1181
1182 mtx_lock(&Giant);
1183 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
1184 error = soshutdown(so, uap->how);
1185 fputsock(so);
1186 }
1187 mtx_unlock(&Giant);
1188 return(error);
1189}
1190
1191/*
1192 * MPSAFE
1193 */
1194/* ARGSUSED */
1195int
1196setsockopt(td, uap)
1197 struct thread *td;
1198 register struct setsockopt_args /* {
1199 int s;
1200 int level;
1201 int name;
1202 caddr_t val;
1203 int valsize;
1204 } */ *uap;
1205{
1206 struct socket *so;
1207 struct sockopt sopt;
1208 int error;
1209
1210 if (uap->val == 0 && uap->valsize != 0)
1211 return (EFAULT);
1212 if (uap->valsize < 0)
1213 return (EINVAL);
1214
1215 mtx_lock(&Giant);
1216 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
1217 sopt.sopt_dir = SOPT_SET;
1218 sopt.sopt_level = uap->level;
1219 sopt.sopt_name = uap->name;
1220 sopt.sopt_val = uap->val;
1221 sopt.sopt_valsize = uap->valsize;
1222 sopt.sopt_td = td;
1223 error = sosetopt(so, &sopt);
1224 fputsock(so);
1225 }
1226 mtx_unlock(&Giant);
1227 return(error);
1228}
1229
1230/*
1231 * MPSAFE
1232 */
1233/* ARGSUSED */
1234int
1235getsockopt(td, uap)
1236 struct thread *td;
1237 register struct getsockopt_args /* {
1238 int s;
1239 int level;
1240 int name;
1241 caddr_t val;
1242 int *avalsize;
1243 } */ *uap;
1244{
1245 int valsize, error;
1246 struct socket *so;
1247 struct sockopt sopt;
1248
1249 mtx_lock(&Giant);
1250 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
1251 goto done2;
1252 if (uap->val) {
1253 error = copyin((caddr_t)uap->avalsize, (caddr_t)&valsize,
1254 sizeof (valsize));
1255 if (error)
1256 goto done1;
1257 if (valsize < 0) {
1258 error = EINVAL;
1259 goto done1;
1260 }
1261 } else {
1262 valsize = 0;
1263 }
1264
1265 sopt.sopt_dir = SOPT_GET;
1266 sopt.sopt_level = uap->level;
1267 sopt.sopt_name = uap->name;
1268 sopt.sopt_val = uap->val;
1269 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
1270 sopt.sopt_td = td;
1271
1272 error = sogetopt(so, &sopt);
1273 if (error == 0) {
1274 valsize = sopt.sopt_valsize;
1275 error = copyout((caddr_t)&valsize,
1276 (caddr_t)uap->avalsize, sizeof (valsize));
1277 }
1278done1:
1279 fputsock(so);
1280done2:
1281 mtx_unlock(&Giant);
1282 return (error);
1283}
1284
1285/*
1286 * getsockname1() - Get socket name.
1287 *
1288 * MPSAFE
1289 */
1290/* ARGSUSED */
1291static int
1292getsockname1(td, uap, compat)
1293 struct thread *td;
1294 register struct getsockname_args /* {
1295 int fdes;
1296 caddr_t asa;
1297 int *alen;
1298 } */ *uap;
1299 int compat;
1300{
1301 struct socket *so;
1302 struct sockaddr *sa;
1303 int len, error;
1304
1305 mtx_lock(&Giant);
1306 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
1307 goto done2;
1308 error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len));
1309 if (error)
1310 goto done1;
1311 sa = 0;
1312 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1313 if (error)
1314 goto bad;
1315 if (sa == 0) {
1316 len = 0;
1317 goto gotnothing;
1318 }
1319
1320 len = MIN(len, sa->sa_len);
1321#ifdef COMPAT_OLDSOCK
1322 if (compat)
1323 ((struct osockaddr *)sa)->sa_family = sa->sa_family;
1324#endif
1325 error = copyout(sa, (caddr_t)uap->asa, (u_int)len);
1326 if (error == 0)
1327gotnothing:
1328 error = copyout((caddr_t)&len, (caddr_t)uap->alen,
1329 sizeof (len));
1330bad:
1331 if (sa)
1332 FREE(sa, M_SONAME);
1333done1:
1334 fputsock(so);
1335done2:
1336 mtx_unlock(&Giant);
1337 return (error);
1338}
1339
1340/*
1341 * MPSAFE
1342 */
1343int
1344getsockname(td, uap)
1345 struct thread *td;
1346 struct getsockname_args *uap;
1347{
1348
1349 return (getsockname1(td, uap, 0));
1350}
1351
1352#ifdef COMPAT_OLDSOCK
1353/*
1354 * MPSAFE
1355 */
1356int
1357ogetsockname(td, uap)
1358 struct thread *td;
1359 struct getsockname_args *uap;
1360{
1361
1362 return (getsockname1(td, uap, 1));
1363}
1364#endif /* COMPAT_OLDSOCK */
1365
1366/*
1367 * getpeername1() - Get name of peer for connected socket.
1368 *
1369 * MPSAFE
1370 */
1371/* ARGSUSED */
1372static int
1373getpeername1(td, uap, compat)
1374 struct thread *td;
1375 register struct getpeername_args /* {
1376 int fdes;
1377 caddr_t asa;
1378 int *alen;
1379 } */ *uap;
1380 int compat;
1381{
1382 struct socket *so;
1383 struct sockaddr *sa;
1384 int len, error;
1385
1386 mtx_lock(&Giant);
1387 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
1388 goto done2;
1389 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1390 error = ENOTCONN;
1391 goto done1;
1392 }
1393 error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len));
1394 if (error)
1395 goto done1;
1396 sa = 0;
1397 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1398 if (error)
1399 goto bad;
1400 if (sa == 0) {
1401 len = 0;
1402 goto gotnothing;
1403 }
1404 len = MIN(len, sa->sa_len);
1405#ifdef COMPAT_OLDSOCK
1406 if (compat)
1407 ((struct osockaddr *)sa)->sa_family =
1408 sa->sa_family;
1409#endif
1410 error = copyout(sa, (caddr_t)uap->asa, (u_int)len);
1411 if (error)
1412 goto bad;
1413gotnothing:
1414 error = copyout((caddr_t)&len, (caddr_t)uap->alen, sizeof (len));
1415bad:
1416 if (sa)
1417 FREE(sa, M_SONAME);
1418done1:
1419 fputsock(so);
1420done2:
1421 mtx_unlock(&Giant);
1422 return (error);
1423}
1424
1425/*
1426 * MPSAFE
1427 */
1428int
1429getpeername(td, uap)
1430 struct thread *td;
1431 struct getpeername_args *uap;
1432{
1433
1434 return (getpeername1(td, uap, 0));
1435}
1436
1437#ifdef COMPAT_OLDSOCK
1438/*
1439 * MPSAFE
1440 */
1441int
1442ogetpeername(td, uap)
1443 struct thread *td;
1444 struct ogetpeername_args *uap;
1445{
1446
1447 /* XXX uap should have type `getpeername_args *' to begin with. */
1448 return (getpeername1(td, (struct getpeername_args *)uap, 1));
1449}
1450#endif /* COMPAT_OLDSOCK */
1451
1452int
1453sockargs(mp, buf, buflen, type)
1454 struct mbuf **mp;
1455 caddr_t buf;
1456 int buflen, type;
1457{
1458 register struct sockaddr *sa;
1459 register struct mbuf *m;
1460 int error;
1461
1462 if ((u_int)buflen > MLEN) {
1463#ifdef COMPAT_OLDSOCK
1464 if (type == MT_SONAME && (u_int)buflen <= 112)
1465 buflen = MLEN; /* unix domain compat. hack */
1466 else
1467#endif
1468 return (EINVAL);
1469 }
1470 m = m_get(M_TRYWAIT, type);
1471 if (m == NULL)
1472 return (ENOBUFS);
1473 m->m_len = buflen;
1474 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1475 if (error)
1476 (void) m_free(m);
1477 else {
1478 *mp = m;
1479 if (type == MT_SONAME) {
1480 sa = mtod(m, struct sockaddr *);
1481
1482#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1483 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1484 sa->sa_family = sa->sa_len;
1485#endif
1486 sa->sa_len = buflen;
1487 }
1488 }
1489 return (error);
1490}
1491
1492int
1493getsockaddr(namp, uaddr, len)
1494 struct sockaddr **namp;
1495 caddr_t uaddr;
1496 size_t len;
1497{
1498 struct sockaddr *sa;
1499 int error;
1500
1501 if (len > SOCK_MAXADDRLEN)
1502 return ENAMETOOLONG;
1503 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1504 error = copyin(uaddr, sa, len);
1505 if (error) {
1506 FREE(sa, M_SONAME);
1507 } else {
1508#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1509 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1510 sa->sa_family = sa->sa_len;
1511#endif
1512 sa->sa_len = len;
1513 *namp = sa;
1514 }
1515 return error;
1516}
1517
1518/*
1519 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
1520 * XXX - The sf_buf functions are currently private to sendfile(2), so have
1521 * been made static, but may be useful in the future for doing zero-copy in
1522 * other parts of the networking code.
1523 */
1524static void
1525sf_buf_init(void *arg)
1526{
1527 int i;
1528
1529 mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", MTX_DEF);
1530 mtx_lock(&sf_freelist.sf_lock);
1531 SLIST_INIT(&sf_freelist.sf_head);
1532 sf_base = kmem_alloc_pageable(kernel_map, nsfbufs * PAGE_SIZE);
1533 sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
1534 M_NOWAIT | M_ZERO);
1535 for (i = 0; i < nsfbufs; i++) {
1536 sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
1537 SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list);
1538 }
1539 sf_buf_alloc_want = 0;
1540 mtx_unlock(&sf_freelist.sf_lock);
1541}
1542
1543/*
1544 * Get an sf_buf from the freelist. Will block if none are available.
1545 */
1546static struct sf_buf *
1547sf_buf_alloc()
1548{
1549 struct sf_buf *sf;
1550 int error;
1551
1552 mtx_lock(&sf_freelist.sf_lock);
1553 while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) {
1554 sf_buf_alloc_want++;
1555 error = msleep(&sf_freelist, &sf_freelist.sf_lock, PVM|PCATCH,
1556 "sfbufa", 0);
1557 sf_buf_alloc_want--;
1558
1559 /*
1560 * If we got a signal, don't risk going back to sleep.
1561 */
1562 if (error)
1563 break;
1564 }
1565 if (sf != NULL)
1566 SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list);
1567 mtx_unlock(&sf_freelist.sf_lock);
1568 return (sf);
1569}
1570
1571#define dtosf(x) (&sf_bufs[((uintptr_t)(x) - (uintptr_t)sf_base) >> PAGE_SHIFT])
1572
1573/*
 1574 * Detach mapped page and release resources back to the system.
1575 */
1576static void
1577sf_buf_free(caddr_t addr, void *args)
1578{
1579 struct sf_buf *sf;
1580 struct vm_page *m;
1581
1582 GIANT_REQUIRED;
1583
1584 sf = dtosf(addr);
1585 pmap_qremove((vm_offset_t)addr, 1);
1586 m = sf->m;
1587 vm_page_unwire(m, 0);
1588 /*
1589 * Check for the object going away on us. This can
1590 * happen since we don't hold a reference to it.
1591 * If so, we're responsible for freeing the page.
1592 */
1593 if (m->wire_count == 0 && m->object == NULL)
1594 vm_page_free(m);
1595 sf->m = NULL;
1596 mtx_lock(&sf_freelist.sf_lock);
1597 SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list);
1598 if (sf_buf_alloc_want > 0)
1599 wakeup_one(&sf_freelist);
1600 mtx_unlock(&sf_freelist.sf_lock);
1601}
1602
1603/*
1604 * sendfile(2)
1605 *
1606 * MPSAFE
1607 *
1608 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
1609 * struct sf_hdtr *hdtr, off_t *sbytes, int flags)
1610 *
1611 * Send a file specified by 'fd' and starting at 'offset' to a socket
1612 * specified by 's'. Send only 'nbytes' of the file or until EOF if
1613 * nbytes == 0. Optionally add a header and/or trailer to the socket
1614 * output. If specified, write the total number of bytes sent into *sbytes.
1615 *
1616 */
1617int
1618sendfile(struct thread *td, struct sendfile_args *uap)
1619{
1620 struct vnode *vp;
1621 struct vm_object *obj;
1622 struct socket *so = NULL;
1623 struct mbuf *m;
1624 struct sf_buf *sf;
1625 struct vm_page *pg;
1626 struct writev_args nuap;
1627 struct sf_hdtr hdtr;
1628 off_t off, xfsize, sbytes = 0;
1629 int error, s;
1630
1631 mtx_lock(&Giant);
1632
1633 /*
1634 * The descriptor must be a regular file and have a backing VM object.
1635 */
1636 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
1637 goto done;
1638 if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) {
1639 error = EINVAL;
1640 goto done;
1641 }
1642 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
1643 goto done;
1644 if (so->so_type != SOCK_STREAM) {
1645 error = EINVAL;
1646 goto done;
1647 }
1648 if ((so->so_state & SS_ISCONNECTED) == 0) {
1649 error = ENOTCONN;
1650 goto done;
1651 }
1652 if (uap->offset < 0) {
1653 error = EINVAL;
1654 goto done;
1655 }
1656
1657 /*
1658 * If specified, get the pointer to the sf_hdtr struct for
1659 * any headers/trailers.
1660 */
1661 if (uap->hdtr != NULL) {
1662 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1663 if (error)
1664 goto done;
1665 /*
1666 * Send any headers. Wimp out and use writev(2).
1667 */
1668 if (hdtr.headers != NULL) {
1669 nuap.fd = uap->s;
1670 nuap.iovp = hdtr.headers;
1671 nuap.iovcnt = hdtr.hdr_cnt;
1672 error = writev(td, &nuap);
1673 if (error)
1674 goto done;
1675 sbytes += td->td_retval[0];
1676 }
1677 }
1678
1679 /*
1680 * Protect against multiple writers to the socket.
1681 */
1682 (void) sblock(&so->so_snd, M_WAITOK);
1683
1684 /*
1685 * Loop through the pages in the file, starting with the requested
1686 * offset. Get a file page (do I/O if necessary), map the file page
1687 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1688 * it on the socket.
1689 */
1690 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
1691 vm_pindex_t pindex;
1692 vm_offset_t pgoff;
1693
1694 pindex = OFF_TO_IDX(off);
1695retry_lookup:
1696 /*
1697 * Calculate the amount to transfer. Not to exceed a page,
1698 * the EOF, or the passed in nbytes.
1699 */
1700 xfsize = obj->un_pager.vnp.vnp_size - off;
1701 if (xfsize > PAGE_SIZE)
1702 xfsize = PAGE_SIZE;
1703 pgoff = (vm_offset_t)(off & PAGE_MASK);
1704 if (PAGE_SIZE - pgoff < xfsize)
1705 xfsize = PAGE_SIZE - pgoff;
488 if (error)
489 goto free1;
490 error = falloc(td, &fp1, &fd);
491 if (error)
492 goto free2;
493 fhold(fp1);
494 sv[0] = fd;
495 fp1->f_data = (caddr_t)so1; /* so1 already has ref count */
496 error = falloc(td, &fp2, &fd);
497 if (error)
498 goto free3;
499 fhold(fp2);
500 fp2->f_data = (caddr_t)so2; /* so2 already has ref count */
501 sv[1] = fd;
502 error = soconnect2(so1, so2);
503 if (error)
504 goto free4;
505 if (uap->type == SOCK_DGRAM) {
506 /*
507 * Datagram socket connection is asymmetric.
508 */
509 error = soconnect2(so2, so1);
510 if (error)
511 goto free4;
512 }
513 fp1->f_flag = fp2->f_flag = FREAD|FWRITE;
514 fp1->f_ops = fp2->f_ops = &socketops;
515 fp1->f_type = fp2->f_type = DTYPE_SOCKET;
516 error = copyout((caddr_t)sv, (caddr_t)uap->rsv, 2 * sizeof (int));
517 fdrop(fp1, td);
518 fdrop(fp2, td);
519 goto done2;
520free4:
521 if (fdp->fd_ofiles[sv[1]] == fp2) {
522 fdp->fd_ofiles[sv[1]] = NULL;
523 fdrop(fp2, td);
524 }
525 fdrop(fp2, td);
526free3:
527 if (fdp->fd_ofiles[sv[0]] == fp1) {
528 fdp->fd_ofiles[sv[0]] = NULL;
529 fdrop(fp1, td);
530 }
531 fdrop(fp1, td);
532free2:
533 (void)soclose(so2);
534free1:
535 (void)soclose(so1);
536done2:
537 mtx_unlock(&Giant);
538 return (error);
539}
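/*
 * A minimal userland sketch of socketpair(2) as wired up above, using a
 * SOCK_DGRAM pair so that both directions are connected (the asymmetric
 * case handled in the code).  The function and variable names here are
 * illustrative assumptions, not part of this file.
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <stdio.h>
#include <unistd.h>

static void
socketpair_demo(void)
{
        int sv[2];
        char buf[64];
        ssize_t n;

        if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sv) == -1) {
                perror("socketpair");
                return;
        }
        /* Either end may send: the kernel connected both directions. */
        (void)write(sv[0], "ping", 4);
        n = read(sv[1], buf, sizeof(buf));
        if (n > 0)
                printf("received %zd bytes\n", n);
        (void)close(sv[0]);
        (void)close(sv[1]);
}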
540
541static int
542sendit(td, s, mp, flags)
543 register struct thread *td;
544 int s;
545 register struct msghdr *mp;
546 int flags;
547{
548 struct uio auio;
549 register struct iovec *iov;
550 register int i;
551 struct mbuf *control;
552 struct sockaddr *to = NULL;
553 int len, error;
554 struct socket *so;
555#ifdef KTRACE
556 struct iovec *ktriov = NULL;
557 struct uio ktruio;
558#endif
559
560 if ((error = fgetsock(td, s, &so, NULL)) != 0)
561 return (error);
562 auio.uio_iov = mp->msg_iov;
563 auio.uio_iovcnt = mp->msg_iovlen;
564 auio.uio_segflg = UIO_USERSPACE;
565 auio.uio_rw = UIO_WRITE;
566 auio.uio_td = td;
567 auio.uio_offset = 0; /* XXX */
568 auio.uio_resid = 0;
569 iov = mp->msg_iov;
570 for (i = 0; i < mp->msg_iovlen; i++, iov++) {
571 if ((auio.uio_resid += iov->iov_len) < 0) {
572 error = EINVAL;
573 goto bad;
574 }
575 }
576 if (mp->msg_name) {
577 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
578 if (error)
579 goto bad;
580 }
581 if (mp->msg_control) {
582 if (mp->msg_controllen < sizeof(struct cmsghdr)
583#ifdef COMPAT_OLDSOCK
584 && mp->msg_flags != MSG_COMPAT
585#endif
586 ) {
587 error = EINVAL;
588 goto bad;
589 }
590 error = sockargs(&control, mp->msg_control,
591 mp->msg_controllen, MT_CONTROL);
592 if (error)
593 goto bad;
594#ifdef COMPAT_OLDSOCK
595 if (mp->msg_flags == MSG_COMPAT) {
596 register struct cmsghdr *cm;
597
598 M_PREPEND(control, sizeof(*cm), M_TRYWAIT);
599 if (control == 0) {
600 error = ENOBUFS;
601 goto bad;
602 } else {
603 cm = mtod(control, struct cmsghdr *);
604 cm->cmsg_len = control->m_len;
605 cm->cmsg_level = SOL_SOCKET;
606 cm->cmsg_type = SCM_RIGHTS;
607 }
608 }
609#endif
610 } else {
611 control = 0;
612 }
613#ifdef KTRACE
614 if (KTRPOINT(td->td_proc, KTR_GENIO)) {
615 int iovlen = auio.uio_iovcnt * sizeof (struct iovec);
616
617 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
618 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
619 ktruio = auio;
620 }
621#endif
622 len = auio.uio_resid;
623 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, &auio, 0, control,
624 flags, td);
625 if (error) {
626 if (auio.uio_resid != len && (error == ERESTART ||
627 error == EINTR || error == EWOULDBLOCK))
628 error = 0;
629 if (error == EPIPE) {
630 PROC_LOCK(td->td_proc);
631 psignal(td->td_proc, SIGPIPE);
632 PROC_UNLOCK(td->td_proc);
633 }
634 }
635 if (error == 0)
636 td->td_retval[0] = len - auio.uio_resid;
637#ifdef KTRACE
638 if (ktriov != NULL) {
639 if (error == 0) {
640 ktruio.uio_iov = ktriov;
641 ktruio.uio_resid = td->td_retval[0];
642 ktrgenio(td->td_proc->p_tracep, s, UIO_WRITE, &ktruio, error);
643 }
644 FREE(ktriov, M_TEMP);
645 }
646#endif
647bad:
648 fputsock(so);
649 if (to)
650 FREE(to, M_SONAME);
651 return (error);
652}
653
654/*
655 * MPSAFE
656 */
657int
658sendto(td, uap)
659 struct thread *td;
660 register struct sendto_args /* {
661 int s;
662 caddr_t buf;
663 size_t len;
664 int flags;
665 caddr_t to;
666 int tolen;
667 } */ *uap;
668{
669 struct msghdr msg;
670 struct iovec aiov;
671 int error;
672
673 msg.msg_name = uap->to;
674 msg.msg_namelen = uap->tolen;
675 msg.msg_iov = &aiov;
676 msg.msg_iovlen = 1;
677 msg.msg_control = 0;
678#ifdef COMPAT_OLDSOCK
679 msg.msg_flags = 0;
680#endif
681 aiov.iov_base = uap->buf;
682 aiov.iov_len = uap->len;
683 mtx_lock(&Giant);
684 error = sendit(td, uap->s, &msg, uap->flags);
685 mtx_unlock(&Giant);
686 return (error);
687}
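/*
 * Userland sketch of the sendto(2) path above: the kernel wraps 'buf' in a
 * one-entry msghdr and hands it to sendit().  The socket, port, and payload
 * below are illustrative assumptions.
 */
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <string.h>

static int
sendto_demo(int sock)
{
        struct sockaddr_in dst;
        const char payload[] = "hello";

        memset(&dst, 0, sizeof(dst));
        dst.sin_family = AF_INET;
        dst.sin_port = htons(9999);
        dst.sin_addr.s_addr = inet_addr("127.0.0.1");
        /* Equivalent to sendmsg() with a single iovec and no control data. */
        if (sendto(sock, payload, sizeof(payload) - 1, 0,
            (struct sockaddr *)&dst, sizeof(dst)) == -1)
                return (-1);
        return (0);
}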
688
689#ifdef COMPAT_OLDSOCK
690/*
691 * MPSAFE
692 */
693int
694osend(td, uap)
695 struct thread *td;
696 register struct osend_args /* {
697 int s;
698 caddr_t buf;
699 int len;
700 int flags;
701 } */ *uap;
702{
703 struct msghdr msg;
704 struct iovec aiov;
705 int error;
706
707 msg.msg_name = 0;
708 msg.msg_namelen = 0;
709 msg.msg_iov = &aiov;
710 msg.msg_iovlen = 1;
711 aiov.iov_base = uap->buf;
712 aiov.iov_len = uap->len;
713 msg.msg_control = 0;
714 msg.msg_flags = 0;
715 mtx_lock(&Giant);
716 error = sendit(td, uap->s, &msg, uap->flags);
717 mtx_unlock(&Giant);
718 return (error);
719}
720
721/*
722 * MPSAFE
723 */
724int
725osendmsg(td, uap)
726 struct thread *td;
727 register struct osendmsg_args /* {
728 int s;
729 caddr_t msg;
730 int flags;
731 } */ *uap;
732{
733 struct msghdr msg;
734 struct iovec aiov[UIO_SMALLIOV], *iov;
735 int error;
736
737 mtx_lock(&Giant);
738 error = copyin(uap->msg, (caddr_t)&msg, sizeof (struct omsghdr));
739 if (error)
740 goto done2;
741 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
742 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
743 error = EMSGSIZE;
744 goto done2;
745 }
746 MALLOC(iov, struct iovec *,
747 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
748 M_WAITOK);
749 } else {
750 iov = aiov;
751 }
752 error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
753 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
754 if (error)
755 goto done;
756 msg.msg_flags = MSG_COMPAT;
757 msg.msg_iov = iov;
758 error = sendit(td, uap->s, &msg, uap->flags);
759done:
760 if (iov != aiov)
761 FREE(iov, M_IOV);
762done2:
763 mtx_unlock(&Giant);
764 return (error);
765}
766#endif
767
768/*
769 * MPSAFE
770 */
771int
772sendmsg(td, uap)
773 struct thread *td;
774 register struct sendmsg_args /* {
775 int s;
776 caddr_t msg;
777 int flags;
778 } */ *uap;
779{
780 struct msghdr msg;
781 struct iovec aiov[UIO_SMALLIOV], *iov;
782 int error;
783
784 mtx_lock(&Giant);
785 error = copyin(uap->msg, (caddr_t)&msg, sizeof (msg));
786 if (error)
787 goto done2;
788 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
789 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
790 error = EMSGSIZE;
791 goto done2;
792 }
793 MALLOC(iov, struct iovec *,
794 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
795 M_WAITOK);
796 } else {
797 iov = aiov;
798 }
799 if (msg.msg_iovlen &&
800 (error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
801 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)))))
802 goto done;
803 msg.msg_iov = iov;
804#ifdef COMPAT_OLDSOCK
805 msg.msg_flags = 0;
806#endif
807 error = sendit(td, uap->s, &msg, uap->flags);
808done:
809 if (iov != aiov)
810 FREE(iov, M_IOV);
811done2:
812 mtx_unlock(&Giant);
813 return (error);
814}
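/*
 * Userland sketch of sendmsg(2) passing a descriptor over a unix-domain
 * socket, i.e. the msg_control/SCM_RIGHTS case that sendit() validates
 * above (msg_controllen must cover at least a cmsghdr).  'sock' and
 * 'fd_to_send' are assumed to exist; the names are illustrative.
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <string.h>

static int
send_fd(int sock, int fd_to_send)
{
        struct msghdr msg;
        struct iovec iov;
        struct cmsghdr *cm;
        union {
                struct cmsghdr hdr;
                char buf[CMSG_SPACE(sizeof(int))];
        } cmsgbuf;
        char byte = 0;

        memset(&msg, 0, sizeof(msg));
        iov.iov_base = &byte;
        iov.iov_len = 1;
        msg.msg_iov = &iov;
        msg.msg_iovlen = 1;
        msg.msg_control = cmsgbuf.buf;
        msg.msg_controllen = CMSG_SPACE(sizeof(int));

        cm = CMSG_FIRSTHDR(&msg);
        cm->cmsg_len = CMSG_LEN(sizeof(int));
        cm->cmsg_level = SOL_SOCKET;
        cm->cmsg_type = SCM_RIGHTS;
        memcpy(CMSG_DATA(cm), &fd_to_send, sizeof(int));

        return (sendmsg(sock, &msg, 0) == -1 ? -1 : 0);
}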
815
816static int
817recvit(td, s, mp, namelenp)
818 register struct thread *td;
819 int s;
820 register struct msghdr *mp;
821 caddr_t namelenp;
822{
823 struct uio auio;
824 register struct iovec *iov;
825 register int i;
826 int len, error;
827 struct mbuf *m, *control = 0;
828 caddr_t ctlbuf;
829 struct socket *so;
830 struct sockaddr *fromsa = 0;
831#ifdef KTRACE
832 struct iovec *ktriov = NULL;
833 struct uio ktruio;
834#endif
835
836 if ((error = fgetsock(td, s, &so, NULL)) != 0)
837 return (error);
838 auio.uio_iov = mp->msg_iov;
839 auio.uio_iovcnt = mp->msg_iovlen;
840 auio.uio_segflg = UIO_USERSPACE;
841 auio.uio_rw = UIO_READ;
842 auio.uio_td = td;
843 auio.uio_offset = 0; /* XXX */
844 auio.uio_resid = 0;
845 iov = mp->msg_iov;
846 for (i = 0; i < mp->msg_iovlen; i++, iov++) {
847 if ((auio.uio_resid += iov->iov_len) < 0) {
848 fputsock(so);
849 return (EINVAL);
850 }
851 }
852#ifdef KTRACE
853 if (KTRPOINT(td->td_proc, KTR_GENIO)) {
854 int iovlen = auio.uio_iovcnt * sizeof (struct iovec);
855
856 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
857 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
858 ktruio = auio;
859 }
860#endif
861 len = auio.uio_resid;
862 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
863 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
864 &mp->msg_flags);
865 if (error) {
866 if (auio.uio_resid != len && (error == ERESTART ||
867 error == EINTR || error == EWOULDBLOCK))
868 error = 0;
869 }
870#ifdef KTRACE
871 if (ktriov != NULL) {
872 if (error == 0) {
873 ktruio.uio_iov = ktriov;
874 ktruio.uio_resid = len - auio.uio_resid;
875 ktrgenio(td->td_proc->p_tracep, s, UIO_READ, &ktruio, error);
876 }
877 FREE(ktriov, M_TEMP);
878 }
879#endif
880 if (error)
881 goto out;
882 td->td_retval[0] = len - auio.uio_resid;
883 if (mp->msg_name) {
884 len = mp->msg_namelen;
885 if (len <= 0 || fromsa == 0)
886 len = 0;
887 else {
888#ifndef MIN
889#define MIN(a,b) ((a)>(b)?(b):(a))
890#endif
891 /* save sa_len before it is destroyed by MSG_COMPAT */
892 len = MIN(len, fromsa->sa_len);
893#ifdef COMPAT_OLDSOCK
894 if (mp->msg_flags & MSG_COMPAT)
895 ((struct osockaddr *)fromsa)->sa_family =
896 fromsa->sa_family;
897#endif
898 error = copyout(fromsa,
899 (caddr_t)mp->msg_name, (unsigned)len);
900 if (error)
901 goto out;
902 }
903 mp->msg_namelen = len;
904 if (namelenp &&
905 (error = copyout((caddr_t)&len, namelenp, sizeof (int)))) {
906#ifdef COMPAT_OLDSOCK
907 if (mp->msg_flags & MSG_COMPAT)
908 error = 0; /* old recvfrom didn't check */
909 else
910#endif
911 goto out;
912 }
913 }
914 if (mp->msg_control) {
915#ifdef COMPAT_OLDSOCK
916 /*
917 * We assume that old recvmsg calls won't receive access
918 * rights and other control info, esp. as control info
919 * is always optional and those options didn't exist in 4.3.
920 * If we receive rights, trim the cmsghdr; anything else
921 * is tossed.
922 */
923 if (control && mp->msg_flags & MSG_COMPAT) {
924 if (mtod(control, struct cmsghdr *)->cmsg_level !=
925 SOL_SOCKET ||
926 mtod(control, struct cmsghdr *)->cmsg_type !=
927 SCM_RIGHTS) {
928 mp->msg_controllen = 0;
929 goto out;
930 }
931 control->m_len -= sizeof (struct cmsghdr);
932 control->m_data += sizeof (struct cmsghdr);
933 }
934#endif
935 len = mp->msg_controllen;
936 m = control;
937 mp->msg_controllen = 0;
938 ctlbuf = (caddr_t) mp->msg_control;
939
940 while (m && len > 0) {
941 unsigned int tocopy;
942
943 if (len >= m->m_len)
944 tocopy = m->m_len;
945 else {
946 mp->msg_flags |= MSG_CTRUNC;
947 tocopy = len;
948 }
949
950 if ((error = copyout((caddr_t)mtod(m, caddr_t),
951 ctlbuf, tocopy)) != 0)
952 goto out;
953
954 ctlbuf += tocopy;
955 len -= tocopy;
956 m = m->m_next;
957 }
958 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
959 }
960out:
961 fputsock(so);
962 if (fromsa)
963 FREE(fromsa, M_SONAME);
964 if (control)
965 m_freem(control);
966 return (error);
967}
968
969/*
970 * MPSAFE
971 */
972int
973recvfrom(td, uap)
974 struct thread *td;
975 register struct recvfrom_args /* {
976 int s;
977 caddr_t buf;
978 size_t len;
979 int flags;
980 caddr_t from;
981 int *fromlenaddr;
982 } */ *uap;
983{
984 struct msghdr msg;
985 struct iovec aiov;
986 int error;
987
988 mtx_lock(&Giant);
989 if (uap->fromlenaddr) {
990 error = copyin((caddr_t)uap->fromlenaddr,
991 (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen));
992 if (error)
993 goto done2;
994 } else {
995 msg.msg_namelen = 0;
996 }
997 msg.msg_name = uap->from;
998 msg.msg_iov = &aiov;
999 msg.msg_iovlen = 1;
1000 aiov.iov_base = uap->buf;
1001 aiov.iov_len = uap->len;
1002 msg.msg_control = 0;
1003 msg.msg_flags = uap->flags;
1004 error = recvit(td, uap->s, &msg, (caddr_t)uap->fromlenaddr);
1005done2:
1006 mtx_unlock(&Giant);
1007 return(error);
1008}
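/*
 * Userland counterpart of the recvfrom(2) path above: the address length is
 * a value-result argument, copied in from *fromlenaddr before the receive
 * and written back (possibly shortened to sa_len) afterwards.  Names are
 * illustrative.
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>

static ssize_t
recvfrom_demo(int sock, char *buf, size_t len)
{
        struct sockaddr_in from;
        socklen_t fromlen = sizeof(from);       /* value-result */

        return (recvfrom(sock, buf, len, 0,
            (struct sockaddr *)&from, &fromlen));
}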
1009
1010#ifdef COMPAT_OLDSOCK
1011/*
1012 * MPSAFE
1013 */
1014int
1015orecvfrom(td, uap)
1016 struct thread *td;
1017 struct recvfrom_args *uap;
1018{
1019
1020 uap->flags |= MSG_COMPAT;
1021 return (recvfrom(td, uap));
1022}
1023#endif
1024
1025
1026#ifdef COMPAT_OLDSOCK
1027/*
1028 * MPSAFE
1029 */
1030int
1031orecv(td, uap)
1032 struct thread *td;
1033 register struct orecv_args /* {
1034 int s;
1035 caddr_t buf;
1036 int len;
1037 int flags;
1038 } */ *uap;
1039{
1040 struct msghdr msg;
1041 struct iovec aiov;
1042 int error;
1043
1044 mtx_lock(&Giant);
1045 msg.msg_name = 0;
1046 msg.msg_namelen = 0;
1047 msg.msg_iov = &aiov;
1048 msg.msg_iovlen = 1;
1049 aiov.iov_base = uap->buf;
1050 aiov.iov_len = uap->len;
1051 msg.msg_control = 0;
1052 msg.msg_flags = uap->flags;
1053 error = recvit(td, uap->s, &msg, (caddr_t)0);
1054 mtx_unlock(&Giant);
1055 return (error);
1056}
1057
1058/*
1059 * Old recvmsg. This code takes advantage of the fact that the old msghdr
1060 * overlays the new one, missing only the flags, and with the (old) access
1061 * rights where the control fields are now.
1062 *
1063 * MPSAFE
1064 */
1065int
1066orecvmsg(td, uap)
1067 struct thread *td;
1068 register struct orecvmsg_args /* {
1069 int s;
1070 struct omsghdr *msg;
1071 int flags;
1072 } */ *uap;
1073{
1074 struct msghdr msg;
1075 struct iovec aiov[UIO_SMALLIOV], *iov;
1076 int error;
1077
1078 error = copyin((caddr_t)uap->msg, (caddr_t)&msg,
1079 sizeof (struct omsghdr));
1080 if (error)
1081 return (error);
1082
1083 mtx_lock(&Giant);
1084 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1085 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
1086 error = EMSGSIZE;
1087 goto done2;
1088 }
1089 MALLOC(iov, struct iovec *,
1090 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1091 M_WAITOK);
1092 } else {
1093 iov = aiov;
1094 }
1095 msg.msg_flags = uap->flags | MSG_COMPAT;
1096 error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
1097 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1098 if (error)
1099 goto done;
1100 msg.msg_iov = iov;
1101 error = recvit(td, uap->s, &msg, (caddr_t)&uap->msg->msg_namelen);
1102
1103 if (msg.msg_controllen && error == 0)
1104 error = copyout((caddr_t)&msg.msg_controllen,
1105 (caddr_t)&uap->msg->msg_accrightslen, sizeof (int));
1106done:
1107 if (iov != aiov)
1108 FREE(iov, M_IOV);
1109done2:
1110 mtx_unlock(&Giant);
1111 return (error);
1112}
1113#endif
1114
1115/*
1116 * MPSAFE
1117 */
1118int
1119recvmsg(td, uap)
1120 struct thread *td;
1121 register struct recvmsg_args /* {
1122 int s;
1123 struct msghdr *msg;
1124 int flags;
1125 } */ *uap;
1126{
1127 struct msghdr msg;
1128 struct iovec aiov[UIO_SMALLIOV], *uiov, *iov;
1129 register int error;
1130
1131 mtx_lock(&Giant);
1132 error = copyin((caddr_t)uap->msg, (caddr_t)&msg, sizeof (msg));
1133 if (error)
1134 goto done2;
1135 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1136 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
1137 error = EMSGSIZE;
1138 goto done2;
1139 }
1140 MALLOC(iov, struct iovec *,
1141 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1142 M_WAITOK);
1143 } else {
1144 iov = aiov;
1145 }
1146#ifdef COMPAT_OLDSOCK
1147 msg.msg_flags = uap->flags &~ MSG_COMPAT;
1148#else
1149 msg.msg_flags = uap->flags;
1150#endif
1151 uiov = msg.msg_iov;
1152 msg.msg_iov = iov;
1153 error = copyin((caddr_t)uiov, (caddr_t)iov,
1154 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1155 if (error)
1156 goto done;
1157 error = recvit(td, uap->s, &msg, (caddr_t)0);
1158 if (!error) {
1159 msg.msg_iov = uiov;
1160 error = copyout((caddr_t)&msg, (caddr_t)uap->msg, sizeof(msg));
1161 }
1162done:
1163 if (iov != aiov)
1164 FREE(iov, M_IOV);
1165done2:
1166 mtx_unlock(&Giant);
1167 return (error);
1168}
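/*
 * Userland sketch of recvmsg(2) receiving a descriptor, the control-data
 * path that recvit() copies out above; MSG_CTRUNC is checked because the
 * kernel sets it when msg_controllen was too small for the control chain.
 * Names are illustrative.
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <string.h>

static int
recv_fd(int sock)
{
        struct msghdr msg;
        struct iovec iov;
        struct cmsghdr *cm;
        union {
                struct cmsghdr hdr;
                char buf[CMSG_SPACE(sizeof(int))];
        } cmsgbuf;
        char byte;
        int fd = -1;

        memset(&msg, 0, sizeof(msg));
        iov.iov_base = &byte;
        iov.iov_len = 1;
        msg.msg_iov = &iov;
        msg.msg_iovlen = 1;
        msg.msg_control = cmsgbuf.buf;
        msg.msg_controllen = CMSG_SPACE(sizeof(int));

        if (recvmsg(sock, &msg, 0) == -1)
                return (-1);
        if (msg.msg_flags & MSG_CTRUNC)         /* control data truncated */
                return (-1);
        for (cm = CMSG_FIRSTHDR(&msg); cm != NULL; cm = CMSG_NXTHDR(&msg, cm))
                if (cm->cmsg_level == SOL_SOCKET && cm->cmsg_type == SCM_RIGHTS)
                        memcpy(&fd, CMSG_DATA(cm), sizeof(int));
        return (fd);
}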
1169
1170/*
1171 * MPSAFE
1172 */
1173/* ARGSUSED */
1174int
1175shutdown(td, uap)
1176 struct thread *td;
1177 register struct shutdown_args /* {
1178 int s;
1179 int how;
1180 } */ *uap;
1181{
1182 struct socket *so;
1183 int error;
1184
1185 mtx_lock(&Giant);
1186 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
1187 error = soshutdown(so, uap->how);
1188 fputsock(so);
1189 }
1190 mtx_unlock(&Giant);
1191 return(error);
1192}
1193
1194/*
1195 * MPSAFE
1196 */
1197/* ARGSUSED */
1198int
1199setsockopt(td, uap)
1200 struct thread *td;
1201 register struct setsockopt_args /* {
1202 int s;
1203 int level;
1204 int name;
1205 caddr_t val;
1206 int valsize;
1207 } */ *uap;
1208{
1209 struct socket *so;
1210 struct sockopt sopt;
1211 int error;
1212
1213 if (uap->val == 0 && uap->valsize != 0)
1214 return (EFAULT);
1215 if (uap->valsize < 0)
1216 return (EINVAL);
1217
1218 mtx_lock(&Giant);
1219 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
1220 sopt.sopt_dir = SOPT_SET;
1221 sopt.sopt_level = uap->level;
1222 sopt.sopt_name = uap->name;
1223 sopt.sopt_val = uap->val;
1224 sopt.sopt_valsize = uap->valsize;
1225 sopt.sopt_td = td;
1226 error = sosetopt(so, &sopt);
1227 fputsock(so);
1228 }
1229 mtx_unlock(&Giant);
1230 return(error);
1231}
1232
1233/*
1234 * MPSAFE
1235 */
1236/* ARGSUSED */
1237int
1238getsockopt(td, uap)
1239 struct thread *td;
1240 register struct getsockopt_args /* {
1241 int s;
1242 int level;
1243 int name;
1244 caddr_t val;
1245 int *avalsize;
1246 } */ *uap;
1247{
1248 int valsize, error;
1249 struct socket *so;
1250 struct sockopt sopt;
1251
1252 mtx_lock(&Giant);
1253 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
1254 goto done2;
1255 if (uap->val) {
1256 error = copyin((caddr_t)uap->avalsize, (caddr_t)&valsize,
1257 sizeof (valsize));
1258 if (error)
1259 goto done1;
1260 if (valsize < 0) {
1261 error = EINVAL;
1262 goto done1;
1263 }
1264 } else {
1265 valsize = 0;
1266 }
1267
1268 sopt.sopt_dir = SOPT_GET;
1269 sopt.sopt_level = uap->level;
1270 sopt.sopt_name = uap->name;
1271 sopt.sopt_val = uap->val;
1272 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
1273 sopt.sopt_td = td;
1274
1275 error = sogetopt(so, &sopt);
1276 if (error == 0) {
1277 valsize = sopt.sopt_valsize;
1278 error = copyout((caddr_t)&valsize,
1279 (caddr_t)uap->avalsize, sizeof (valsize));
1280 }
1281done1:
1282 fputsock(so);
1283done2:
1284 mtx_unlock(&Giant);
1285 return (error);
1286}
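/*
 * Userland sketch of the two option paths above: setsockopt(2) copies the
 * value in through sosetopt(), and getsockopt(2) writes the (possibly
 * shortened) size back out through *avalsize.  The options chosen are
 * illustrative.
 */
#include <sys/socket.h>

static int
sockopt_demo(int sock)
{
        int on = 1, soerr = 0;
        socklen_t len = sizeof(soerr);

        if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) == -1)
                return (-1);
        /* len is value-result: the kernel writes back the actual size. */
        if (getsockopt(sock, SOL_SOCKET, SO_ERROR, &soerr, &len) == -1)
                return (-1);
        return (soerr);
}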
1287
1288/*
1289 * getsockname1() - Get socket name.
1290 *
1291 * MPSAFE
1292 */
1293/* ARGSUSED */
1294static int
1295getsockname1(td, uap, compat)
1296 struct thread *td;
1297 register struct getsockname_args /* {
1298 int fdes;
1299 caddr_t asa;
1300 int *alen;
1301 } */ *uap;
1302 int compat;
1303{
1304 struct socket *so;
1305 struct sockaddr *sa;
1306 int len, error;
1307
1308 mtx_lock(&Giant);
1309 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
1310 goto done2;
1311 error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len));
1312 if (error)
1313 goto done1;
1314 sa = 0;
1315 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1316 if (error)
1317 goto bad;
1318 if (sa == 0) {
1319 len = 0;
1320 goto gotnothing;
1321 }
1322
1323 len = MIN(len, sa->sa_len);
1324#ifdef COMPAT_OLDSOCK
1325 if (compat)
1326 ((struct osockaddr *)sa)->sa_family = sa->sa_family;
1327#endif
1328 error = copyout(sa, (caddr_t)uap->asa, (u_int)len);
1329 if (error == 0)
1330gotnothing:
1331 error = copyout((caddr_t)&len, (caddr_t)uap->alen,
1332 sizeof (len));
1333bad:
1334 if (sa)
1335 FREE(sa, M_SONAME);
1336done1:
1337 fputsock(so);
1338done2:
1339 mtx_unlock(&Giant);
1340 return (error);
1341}
1342
1343/*
1344 * MPSAFE
1345 */
1346int
1347getsockname(td, uap)
1348 struct thread *td;
1349 struct getsockname_args *uap;
1350{
1351
1352 return (getsockname1(td, uap, 0));
1353}
1354
1355#ifdef COMPAT_OLDSOCK
1356/*
1357 * MPSAFE
1358 */
1359int
1360ogetsockname(td, uap)
1361 struct thread *td;
1362 struct getsockname_args *uap;
1363{
1364
1365 return (getsockname1(td, uap, 1));
1366}
1367#endif /* COMPAT_OLDSOCK */
1368
1369/*
1370 * getpeername1() - Get name of peer for connected socket.
1371 *
1372 * MPSAFE
1373 */
1374/* ARGSUSED */
1375static int
1376getpeername1(td, uap, compat)
1377 struct thread *td;
1378 register struct getpeername_args /* {
1379 int fdes;
1380 caddr_t asa;
1381 int *alen;
1382 } */ *uap;
1383 int compat;
1384{
1385 struct socket *so;
1386 struct sockaddr *sa;
1387 int len, error;
1388
1389 mtx_lock(&Giant);
1390 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
1391 goto done2;
1392 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1393 error = ENOTCONN;
1394 goto done1;
1395 }
1396 error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len));
1397 if (error)
1398 goto done1;
1399 sa = 0;
1400 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1401 if (error)
1402 goto bad;
1403 if (sa == 0) {
1404 len = 0;
1405 goto gotnothing;
1406 }
1407 len = MIN(len, sa->sa_len);
1408#ifdef COMPAT_OLDSOCK
1409 if (compat)
1410 ((struct osockaddr *)sa)->sa_family =
1411 sa->sa_family;
1412#endif
1413 error = copyout(sa, (caddr_t)uap->asa, (u_int)len);
1414 if (error)
1415 goto bad;
1416gotnothing:
1417 error = copyout((caddr_t)&len, (caddr_t)uap->alen, sizeof (len));
1418bad:
1419 if (sa)
1420 FREE(sa, M_SONAME);
1421done1:
1422 fputsock(so);
1423done2:
1424 mtx_unlock(&Giant);
1425 return (error);
1426}
1427
1428/*
1429 * MPSAFE
1430 */
1431int
1432getpeername(td, uap)
1433 struct thread *td;
1434 struct getpeername_args *uap;
1435{
1436
1437 return (getpeername1(td, uap, 0));
1438}
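/*
 * Userland sketch of the two address queries above; both take a
 * value-result length, and the kernel truncates the copied-out address to
 * MIN(len, sa_len) as in getsockname1()/getpeername1().  Names are
 * illustrative.
 */
#include <sys/socket.h>

static int
name_demo(int sock)
{
        struct sockaddr_storage local, peer;
        socklen_t locallen = sizeof(local), peerlen = sizeof(peer);

        if (getsockname(sock, (struct sockaddr *)&local, &locallen) == -1)
                return (-1);
        if (getpeername(sock, (struct sockaddr *)&peer, &peerlen) == -1)
                return (-1);    /* e.g. ENOTCONN on an unconnected socket */
        return (0);
}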
1439
1440#ifdef COMPAT_OLDSOCK
1441/*
1442 * MPSAFE
1443 */
1444int
1445ogetpeername(td, uap)
1446 struct thread *td;
1447 struct ogetpeername_args *uap;
1448{
1449
1450 /* XXX uap should have type `getpeername_args *' to begin with. */
1451 return (getpeername1(td, (struct getpeername_args *)uap, 1));
1452}
1453#endif /* COMPAT_OLDSOCK */
1454
1455int
1456sockargs(mp, buf, buflen, type)
1457 struct mbuf **mp;
1458 caddr_t buf;
1459 int buflen, type;
1460{
1461 register struct sockaddr *sa;
1462 register struct mbuf *m;
1463 int error;
1464
1465 if ((u_int)buflen > MLEN) {
1466#ifdef COMPAT_OLDSOCK
1467 if (type == MT_SONAME && (u_int)buflen <= 112)
1468 buflen = MLEN; /* unix domain compat. hack */
1469 else
1470#endif
1471 return (EINVAL);
1472 }
1473 m = m_get(M_TRYWAIT, type);
1474 if (m == NULL)
1475 return (ENOBUFS);
1476 m->m_len = buflen;
1477 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1478 if (error)
1479 (void) m_free(m);
1480 else {
1481 *mp = m;
1482 if (type == MT_SONAME) {
1483 sa = mtod(m, struct sockaddr *);
1484
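 /*
  * The 4.3BSD sockaddr began with a 16-bit sa_family and had no sa_len,
  * so on a little-endian machine an old-format address arrives with the
  * family value in the byte that is now sa_len and a zero in sa_family;
  * the check below patches that case up before sa_len is overwritten.
  */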
1485#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1486 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1487 sa->sa_family = sa->sa_len;
1488#endif
1489 sa->sa_len = buflen;
1490 }
1491 }
1492 return (error);
1493}
1494
1495int
1496getsockaddr(namp, uaddr, len)
1497 struct sockaddr **namp;
1498 caddr_t uaddr;
1499 size_t len;
1500{
1501 struct sockaddr *sa;
1502 int error;
1503
1504 if (len > SOCK_MAXADDRLEN)
1505 return ENAMETOOLONG;
1506 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1507 error = copyin(uaddr, sa, len);
1508 if (error) {
1509 FREE(sa, M_SONAME);
1510 } else {
1511#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1512 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1513 sa->sa_family = sa->sa_len;
1514#endif
1515 sa->sa_len = len;
1516 *namp = sa;
1517 }
1518 return error;
1519}
1520
1521/*
1522 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
1523 * XXX - The sf_buf functions are currently private to sendfile(2), so have
1524 * been made static, but may be useful in the future for doing zero-copy in
1525 * other parts of the networking code.
1526 */
1527static void
1528sf_buf_init(void *arg)
1529{
1530 int i;
1531
1532 mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", MTX_DEF);
1533 mtx_lock(&sf_freelist.sf_lock);
1534 SLIST_INIT(&sf_freelist.sf_head);
1535 sf_base = kmem_alloc_pageable(kernel_map, nsfbufs * PAGE_SIZE);
1536 sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
1537 M_NOWAIT | M_ZERO);
1538 for (i = 0; i < nsfbufs; i++) {
1539 sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
1540 SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list);
1541 }
1542 sf_buf_alloc_want = 0;
1543 mtx_unlock(&sf_freelist.sf_lock);
1544}
1545
1546/*
1547 * Get an sf_buf from the freelist. Will block if none are available.
1548 */
1549static struct sf_buf *
1550sf_buf_alloc()
1551{
1552 struct sf_buf *sf;
1553 int error;
1554
1555 mtx_lock(&sf_freelist.sf_lock);
1556 while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) {
1557 sf_buf_alloc_want++;
1558 error = msleep(&sf_freelist, &sf_freelist.sf_lock, PVM|PCATCH,
1559 "sfbufa", 0);
1560 sf_buf_alloc_want--;
1561
1562 /*
1563 * If we got a signal, don't risk going back to sleep.
1564 */
1565 if (error)
1566 break;
1567 }
1568 if (sf != NULL)
1569 SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list);
1570 mtx_unlock(&sf_freelist.sf_lock);
1571 return (sf);
1572}
1573
1574#define dtosf(x) (&sf_bufs[((uintptr_t)(x) - (uintptr_t)sf_base) >> PAGE_SHIFT])
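/*
 * Worked example of the arithmetic above, assuming a 4K page (PAGE_SHIFT
 * of 12): for an address sf_base + 3 * PAGE_SIZE + 0x123 the difference
 * from sf_base is 12579, shifting right by 12 gives page index 3, so
 * dtosf() yields &sf_bufs[3], the sf_buf whose kva covers that page.
 */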
1575
1576/*
1577 * Detach mapped page and release resources back to the system.
1578 */
1579static void
1580sf_buf_free(caddr_t addr, void *args)
1581{
1582 struct sf_buf *sf;
1583 struct vm_page *m;
1584
1585 GIANT_REQUIRED;
1586
1587 sf = dtosf(addr);
1588 pmap_qremove((vm_offset_t)addr, 1);
1589 m = sf->m;
1590 vm_page_unwire(m, 0);
1591 /*
1592 * Check for the object going away on us. This can
1593 * happen since we don't hold a reference to it.
1594 * If so, we're responsible for freeing the page.
1595 */
1596 if (m->wire_count == 0 && m->object == NULL)
1597 vm_page_free(m);
1598 sf->m = NULL;
1599 mtx_lock(&sf_freelist.sf_lock);
1600 SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list);
1601 if (sf_buf_alloc_want > 0)
1602 wakeup_one(&sf_freelist);
1603 mtx_unlock(&sf_freelist.sf_lock);
1604}
1605
1606/*
1607 * sendfile(2)
1608 *
1609 * MPSAFE
1610 *
1611 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
1612 * struct sf_hdtr *hdtr, off_t *sbytes, int flags)
1613 *
1614 * Send a file specified by 'fd' and starting at 'offset' to a socket
1615 * specified by 's'. Send only 'nbytes' of the file or until EOF if
1616 * nbytes == 0. Optionally add a header and/or trailer to the socket
1617 * output. If specified, write the total number of bytes sent into *sbytes.
1618 *
1619 */
1620int
1621sendfile(struct thread *td, struct sendfile_args *uap)
1622{
1623 struct vnode *vp;
1624 struct vm_object *obj;
1625 struct socket *so = NULL;
1626 struct mbuf *m;
1627 struct sf_buf *sf;
1628 struct vm_page *pg;
1629 struct writev_args nuap;
1630 struct sf_hdtr hdtr;
1631 off_t off, xfsize, sbytes = 0;
1632 int error, s;
1633
1634 mtx_lock(&Giant);
1635
1636 /*
1637 * The descriptor must be a regular file and have a backing VM object.
1638 */
1639 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
1640 goto done;
1641 if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) {
1642 error = EINVAL;
1643 goto done;
1644 }
1645 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
1646 goto done;
1647 if (so->so_type != SOCK_STREAM) {
1648 error = EINVAL;
1649 goto done;
1650 }
1651 if ((so->so_state & SS_ISCONNECTED) == 0) {
1652 error = ENOTCONN;
1653 goto done;
1654 }
1655 if (uap->offset < 0) {
1656 error = EINVAL;
1657 goto done;
1658 }
1659
1660 /*
1661 * If specified, get the pointer to the sf_hdtr struct for
1662 * any headers/trailers.
1663 */
1664 if (uap->hdtr != NULL) {
1665 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1666 if (error)
1667 goto done;
1668 /*
1669 * Send any headers. Wimp out and use writev(2).
1670 */
1671 if (hdtr.headers != NULL) {
1672 nuap.fd = uap->s;
1673 nuap.iovp = hdtr.headers;
1674 nuap.iovcnt = hdtr.hdr_cnt;
1675 error = writev(td, &nuap);
1676 if (error)
1677 goto done;
1678 sbytes += td->td_retval[0];
1679 }
1680 }
1681
1682 /*
1683 * Protect against multiple writers to the socket.
1684 */
1685 (void) sblock(&so->so_snd, M_WAITOK);
1686
1687 /*
1688 * Loop through the pages in the file, starting with the requested
1689 * offset. Get a file page (do I/O if necessary), map the file page
1690 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1691 * it on the socket.
1692 */
1693 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
1694 vm_pindex_t pindex;
1695 vm_offset_t pgoff;
1696
1697 pindex = OFF_TO_IDX(off);
1698retry_lookup:
1699 /*
1700 * Calculate the amount to transfer. Not to exceed a page,
1701 * the EOF, or the passed in nbytes.
1702 */
1703 xfsize = obj->un_pager.vnp.vnp_size - off;
1704 if (xfsize > PAGE_SIZE)
1705 xfsize = PAGE_SIZE;
1706 pgoff = (vm_offset_t)(off & PAGE_MASK);
1707 if (PAGE_SIZE - pgoff < xfsize)
1708 xfsize = PAGE_SIZE - pgoff;
1709 if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
1710 xfsize = uap->nbytes - sbytes;
1711 if (xfsize <= 0)
1712 break;
1713 /*
1714 * Optimize the non-blocking case by looking at the socket space
1715 * before going to the extra work of constituting the sf_buf.
1716 */
1717 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
1718 if (so->so_state & SS_CANTSENDMORE)
1719 error = EPIPE;
1720 else
1721 error = EAGAIN;
1722 sbunlock(&so->so_snd);
1723 goto done;
1724 }
1725 /*
1726 * Attempt to look up the page.
1727 *
1728 * Allocate if not found
1729 *
1730 * Wait and loop if busy.
1731 */
1732 pg = vm_page_lookup(obj, pindex);
1733
1734 if (pg == NULL) {
1735 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL);
1736 if (pg == NULL) {
1737 VM_WAIT;
1738 goto retry_lookup;
1739 }
1740 vm_page_wakeup(pg);
1741 } else if (vm_page_sleep_busy(pg, TRUE, "sfpbsy")) {
1742 goto retry_lookup;
1743 }
1744
1745 /*
1746 * Wire the page so it does not get ripped out from under
1747 * us.
1748 */
1749
1750 vm_page_wire(pg);
1751
1752 /*
1753 * If page is not valid for what we need, initiate I/O
1754 */
1755
1756 if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) {
1757 struct uio auio;
1758 struct iovec aiov;
1759 int bsize;
1760
1761 /*
1762 * Ensure that our page is still around when the I/O
1763 * completes.
1764 */
1765 vm_page_io_start(pg);
1766
1767 /*
1768 * Get the page from backing store.
1769 */
1770 bsize = vp->v_mount->mnt_stat.f_iosize;
1771 auio.uio_iov = &aiov;
1772 auio.uio_iovcnt = 1;
1773 aiov.iov_base = 0;
1774 aiov.iov_len = MAXBSIZE;
1775 auio.uio_resid = MAXBSIZE;
1776 auio.uio_offset = trunc_page(off);
1777 auio.uio_segflg = UIO_NOCOPY;
1778 auio.uio_rw = UIO_READ;
1779 auio.uio_td = td;
1780 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td);
1781 error = VOP_READ(vp, &auio, IO_VMIO | ((MAXBSIZE / bsize) << 16),
1782 td->td_proc->p_ucred);
1783 VOP_UNLOCK(vp, 0, td);
1784 vm_page_flag_clear(pg, PG_ZERO);
1785 vm_page_io_finish(pg);
1786 if (error) {
1787 vm_page_unwire(pg, 0);
1788 /*
1789 * See if anyone else might know about this page.
1790 * If not and it is not valid, then free it.
1791 */
1792 if (pg->wire_count == 0 && pg->valid == 0 &&
1793 pg->busy == 0 && !(pg->flags & PG_BUSY) &&
1794 pg->hold_count == 0) {
1795 vm_page_busy(pg);
1796 vm_page_free(pg);
1797 }
1798 sbunlock(&so->so_snd);
1799 goto done;
1800 }
1801 }
1802
1803
1804 /*
1805 * Get a sendfile buf. We usually wait as long as necessary,
1806 * but this wait can be interrupted.
1807 */
1808 if ((sf = sf_buf_alloc()) == NULL) {
1809 vm_page_unwire(pg, 0);
1810 if (pg->wire_count == 0 && pg->object == NULL)
1811 vm_page_free(pg);
1812 sbunlock(&so->so_snd);
1813 error = EINTR;
1814 goto done;
1815 }
1816
1817 /*
1818 * Allocate a kernel virtual page and insert the physical page
1819 * into it.
1820 */
1821 sf->m = pg;
1822 pmap_qenter(sf->kva, &pg, 1);
1823 /*
1824 * Get an mbuf header and set it up as having external storage.
1825 */
1826 MGETHDR(m, M_TRYWAIT, MT_DATA);
1827 if (m == NULL) {
1828 error = ENOBUFS;
1829 sf_buf_free((void *)sf->kva, NULL);
1830 sbunlock(&so->so_snd);
1831 goto done;
1832 }
1833 /*
1834 * Setup external storage for mbuf.
1835 */
1836 MEXTADD(m, sf->kva, PAGE_SIZE, sf_buf_free, NULL, M_RDONLY,
1837 EXT_SFBUF);
1838 m->m_data = (char *) sf->kva + pgoff;
1839 m->m_pkthdr.len = m->m_len = xfsize;
1840 /*
1841 * Add the buffer to the socket buffer chain.
1842 */
1843 s = splnet();
1844retry_space:
1845 /*
1846 * Make sure that the socket is still able to take more data.
1847 * CANTSENDMORE being true usually means that the connection
1848 * was closed. so_error is true when an error was sensed after
1849 * a previous send.
1850 * The state is checked after the page mapping and buffer
1851 * allocation above since those operations may block and make
1852 * any socket checks stale. From this point forward, nothing
1853 * blocks before the pru_send (or more accurately, any blocking
1854 * results in a loop back to here to re-check).
1855 */
1856 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
1857 if (so->so_state & SS_CANTSENDMORE) {
1858 error = EPIPE;
1859 } else {
1860 error = so->so_error;
1861 so->so_error = 0;
1862 }
1863 m_freem(m);
1864 sbunlock(&so->so_snd);
1865 splx(s);
1866 goto done;
1867 }
1868 /*
1869 * Wait for socket space to become available. We do this just
1870 * after checking the connection state above in order to avoid
1871 * a race condition with sbwait().
1872 */
1873 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
1874 if (so->so_state & SS_NBIO) {
1875 m_freem(m);
1876 sbunlock(&so->so_snd);
1877 splx(s);
1878 error = EAGAIN;
1879 goto done;
1880 }
1881 error = sbwait(&so->so_snd);
1882 /*
1883 * An error from sbwait usually indicates that we've
1884 * been interrupted by a signal. If we've sent anything
1885 * then return bytes sent, otherwise return the error.
1886 */
1887 if (error) {
1888 m_freem(m);
1889 sbunlock(&so->so_snd);
1890 splx(s);
1891 goto done;
1892 }
1893 goto retry_space;
1894 }
1895 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td);
1896 splx(s);
1897 if (error) {
1898 sbunlock(&so->so_snd);
1899 goto done;
1900 }
1901 }
1902 sbunlock(&so->so_snd);
1903
1904 /*
1905 * Send trailers. Wimp out and use writev(2).
1906 */
1907 if (uap->hdtr != NULL && hdtr.trailers != NULL) {
1908 nuap.fd = uap->s;
1909 nuap.iovp = hdtr.trailers;
1910 nuap.iovcnt = hdtr.trl_cnt;
1911 error = writev(td, &nuap);
1912 if (error)
1913 goto done;
1914 sbytes += td->td_retval[0];
1915 }
1916
1917done:
1918 /*
1919 * If there was no error we have to clear td->td_retval[0]
1920 * because it may have been set by writev.
1921 */
1922 if (error == 0) {
1923 td->td_retval[0] = 0;
1924 }
1925 if (uap->sbytes != NULL) {
1926 copyout(&sbytes, uap->sbytes, sizeof(off_t));
1927 }
1928 if (vp)
1929 vrele(vp);
1930 if (so)
1931 fputsock(so);
1932 mtx_unlock(&Giant);
1933 return (error);
1934}
1935
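/*
 * Userland sketch of the sendfile(2) interface documented above: send an
 * entire file preceded by a small header supplied through sf_hdtr, with
 * the byte count reported through *sbytes.  The descriptor names and the
 * header text are illustrative assumptions.
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/uio.h>

static int
sendfile_demo(int filefd, int sock)
{
        struct sf_hdtr hdtr;
        struct iovec hdr;
        off_t sbytes = 0;
        char line[] = "HTTP/1.0 200 OK\r\n\r\n";

        hdr.iov_base = line;
        hdr.iov_len = sizeof(line) - 1;
        hdtr.headers = &hdr;
        hdtr.hdr_cnt = 1;
        hdtr.trailers = NULL;
        hdtr.trl_cnt = 0;

        /* nbytes == 0 means "send until end of file"; *sbytes gets the total. */
        if (sendfile(filefd, sock, 0, 0, &hdtr, &sbytes, 0) == -1)
                return (-1);
        return (0);
}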