Deleted Added
full compact
kern_sendfile.c (43301) kern_sendfile.c (49413)
1/*
2 * Copyright (c) 1982, 1986, 1989, 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * sendfile(2) and related extensions:
6 * Copyright (c) 1998, David Greenman. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
1/*
2 * Copyright (c) 1982, 1986, 1989, 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * sendfile(2) and related extensions:
6 * Copyright (c) 1998, David Greenman. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
37 * $Id: uipc_syscalls.c,v 1.54 1999/01/25 16:53:53 fenner Exp $
37 * $Id: uipc_syscalls.c,v 1.55 1999/01/27 21:49:57 dillon Exp $
38 */
39
40#include "opt_compat.h"
41#include "opt_ktrace.h"
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/kernel.h>
46#include <sys/sysproto.h>
47#include <sys/malloc.h>
48#include <sys/filedesc.h>
49#include <sys/proc.h>
50#include <sys/fcntl.h>
51#include <sys/file.h>
52#include <sys/mbuf.h>
53#include <sys/protosw.h>
54#include <sys/socket.h>
55#include <sys/socketvar.h>
56#include <sys/signalvar.h>
57#include <sys/uio.h>
58#include <sys/vnode.h>
59#include <sys/lock.h>
60#include <sys/mount.h>
61#ifdef KTRACE
62#include <sys/ktrace.h>
63#endif
64#include <vm/vm.h>
65#include <vm/vm_prot.h>
66#include <vm/vm_object.h>
67#include <vm/vm_page.h>
68#include <vm/vm_pager.h>
69#include <vm/vm_pageout.h>
70#include <vm/vm_kern.h>
71#include <vm/vm_extern.h>
72#include <machine/limits.h>
73
74static void sf_buf_init(void *arg);
75SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
76static struct sf_buf *sf_buf_alloc(void);
77static void sf_buf_ref(caddr_t addr, u_int size);
78static void sf_buf_free(caddr_t addr, u_int size);
79
80static int sendit __P((struct proc *p, int s, struct msghdr *mp, int flags));
81static int recvit __P((struct proc *p, int s, struct msghdr *mp,
82 caddr_t namelenp));
83
84static int accept1 __P((struct proc *p, struct accept_args *uap, int compat));
85static int getsockname1 __P((struct proc *p, struct getsockname_args *uap,
86 int compat));
87static int getpeername1 __P((struct proc *p, struct getpeername_args *uap,
88 int compat));
89
90static SLIST_HEAD(, sf_buf) sf_freelist;
91static vm_offset_t sf_base;
92static struct sf_buf *sf_bufs;
93static int sf_buf_alloc_want;
94
95/*
96 * System call interface to the socket abstraction.
97 */
98#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
99#define COMPAT_OLDSOCK
100#endif
101
102extern struct fileops socketops;
103
104int
105socket(p, uap)
106 struct proc *p;
107 register struct socket_args /* {
108 int domain;
109 int type;
110 int protocol;
111 } */ *uap;
112{
113 struct filedesc *fdp = p->p_fd;
114 struct socket *so;
115 struct file *fp;
116 int fd, error;
117
118 error = falloc(p, &fp, &fd);
119 if (error)
120 return (error);
38 */
39
40#include "opt_compat.h"
41#include "opt_ktrace.h"
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/kernel.h>
46#include <sys/sysproto.h>
47#include <sys/malloc.h>
48#include <sys/filedesc.h>
49#include <sys/proc.h>
50#include <sys/fcntl.h>
51#include <sys/file.h>
52#include <sys/mbuf.h>
53#include <sys/protosw.h>
54#include <sys/socket.h>
55#include <sys/socketvar.h>
56#include <sys/signalvar.h>
57#include <sys/uio.h>
58#include <sys/vnode.h>
59#include <sys/lock.h>
60#include <sys/mount.h>
61#ifdef KTRACE
62#include <sys/ktrace.h>
63#endif
64#include <vm/vm.h>
65#include <vm/vm_prot.h>
66#include <vm/vm_object.h>
67#include <vm/vm_page.h>
68#include <vm/vm_pager.h>
69#include <vm/vm_pageout.h>
70#include <vm/vm_kern.h>
71#include <vm/vm_extern.h>
72#include <machine/limits.h>
73
74static void sf_buf_init(void *arg);
75SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
76static struct sf_buf *sf_buf_alloc(void);
77static void sf_buf_ref(caddr_t addr, u_int size);
78static void sf_buf_free(caddr_t addr, u_int size);
79
80static int sendit __P((struct proc *p, int s, struct msghdr *mp, int flags));
81static int recvit __P((struct proc *p, int s, struct msghdr *mp,
82 caddr_t namelenp));
83
84static int accept1 __P((struct proc *p, struct accept_args *uap, int compat));
85static int getsockname1 __P((struct proc *p, struct getsockname_args *uap,
86 int compat));
87static int getpeername1 __P((struct proc *p, struct getpeername_args *uap,
88 int compat));
89
90static SLIST_HEAD(, sf_buf) sf_freelist;
91static vm_offset_t sf_base;
92static struct sf_buf *sf_bufs;
93static int sf_buf_alloc_want;
94
95/*
96 * System call interface to the socket abstraction.
97 */
98#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
99#define COMPAT_OLDSOCK
100#endif
101
102extern struct fileops socketops;
103
104int
105socket(p, uap)
106 struct proc *p;
107 register struct socket_args /* {
108 int domain;
109 int type;
110 int protocol;
111 } */ *uap;
112{
113 struct filedesc *fdp = p->p_fd;
114 struct socket *so;
115 struct file *fp;
116 int fd, error;
117
118 error = falloc(p, &fp, &fd);
119 if (error)
120 return (error);
121 fp->f_flag = FREAD|FWRITE;
122 fp->f_type = DTYPE_SOCKET;
123 fp->f_ops = &socketops;
124 error = socreate(uap->domain, &so, uap->type, uap->protocol, p);
125 if (error) {
126 fdp->fd_ofiles[fd] = 0;
127 ffree(fp);
128 } else {
129 fp->f_data = (caddr_t)so;
121 error = socreate(uap->domain, &so, uap->type, uap->protocol, p);
122 if (error) {
123 fdp->fd_ofiles[fd] = 0;
124 ffree(fp);
125 } else {
126 fp->f_data = (caddr_t)so;
127 fp->f_flag = FREAD|FWRITE;
128 fp->f_ops = &socketops;
129 fp->f_type = DTYPE_SOCKET;
130 p->p_retval[0] = fd;
131 }
132 return (error);
133}
134
135/* ARGSUSED */
136int
137bind(p, uap)
138 struct proc *p;
139 register struct bind_args /* {
140 int s;
141 caddr_t name;
142 int namelen;
143 } */ *uap;
144{
145 struct file *fp;
146 struct sockaddr *sa;
147 int error;
148
149 error = getsock(p->p_fd, uap->s, &fp);
150 if (error)
151 return (error);
152 error = getsockaddr(&sa, uap->name, uap->namelen);
153 if (error)
154 return (error);
155 error = sobind((struct socket *)fp->f_data, sa, p);
156 FREE(sa, M_SONAME);
157 return (error);
158}
159
160/* ARGSUSED */
161int
162listen(p, uap)
163 struct proc *p;
164 register struct listen_args /* {
165 int s;
166 int backlog;
167 } */ *uap;
168{
169 struct file *fp;
170 int error;
171
172 error = getsock(p->p_fd, uap->s, &fp);
173 if (error)
174 return (error);
175 return (solisten((struct socket *)fp->f_data, uap->backlog, p));
176}
177
178static int
179accept1(p, uap, compat)
180 struct proc *p;
181 register struct accept_args /* {
182 int s;
183 caddr_t name;
184 int *anamelen;
185 } */ *uap;
186 int compat;
187{
188 struct file *fp;
189 struct sockaddr *sa;
190 int namelen, error, s;
191 struct socket *head, *so;
192 int fd;
193 short fflag; /* type must match fp->f_flag */
194
195 if (uap->name) {
196 error = copyin((caddr_t)uap->anamelen, (caddr_t)&namelen,
197 sizeof (namelen));
198 if(error)
199 return (error);
200 }
201 error = getsock(p->p_fd, uap->s, &fp);
202 if (error)
203 return (error);
204 s = splnet();
205 head = (struct socket *)fp->f_data;
206 if ((head->so_options & SO_ACCEPTCONN) == 0) {
207 splx(s);
208 return (EINVAL);
209 }
210 if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
211 splx(s);
212 return (EWOULDBLOCK);
213 }
214 while (head->so_comp.tqh_first == NULL && head->so_error == 0) {
215 if (head->so_state & SS_CANTRCVMORE) {
216 head->so_error = ECONNABORTED;
217 break;
218 }
219 error = tsleep((caddr_t)&head->so_timeo, PSOCK | PCATCH,
220 "accept", 0);
221 if (error) {
222 splx(s);
223 return (error);
224 }
225 }
226 if (head->so_error) {
227 error = head->so_error;
228 head->so_error = 0;
229 splx(s);
230 return (error);
231 }
232
233 /*
234 * At this point we know that there is at least one connection
235 * ready to be accepted. Remove it from the queue prior to
236 * allocating the file descriptor for it since falloc() may
237 * block allowing another process to accept the connection
238 * instead.
239 */
240 so = head->so_comp.tqh_first;
241 TAILQ_REMOVE(&head->so_comp, so, so_list);
242 head->so_qlen--;
243
244 fflag = fp->f_flag;
245 error = falloc(p, &fp, &fd);
246 if (error) {
247 /*
248 * Probably ran out of file descriptors. Put the
249 * unaccepted connection back onto the queue and
250 * do another wakeup so some other process might
251 * have a chance at it.
252 */
253 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list);
254 head->so_qlen++;
255 wakeup_one(&head->so_timeo);
256 splx(s);
257 return (error);
258 } else
259 p->p_retval[0] = fd;
260
261 so->so_state &= ~SS_COMP;
262 so->so_head = NULL;
263 if (head->so_sigio != NULL)
264 fsetown(fgetown(head->so_sigio), &so->so_sigio);
265
130 p->p_retval[0] = fd;
131 }
132 return (error);
133}
134
135/* ARGSUSED */
136int
137bind(p, uap)
138 struct proc *p;
139 register struct bind_args /* {
140 int s;
141 caddr_t name;
142 int namelen;
143 } */ *uap;
144{
145 struct file *fp;
146 struct sockaddr *sa;
147 int error;
148
149 error = getsock(p->p_fd, uap->s, &fp);
150 if (error)
151 return (error);
152 error = getsockaddr(&sa, uap->name, uap->namelen);
153 if (error)
154 return (error);
155 error = sobind((struct socket *)fp->f_data, sa, p);
156 FREE(sa, M_SONAME);
157 return (error);
158}
159
160/* ARGSUSED */
161int
162listen(p, uap)
163 struct proc *p;
164 register struct listen_args /* {
165 int s;
166 int backlog;
167 } */ *uap;
168{
169 struct file *fp;
170 int error;
171
172 error = getsock(p->p_fd, uap->s, &fp);
173 if (error)
174 return (error);
175 return (solisten((struct socket *)fp->f_data, uap->backlog, p));
176}
177
178static int
179accept1(p, uap, compat)
180 struct proc *p;
181 register struct accept_args /* {
182 int s;
183 caddr_t name;
184 int *anamelen;
185 } */ *uap;
186 int compat;
187{
188 struct file *fp;
189 struct sockaddr *sa;
190 int namelen, error, s;
191 struct socket *head, *so;
192 int fd;
193 short fflag; /* type must match fp->f_flag */
194
195 if (uap->name) {
196 error = copyin((caddr_t)uap->anamelen, (caddr_t)&namelen,
197 sizeof (namelen));
198 if(error)
199 return (error);
200 }
201 error = getsock(p->p_fd, uap->s, &fp);
202 if (error)
203 return (error);
204 s = splnet();
205 head = (struct socket *)fp->f_data;
206 if ((head->so_options & SO_ACCEPTCONN) == 0) {
207 splx(s);
208 return (EINVAL);
209 }
210 if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
211 splx(s);
212 return (EWOULDBLOCK);
213 }
214 while (head->so_comp.tqh_first == NULL && head->so_error == 0) {
215 if (head->so_state & SS_CANTRCVMORE) {
216 head->so_error = ECONNABORTED;
217 break;
218 }
219 error = tsleep((caddr_t)&head->so_timeo, PSOCK | PCATCH,
220 "accept", 0);
221 if (error) {
222 splx(s);
223 return (error);
224 }
225 }
226 if (head->so_error) {
227 error = head->so_error;
228 head->so_error = 0;
229 splx(s);
230 return (error);
231 }
232
233 /*
234 * At this point we know that there is at least one connection
235 * ready to be accepted. Remove it from the queue prior to
236 * allocating the file descriptor for it since falloc() may
237 * block allowing another process to accept the connection
238 * instead.
239 */
240 so = head->so_comp.tqh_first;
241 TAILQ_REMOVE(&head->so_comp, so, so_list);
242 head->so_qlen--;
243
244 fflag = fp->f_flag;
245 error = falloc(p, &fp, &fd);
246 if (error) {
247 /*
248 * Probably ran out of file descriptors. Put the
249 * unaccepted connection back onto the queue and
250 * do another wakeup so some other process might
251 * have a chance at it.
252 */
253 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list);
254 head->so_qlen++;
255 wakeup_one(&head->so_timeo);
256 splx(s);
257 return (error);
258 } else
259 p->p_retval[0] = fd;
260
261 so->so_state &= ~SS_COMP;
262 so->so_head = NULL;
263 if (head->so_sigio != NULL)
264 fsetown(fgetown(head->so_sigio), &so->so_sigio);
265
266 fp->f_type = DTYPE_SOCKET;
266 fp->f_data = (caddr_t)so;
267 fp->f_flag = fflag;
268 fp->f_ops = &socketops;
267 fp->f_flag = fflag;
268 fp->f_ops = &socketops;
269 fp->f_data = (caddr_t)so;
269 fp->f_type = DTYPE_SOCKET;
270 sa = 0;
271 (void) soaccept(so, &sa);
272 if (sa == 0) {
273 namelen = 0;
274 if (uap->name)
275 goto gotnoname;
276 return 0;
277 }
278 if (uap->name) {
279 /* check sa_len before it is destroyed */
280 if (namelen > sa->sa_len)
281 namelen = sa->sa_len;
282#ifdef COMPAT_OLDSOCK
283 if (compat)
284 ((struct osockaddr *)sa)->sa_family =
285 sa->sa_family;
286#endif
287 error = copyout(sa, (caddr_t)uap->name, (u_int)namelen);
288 if (!error)
289gotnoname:
290 error = copyout((caddr_t)&namelen,
291 (caddr_t)uap->anamelen, sizeof (*uap->anamelen));
292 }
293 if (sa)
294 FREE(sa, M_SONAME);
295 splx(s);
296 return (error);
297}
298
/*
 * accept(2) entry point: accept1() with the compat argument set to 0.
 */
int
accept(p, uap)
	struct proc *p;
	struct accept_args *uap;
{
	int rc;

	rc = accept1(p, uap, 0);
	return (rc);
}
307
308#ifdef COMPAT_OLDSOCK
/*
 * Old-style accept entry point: accept1() with the compat argument set
 * to 1, so the peer address is copied out in the old sockaddr layout.
 */
int
oaccept(p, uap)
	struct proc *p;
	struct accept_args *uap;
{
	int rc;

	rc = accept1(p, uap, 1);
	return (rc);
}
317#endif /* COMPAT_OLDSOCK */
318
319/* ARGSUSED */
320int
321connect(p, uap)
322 struct proc *p;
323 register struct connect_args /* {
324 int s;
325 caddr_t name;
326 int namelen;
327 } */ *uap;
328{
329 struct file *fp;
330 register struct socket *so;
331 struct sockaddr *sa;
332 int error, s;
333
334 error = getsock(p->p_fd, uap->s, &fp);
335 if (error)
336 return (error);
337 so = (struct socket *)fp->f_data;
338 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING))
339 return (EALREADY);
340 error = getsockaddr(&sa, uap->name, uap->namelen);
341 if (error)
342 return (error);
343 error = soconnect(so, sa, p);
344 if (error)
345 goto bad;
346 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
347 FREE(sa, M_SONAME);
348 return (EINPROGRESS);
349 }
350 s = splnet();
351 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
352 error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH,
353 "connec", 0);
354 if (error)
355 break;
356 }
357 if (error == 0) {
358 error = so->so_error;
359 so->so_error = 0;
360 }
361 splx(s);
362bad:
363 so->so_state &= ~SS_ISCONNECTING;
364 FREE(sa, M_SONAME);
365 if (error == ERESTART)
366 error = EINTR;
367 return (error);
368}
369
370int
371socketpair(p, uap)
372 struct proc *p;
373 register struct socketpair_args /* {
374 int domain;
375 int type;
376 int protocol;
377 int *rsv;
378 } */ *uap;
379{
380 register struct filedesc *fdp = p->p_fd;
381 struct file *fp1, *fp2;
382 struct socket *so1, *so2;
383 int fd, error, sv[2];
384
385 error = socreate(uap->domain, &so1, uap->type, uap->protocol, p);
386 if (error)
387 return (error);
388 error = socreate(uap->domain, &so2, uap->type, uap->protocol, p);
389 if (error)
390 goto free1;
391 error = falloc(p, &fp1, &fd);
392 if (error)
393 goto free2;
394 sv[0] = fd;
270 sa = 0;
271 (void) soaccept(so, &sa);
272 if (sa == 0) {
273 namelen = 0;
274 if (uap->name)
275 goto gotnoname;
276 return 0;
277 }
278 if (uap->name) {
279 /* check sa_len before it is destroyed */
280 if (namelen > sa->sa_len)
281 namelen = sa->sa_len;
282#ifdef COMPAT_OLDSOCK
283 if (compat)
284 ((struct osockaddr *)sa)->sa_family =
285 sa->sa_family;
286#endif
287 error = copyout(sa, (caddr_t)uap->name, (u_int)namelen);
288 if (!error)
289gotnoname:
290 error = copyout((caddr_t)&namelen,
291 (caddr_t)uap->anamelen, sizeof (*uap->anamelen));
292 }
293 if (sa)
294 FREE(sa, M_SONAME);
295 splx(s);
296 return (error);
297}
298
/*
 * accept(2) entry point: accept1() with the compat argument set to 0.
 */
int
accept(p, uap)
	struct proc *p;
	struct accept_args *uap;
{
	int rc;

	rc = accept1(p, uap, 0);
	return (rc);
}
307
308#ifdef COMPAT_OLDSOCK
/*
 * Old-style accept entry point: accept1() with the compat argument set
 * to 1, so the peer address is copied out in the old sockaddr layout.
 */
int
oaccept(p, uap)
	struct proc *p;
	struct accept_args *uap;
{
	int rc;

	rc = accept1(p, uap, 1);
	return (rc);
}
317#endif /* COMPAT_OLDSOCK */
318
319/* ARGSUSED */
320int
321connect(p, uap)
322 struct proc *p;
323 register struct connect_args /* {
324 int s;
325 caddr_t name;
326 int namelen;
327 } */ *uap;
328{
329 struct file *fp;
330 register struct socket *so;
331 struct sockaddr *sa;
332 int error, s;
333
334 error = getsock(p->p_fd, uap->s, &fp);
335 if (error)
336 return (error);
337 so = (struct socket *)fp->f_data;
338 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING))
339 return (EALREADY);
340 error = getsockaddr(&sa, uap->name, uap->namelen);
341 if (error)
342 return (error);
343 error = soconnect(so, sa, p);
344 if (error)
345 goto bad;
346 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
347 FREE(sa, M_SONAME);
348 return (EINPROGRESS);
349 }
350 s = splnet();
351 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
352 error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH,
353 "connec", 0);
354 if (error)
355 break;
356 }
357 if (error == 0) {
358 error = so->so_error;
359 so->so_error = 0;
360 }
361 splx(s);
362bad:
363 so->so_state &= ~SS_ISCONNECTING;
364 FREE(sa, M_SONAME);
365 if (error == ERESTART)
366 error = EINTR;
367 return (error);
368}
369
370int
371socketpair(p, uap)
372 struct proc *p;
373 register struct socketpair_args /* {
374 int domain;
375 int type;
376 int protocol;
377 int *rsv;
378 } */ *uap;
379{
380 register struct filedesc *fdp = p->p_fd;
381 struct file *fp1, *fp2;
382 struct socket *so1, *so2;
383 int fd, error, sv[2];
384
385 error = socreate(uap->domain, &so1, uap->type, uap->protocol, p);
386 if (error)
387 return (error);
388 error = socreate(uap->domain, &so2, uap->type, uap->protocol, p);
389 if (error)
390 goto free1;
391 error = falloc(p, &fp1, &fd);
392 if (error)
393 goto free2;
394 sv[0] = fd;
395 fp1->f_flag = FREAD|FWRITE;
396 fp1->f_type = DTYPE_SOCKET;
397 fp1->f_ops = &socketops;
398 fp1->f_data = (caddr_t)so1;
399 error = falloc(p, &fp2, &fd);
400 if (error)
401 goto free3;
395 fp1->f_data = (caddr_t)so1;
396 error = falloc(p, &fp2, &fd);
397 if (error)
398 goto free3;
402 fp2->f_flag = FREAD|FWRITE;
403 fp2->f_type = DTYPE_SOCKET;
404 fp2->f_ops = &socketops;
405 fp2->f_data = (caddr_t)so2;
406 sv[1] = fd;
407 error = soconnect2(so1, so2);
408 if (error)
409 goto free4;
410 if (uap->type == SOCK_DGRAM) {
411 /*
412 * Datagram socket connection is asymmetric.
413 */
414 error = soconnect2(so2, so1);
415 if (error)
416 goto free4;
417 }
399 fp2->f_data = (caddr_t)so2;
400 sv[1] = fd;
401 error = soconnect2(so1, so2);
402 if (error)
403 goto free4;
404 if (uap->type == SOCK_DGRAM) {
405 /*
406 * Datagram socket connection is asymmetric.
407 */
408 error = soconnect2(so2, so1);
409 if (error)
410 goto free4;
411 }
412 fp1->f_flag = fp2->f_flag = FREAD|FWRITE;
413 fp1->f_ops = fp2->f_ops = &socketops;
414 fp1->f_type = fp2->f_type = DTYPE_SOCKET;
418 error = copyout((caddr_t)sv, (caddr_t)uap->rsv, 2 * sizeof (int));
419 return (error);
420free4:
415 error = copyout((caddr_t)sv, (caddr_t)uap->rsv, 2 * sizeof (int));
416 return (error);
417free4:
421 ffree(fp2);
422 fdp->fd_ofiles[sv[1]] = 0;
418 fdp->fd_ofiles[sv[1]] = 0;
419 ffree(fp2);
423free3:
420free3:
424 ffree(fp1);
425 fdp->fd_ofiles[sv[0]] = 0;
421 fdp->fd_ofiles[sv[0]] = 0;
422 ffree(fp1);
426free2:
427 (void)soclose(so2);
428free1:
429 (void)soclose(so1);
430 return (error);
431}
432
433static int
434sendit(p, s, mp, flags)
435 register struct proc *p;
436 int s;
437 register struct msghdr *mp;
438 int flags;
439{
440 struct file *fp;
441 struct uio auio;
442 register struct iovec *iov;
443 register int i;
444 struct mbuf *control;
445 struct sockaddr *to;
446 int len, error;
447 struct socket *so;
448#ifdef KTRACE
449 struct iovec *ktriov = NULL;
450#endif
451
452 error = getsock(p->p_fd, s, &fp);
453 if (error)
454 return (error);
455 auio.uio_iov = mp->msg_iov;
456 auio.uio_iovcnt = mp->msg_iovlen;
457 auio.uio_segflg = UIO_USERSPACE;
458 auio.uio_rw = UIO_WRITE;
459 auio.uio_procp = p;
460 auio.uio_offset = 0; /* XXX */
461 auio.uio_resid = 0;
462 iov = mp->msg_iov;
463 for (i = 0; i < mp->msg_iovlen; i++, iov++) {
464 if ((auio.uio_resid += iov->iov_len) < 0)
465 return (EINVAL);
466 }
467 if (mp->msg_name) {
468 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
469 if (error)
470 return (error);
471 } else
472 to = 0;
473 if (mp->msg_control) {
474 if (mp->msg_controllen < sizeof(struct cmsghdr)
475#ifdef COMPAT_OLDSOCK
476 && mp->msg_flags != MSG_COMPAT
477#endif
478 ) {
479 error = EINVAL;
480 goto bad;
481 }
482 error = sockargs(&control, mp->msg_control,
483 mp->msg_controllen, MT_CONTROL);
484 if (error)
485 goto bad;
486#ifdef COMPAT_OLDSOCK
487 if (mp->msg_flags == MSG_COMPAT) {
488 register struct cmsghdr *cm;
489
490 M_PREPEND(control, sizeof(*cm), M_WAIT);
491 if (control == 0) {
492 error = ENOBUFS;
493 goto bad;
494 } else {
495 cm = mtod(control, struct cmsghdr *);
496 cm->cmsg_len = control->m_len;
497 cm->cmsg_level = SOL_SOCKET;
498 cm->cmsg_type = SCM_RIGHTS;
499 }
500 }
501#endif
502 } else
503 control = 0;
504#ifdef KTRACE
505 if (KTRPOINT(p, KTR_GENIO)) {
506 int iovlen = auio.uio_iovcnt * sizeof (struct iovec);
507
508 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
509 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
510 }
511#endif
512 len = auio.uio_resid;
513 so = (struct socket *)fp->f_data;
514 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, &auio, 0, control,
515 flags, p);
516 if (error) {
517 if (auio.uio_resid != len && (error == ERESTART ||
518 error == EINTR || error == EWOULDBLOCK))
519 error = 0;
520 if (error == EPIPE)
521 psignal(p, SIGPIPE);
522 }
523 if (error == 0)
524 p->p_retval[0] = len - auio.uio_resid;
525#ifdef KTRACE
526 if (ktriov != NULL) {
527 if (error == 0)
528 ktrgenio(p->p_tracep, s, UIO_WRITE,
529 ktriov, p->p_retval[0], error);
530 FREE(ktriov, M_TEMP);
531 }
532#endif
533bad:
534 if (to)
535 FREE(to, M_SONAME);
536 return (error);
537}
538
539int
540sendto(p, uap)
541 struct proc *p;
542 register struct sendto_args /* {
543 int s;
544 caddr_t buf;
545 size_t len;
546 int flags;
547 caddr_t to;
548 int tolen;
549 } */ *uap;
550{
551 struct msghdr msg;
552 struct iovec aiov;
553
554 msg.msg_name = uap->to;
555 msg.msg_namelen = uap->tolen;
556 msg.msg_iov = &aiov;
557 msg.msg_iovlen = 1;
558 msg.msg_control = 0;
559#ifdef COMPAT_OLDSOCK
560 msg.msg_flags = 0;
561#endif
562 aiov.iov_base = uap->buf;
563 aiov.iov_len = uap->len;
564 return (sendit(p, uap->s, &msg, uap->flags));
565}
566
567#ifdef COMPAT_OLDSOCK
568int
569osend(p, uap)
570 struct proc *p;
571 register struct osend_args /* {
572 int s;
573 caddr_t buf;
574 int len;
575 int flags;
576 } */ *uap;
577{
578 struct msghdr msg;
579 struct iovec aiov;
580
581 msg.msg_name = 0;
582 msg.msg_namelen = 0;
583 msg.msg_iov = &aiov;
584 msg.msg_iovlen = 1;
585 aiov.iov_base = uap->buf;
586 aiov.iov_len = uap->len;
587 msg.msg_control = 0;
588 msg.msg_flags = 0;
589 return (sendit(p, uap->s, &msg, uap->flags));
590}
591
592int
593osendmsg(p, uap)
594 struct proc *p;
595 register struct osendmsg_args /* {
596 int s;
597 caddr_t msg;
598 int flags;
599 } */ *uap;
600{
601 struct msghdr msg;
602 struct iovec aiov[UIO_SMALLIOV], *iov;
603 int error;
604
605 error = copyin(uap->msg, (caddr_t)&msg, sizeof (struct omsghdr));
606 if (error)
607 return (error);
608 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
609 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
610 return (EMSGSIZE);
611 MALLOC(iov, struct iovec *,
612 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
613 M_WAITOK);
614 } else
615 iov = aiov;
616 error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
617 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
618 if (error)
619 goto done;
620 msg.msg_flags = MSG_COMPAT;
621 msg.msg_iov = iov;
622 error = sendit(p, uap->s, &msg, uap->flags);
623done:
624 if (iov != aiov)
625 FREE(iov, M_IOV);
626 return (error);
627}
628#endif
629
630int
631sendmsg(p, uap)
632 struct proc *p;
633 register struct sendmsg_args /* {
634 int s;
635 caddr_t msg;
636 int flags;
637 } */ *uap;
638{
639 struct msghdr msg;
640 struct iovec aiov[UIO_SMALLIOV], *iov;
641 int error;
642
643 error = copyin(uap->msg, (caddr_t)&msg, sizeof (msg));
644 if (error)
645 return (error);
646 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
647 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
648 return (EMSGSIZE);
649 MALLOC(iov, struct iovec *,
650 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
651 M_WAITOK);
652 } else
653 iov = aiov;
654 if (msg.msg_iovlen &&
655 (error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
656 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)))))
657 goto done;
658 msg.msg_iov = iov;
659#ifdef COMPAT_OLDSOCK
660 msg.msg_flags = 0;
661#endif
662 error = sendit(p, uap->s, &msg, uap->flags);
663done:
664 if (iov != aiov)
665 FREE(iov, M_IOV);
666 return (error);
667}
668
/*
 * Common back end of the receive family of system calls.
 *
 * p        - calling process; the byte count received is stored in
 *            p_retval[0].
 * s        - socket descriptor.
 * mp       - kernel copy of the message header.  msg_flags is passed to
 *            the protocol's pru_soreceive routine by reference;
 *            msg_namelen and msg_controllen are rewritten with the
 *            lengths actually copied out.
 * namelenp - if non-null, user address that receives the source address
 *            length.
 *
 * A transfer that moved some data before being cut short by ERESTART,
 * EINTR or EWOULDBLOCK is reported as a success.  The source address and
 * any control mbufs handed back by the protocol are freed before return.
 *
 * Returns 0 or an errno value (EINVAL if the iovec lengths sum to a
 * negative total).
 */
669static int
670recvit(p, s, mp, namelenp)
671 register struct proc *p;
672 int s;
673 register struct msghdr *mp;
674 caddr_t namelenp;
675{
676 struct file *fp;
677 struct uio auio;
678 register struct iovec *iov;
679 register int i;
680 int len, error;
681 struct mbuf *m, *control = 0;
682 caddr_t ctlbuf;
683 struct socket *so;
684 struct sockaddr *fromsa = 0;
685#ifdef KTRACE
686 struct iovec *ktriov = NULL;
687#endif
688
689 error = getsock(p->p_fd, s, &fp);
690 if (error)
691 return (error);
692 auio.uio_iov = mp->msg_iov;
693 auio.uio_iovcnt = mp->msg_iovlen;
694 auio.uio_segflg = UIO_USERSPACE;
695 auio.uio_rw = UIO_READ;
696 auio.uio_procp = p;
697 auio.uio_offset = 0; /* XXX */
698 auio.uio_resid = 0;
699 iov = mp->msg_iov;
/* Sum the segment lengths, rejecting a total that goes negative. */
700 for (i = 0; i < mp->msg_iovlen; i++, iov++) {
701 if ((auio.uio_resid += iov->iov_len) < 0)
702 return (EINVAL);
703 }
704#ifdef KTRACE
705 if (KTRPOINT(p, KTR_GENIO)) {
706 int iovlen = auio.uio_iovcnt * sizeof (struct iovec);
707
708 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
709 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
710 }
711#endif
/* len holds the requested total so the amount moved can be computed. */
712 len = auio.uio_resid;
713 so = (struct socket *)fp->f_data;
/* Control data is only requested when the caller supplied a buffer. */
714 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
715 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
716 &mp->msg_flags);
717 if (error) {
/* Interrupted after a partial transfer counts as success. */
718 if (auio.uio_resid != len && (error == ERESTART ||
719 error == EINTR || error == EWOULDBLOCK))
720 error = 0;
721 }
722#ifdef KTRACE
723 if (ktriov != NULL) {
724 if (error == 0)
725 ktrgenio(p->p_tracep, s, UIO_READ,
726 ktriov, len - auio.uio_resid, error);
727 FREE(ktriov, M_TEMP);
728 }
729#endif
730 if (error)
731 goto out;
732 p->p_retval[0] = len - auio.uio_resid;
/* From here on len is reused for the source address length. */
733 if (mp->msg_name) {
734 len = mp->msg_namelen;
735 if (len <= 0 || fromsa == 0)
736 len = 0;
737 else {
738#ifndef MIN
739#define MIN(a,b) ((a)>(b)?(b):(a))
740#endif
741 /* save sa_len before it is destroyed by MSG_COMPAT */
742 len = MIN(len, fromsa->sa_len);
743#ifdef COMPAT_OLDSOCK
744 if (mp->msg_flags & MSG_COMPAT)
745 ((struct osockaddr *)fromsa)->sa_family =
746 fromsa->sa_family;
747#endif
748 error = copyout(fromsa,
749 (caddr_t)mp->msg_name, (unsigned)len);
750 if (error)
751 goto out;
752 }
753 mp->msg_namelen = len;
/*
 * With COMPAT_OLDSOCK the "else" below binds to the "goto out" after the
 * #endif: MSG_COMPAT callers have the copyout error cleared, everyone
 * else bails out.
 */
754 if (namelenp &&
755 (error = copyout((caddr_t)&len, namelenp, sizeof (int)))) {
756#ifdef COMPAT_OLDSOCK
757 if (mp->msg_flags & MSG_COMPAT)
758 error = 0; /* old recvfrom didn't check */
759 else
760#endif
761 goto out;
762 }
763 }
764 if (mp->msg_control) {
765#ifdef COMPAT_OLDSOCK
766 /*
767 * We assume that old recvmsg calls won't receive access
768 * rights and other control info, esp. as control info
769 * is always optional and those options didn't exist in 4.3.
770 * If we receive rights, trim the cmsghdr; anything else
771 * is tossed.
772 */
773 if (control && mp->msg_flags & MSG_COMPAT) {
774 if (mtod(control, struct cmsghdr *)->cmsg_level !=
775 SOL_SOCKET ||
776 mtod(control, struct cmsghdr *)->cmsg_type !=
777 SCM_RIGHTS) {
778 mp->msg_controllen = 0;
779 goto out;
780 }
781 control->m_len -= sizeof (struct cmsghdr);
782 control->m_data += sizeof (struct cmsghdr);
783 }
784#endif
/* len now counts the space left in the caller's control buffer. */
785 len = mp->msg_controllen;
786 m = control;
787 mp->msg_controllen = 0;
788 ctlbuf = (caddr_t) mp->msg_control;
789
/* Copy out one mbuf at a time until the chain or the buffer runs out. */
790 while (m && len > 0) {
791 unsigned int tocopy;
792
793 if (len >= m->m_len)
794 tocopy = m->m_len;
795 else {
/* This mbuf does not fit: copy what fits and flag the truncation. */
796 mp->msg_flags |= MSG_CTRUNC;
797 tocopy = len;
798 }
799
800 if ((error = copyout((caddr_t)mtod(m, caddr_t),
801 ctlbuf, tocopy)) != 0)
802 goto out;
803
804 ctlbuf += tocopy;
805 len -= tocopy;
806 m = m->m_next;
807 }
/* Report the number of control bytes actually copied out. */
808 mp->msg_controllen = ctlbuf - mp->msg_control;
809 }
810out:
811 if (fromsa)
812 FREE(fromsa, M_SONAME);
813 if (control)
814 m_freem(control);
815 return (error);
816}
817
818int
819recvfrom(p, uap)
820 struct proc *p;
821 register struct recvfrom_args /* {
822 int s;
823 caddr_t buf;
824 size_t len;
825 int flags;
826 caddr_t from;
827 int *fromlenaddr;
828 } */ *uap;
829{
830 struct msghdr msg;
831 struct iovec aiov;
832 int error;
833
834 if (uap->fromlenaddr) {
835 error = copyin((caddr_t)uap->fromlenaddr,
836 (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen));
837 if (error)
838 return (error);
839 } else
840 msg.msg_namelen = 0;
841 msg.msg_name = uap->from;
842 msg.msg_iov = &aiov;
843 msg.msg_iovlen = 1;
844 aiov.iov_base = uap->buf;
845 aiov.iov_len = uap->len;
846 msg.msg_control = 0;
847 msg.msg_flags = uap->flags;
848 return (recvit(p, uap->s, &msg, (caddr_t)uap->fromlenaddr));
849}
850
851#ifdef COMPAT_OLDSOCK
852int
853orecvfrom(p, uap)
854 struct proc *p;
855 struct recvfrom_args *uap;
856{
857
858 uap->flags |= MSG_COMPAT;
859 return (recvfrom(p, uap));
860}
861#endif
862
863
864#ifdef COMPAT_OLDSOCK
865int
866orecv(p, uap)
867 struct proc *p;
868 register struct orecv_args /* {
869 int s;
870 caddr_t buf;
871 int len;
872 int flags;
873 } */ *uap;
874{
875 struct msghdr msg;
876 struct iovec aiov;
877
878 msg.msg_name = 0;
879 msg.msg_namelen = 0;
880 msg.msg_iov = &aiov;
881 msg.msg_iovlen = 1;
882 aiov.iov_base = uap->buf;
883 aiov.iov_len = uap->len;
884 msg.msg_control = 0;
885 msg.msg_flags = uap->flags;
886 return (recvit(p, uap->s, &msg, (caddr_t)0));
887}
888
889/*
890 * Old recvmsg. This code takes advantage of the fact that the old msghdr
891 * overlays the new one, missing only the flags, and with the (old) access
892 * rights where the control fields are now.
893 */
894int
895orecvmsg(p, uap)
896 struct proc *p;
897 register struct orecvmsg_args /* {
898 int s;
899 struct omsghdr *msg;
900 int flags;
901 } */ *uap;
902{
903 struct msghdr msg;
904 struct iovec aiov[UIO_SMALLIOV], *iov;
905 int error;
906
907 error = copyin((caddr_t)uap->msg, (caddr_t)&msg,
908 sizeof (struct omsghdr));
909 if (error)
910 return (error);
911 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
912 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
913 return (EMSGSIZE);
914 MALLOC(iov, struct iovec *,
915 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
916 M_WAITOK);
917 } else
918 iov = aiov;
919 msg.msg_flags = uap->flags | MSG_COMPAT;
920 error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
921 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
922 if (error)
923 goto done;
924 msg.msg_iov = iov;
925 error = recvit(p, uap->s, &msg, (caddr_t)&uap->msg->msg_namelen);
926
927 if (msg.msg_controllen && error == 0)
928 error = copyout((caddr_t)&msg.msg_controllen,
929 (caddr_t)&uap->msg->msg_accrightslen, sizeof (int));
930done:
931 if (iov != aiov)
932 FREE(iov, M_IOV);
933 return (error);
934}
935#endif
936
937int
938recvmsg(p, uap)
939 struct proc *p;
940 register struct recvmsg_args /* {
941 int s;
942 struct msghdr *msg;
943 int flags;
944 } */ *uap;
945{
946 struct msghdr msg;
947 struct iovec aiov[UIO_SMALLIOV], *uiov, *iov;
948 register int error;
949
950 error = copyin((caddr_t)uap->msg, (caddr_t)&msg, sizeof (msg));
951 if (error)
952 return (error);
953 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
954 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
955 return (EMSGSIZE);
956 MALLOC(iov, struct iovec *,
957 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
958 M_WAITOK);
959 } else
960 iov = aiov;
961#ifdef COMPAT_OLDSOCK
962 msg.msg_flags = uap->flags &~ MSG_COMPAT;
963#else
964 msg.msg_flags = uap->flags;
965#endif
966 uiov = msg.msg_iov;
967 msg.msg_iov = iov;
968 error = copyin((caddr_t)uiov, (caddr_t)iov,
969 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
970 if (error)
971 goto done;
972 error = recvit(p, uap->s, &msg, (caddr_t)0);
973 if (!error) {
974 msg.msg_iov = uiov;
975 error = copyout((caddr_t)&msg, (caddr_t)uap->msg, sizeof(msg));
976 }
977done:
978 if (iov != aiov)
979 FREE(iov, M_IOV);
980 return (error);
981}
982
983/* ARGSUSED */
984int
985shutdown(p, uap)
986 struct proc *p;
987 register struct shutdown_args /* {
988 int s;
989 int how;
990 } */ *uap;
991{
992 struct file *fp;
993 int error;
994
995 error = getsock(p->p_fd, uap->s, &fp);
996 if (error)
997 return (error);
998 return (soshutdown((struct socket *)fp->f_data, uap->how));
999}
1000
1001/* ARGSUSED */
1002int
1003setsockopt(p, uap)
1004 struct proc *p;
1005 register struct setsockopt_args /* {
1006 int s;
1007 int level;
1008 int name;
1009 caddr_t val;
1010 int valsize;
1011 } */ *uap;
1012{
1013 struct file *fp;
1014 struct sockopt sopt;
1015 int error;
1016
1017 if (uap->val == 0 && uap->valsize != 0)
1018 return (EFAULT);
1019 if (uap->valsize < 0)
1020 return (EINVAL);
1021
1022 error = getsock(p->p_fd, uap->s, &fp);
1023 if (error)
1024 return (error);
1025
1026 sopt.sopt_dir = SOPT_SET;
1027 sopt.sopt_level = uap->level;
1028 sopt.sopt_name = uap->name;
1029 sopt.sopt_val = uap->val;
1030 sopt.sopt_valsize = uap->valsize;
1031 sopt.sopt_p = p;
1032
1033 return (sosetopt((struct socket *)fp->f_data, &sopt));
1034}
1035
1036/* ARGSUSED */
1037int
1038getsockopt(p, uap)
1039 struct proc *p;
1040 register struct getsockopt_args /* {
1041 int s;
1042 int level;
1043 int name;
1044 caddr_t val;
1045 int *avalsize;
1046 } */ *uap;
1047{
1048 int valsize, error;
1049 struct file *fp;
1050 struct sockopt sopt;
1051
1052 error = getsock(p->p_fd, uap->s, &fp);
1053 if (error)
1054 return (error);
1055 if (uap->val) {
1056 error = copyin((caddr_t)uap->avalsize, (caddr_t)&valsize,
1057 sizeof (valsize));
1058 if (error)
1059 return (error);
1060 if (valsize < 0)
1061 return (EINVAL);
1062 } else
1063 valsize = 0;
1064
1065 sopt.sopt_dir = SOPT_GET;
1066 sopt.sopt_level = uap->level;
1067 sopt.sopt_name = uap->name;
1068 sopt.sopt_val = uap->val;
1069 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
1070 sopt.sopt_p = p;
1071
1072 error = sogetopt((struct socket *)fp->f_data, &sopt);
1073 if (error == 0) {
1074 valsize = sopt.sopt_valsize;
1075 error = copyout((caddr_t)&valsize,
1076 (caddr_t)uap->avalsize, sizeof (valsize));
1077 }
1078 return (error);
1079}
1080
1081/*
1082 * Get socket name.
1083 */
1084/* ARGSUSED */
1085static int
1086getsockname1(p, uap, compat)
1087 struct proc *p;
1088 register struct getsockname_args /* {
1089 int fdes;
1090 caddr_t asa;
1091 int *alen;
1092 } */ *uap;
1093 int compat;
1094{
1095 struct file *fp;
1096 register struct socket *so;
1097 struct sockaddr *sa;
1098 int len, error;
1099
1100 error = getsock(p->p_fd, uap->fdes, &fp);
1101 if (error)
1102 return (error);
1103 error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len));
1104 if (error)
1105 return (error);
1106 so = (struct socket *)fp->f_data;
1107 sa = 0;
1108 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1109 if (error)
1110 goto bad;
1111 if (sa == 0) {
1112 len = 0;
1113 goto gotnothing;
1114 }
1115
1116 len = MIN(len, sa->sa_len);
1117#ifdef COMPAT_OLDSOCK
1118 if (compat)
1119 ((struct osockaddr *)sa)->sa_family = sa->sa_family;
1120#endif
1121 error = copyout(sa, (caddr_t)uap->asa, (u_int)len);
1122 if (error == 0)
1123gotnothing:
1124 error = copyout((caddr_t)&len, (caddr_t)uap->alen,
1125 sizeof (len));
1126bad:
1127 if (sa)
1128 FREE(sa, M_SONAME);
1129 return (error);
1130}
1131
/*
 * getsockname(2): current (non-compat) entry point.
 */
int
getsockname(p, uap)
	struct proc *p;
	struct getsockname_args *uap;
{
	const int compat = 0;

	return (getsockname1(p, uap, compat));
}
1140
1141#ifdef COMPAT_OLDSOCK
/*
 * Old getsockname: same as getsockname() but with the compat flag set.
 */
int
ogetsockname(p, uap)
	struct proc *p;
	struct getsockname_args *uap;
{
	const int compat = 1;

	return (getsockname1(p, uap, compat));
}
1150#endif /* COMPAT_OLDSOCK */
1151
1152/*
1153 * Get name of peer for connected socket.
1154 */
1155/* ARGSUSED */
1156static int
1157getpeername1(p, uap, compat)
1158 struct proc *p;
1159 register struct getpeername_args /* {
1160 int fdes;
1161 caddr_t asa;
1162 int *alen;
1163 } */ *uap;
1164 int compat;
1165{
1166 struct file *fp;
1167 register struct socket *so;
1168 struct sockaddr *sa;
1169 int len, error;
1170
1171 error = getsock(p->p_fd, uap->fdes, &fp);
1172 if (error)
1173 return (error);
1174 so = (struct socket *)fp->f_data;
1175 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0)
1176 return (ENOTCONN);
1177 error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len));
1178 if (error)
1179 return (error);
1180 sa = 0;
1181 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1182 if (error)
1183 goto bad;
1184 if (sa == 0) {
1185 len = 0;
1186 goto gotnothing;
1187 }
1188 len = MIN(len, sa->sa_len);
1189#ifdef COMPAT_OLDSOCK
1190 if (compat)
1191 ((struct osockaddr *)sa)->sa_family =
1192 sa->sa_family;
1193#endif
1194 error = copyout(sa, (caddr_t)uap->asa, (u_int)len);
1195 if (error)
1196 goto bad;
1197gotnothing:
1198 error = copyout((caddr_t)&len, (caddr_t)uap->alen, sizeof (len));
1199bad:
1200 if (sa) FREE(sa, M_SONAME);
1201 return (error);
1202}
1203
/*
 * getpeername(2): current (non-compat) entry point.
 */
int
getpeername(p, uap)
	struct proc *p;
	struct getpeername_args *uap;
{
	const int compat = 0;

	return (getpeername1(p, uap, compat));
}
1212
1213#ifdef COMPAT_OLDSOCK
/*
 * Old getpeername: same as getpeername() but with the compat flag set.
 */
int
ogetpeername(p, uap)
	struct proc *p;
	struct ogetpeername_args *uap;
{
	struct getpeername_args *args;

	/* XXX uap should have type `getpeername_args *' to begin with. */
	args = (struct getpeername_args *)uap;
	return (getpeername1(p, args, 1));
}
1223#endif /* COMPAT_OLDSOCK */
1224
1225int
1226sockargs(mp, buf, buflen, type)
1227 struct mbuf **mp;
1228 caddr_t buf;
1229 int buflen, type;
1230{
1231 register struct sockaddr *sa;
1232 register struct mbuf *m;
1233 int error;
1234
1235 if ((u_int)buflen > MLEN) {
1236#ifdef COMPAT_OLDSOCK
1237 if (type == MT_SONAME && (u_int)buflen <= 112)
1238 buflen = MLEN; /* unix domain compat. hack */
1239 else
1240#endif
1241 return (EINVAL);
1242 }
1243 m = m_get(M_WAIT, type);
1244 if (m == NULL)
1245 return (ENOBUFS);
1246 m->m_len = buflen;
1247 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1248 if (error)
1249 (void) m_free(m);
1250 else {
1251 *mp = m;
1252 if (type == MT_SONAME) {
1253 sa = mtod(m, struct sockaddr *);
1254
1255#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1256 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1257 sa->sa_family = sa->sa_len;
1258#endif
1259 sa->sa_len = buflen;
1260 }
1261 }
1262 return (error);
1263}
1264
1265int
1266getsockaddr(namp, uaddr, len)
1267 struct sockaddr **namp;
1268 caddr_t uaddr;
1269 size_t len;
1270{
1271 struct sockaddr *sa;
1272 int error;
1273
1274 if (len > SOCK_MAXADDRLEN)
1275 return ENAMETOOLONG;
1276 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1277 error = copyin(uaddr, sa, len);
1278 if (error) {
1279 FREE(sa, M_SONAME);
1280 } else {
1281#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1282 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1283 sa->sa_family = sa->sa_len;
1284#endif
1285 sa->sa_len = len;
1286 *namp = sa;
1287 }
1288 return error;
1289}
1290
1291int
1292getsock(fdp, fdes, fpp)
1293 struct filedesc *fdp;
1294 int fdes;
1295 struct file **fpp;
1296{
1297 register struct file *fp;
1298
1299 if ((unsigned)fdes >= fdp->fd_nfiles ||
1300 (fp = fdp->fd_ofiles[fdes]) == NULL)
1301 return (EBADF);
1302 if (fp->f_type != DTYPE_SOCKET)
1303 return (ENOTSOCK);
1304 *fpp = fp;
1305 return (0);
1306}
1307
1308/*
1309 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
1310 * XXX - The sf_buf functions are currently private to sendfile(2), so have
1311 * been made static, but may be useful in the future for doing zero-copy in
1312 * other parts of the networking code.
1313 */
1314static void
1315sf_buf_init(void *arg)
1316{
1317 int i;
1318
1319 SLIST_INIT(&sf_freelist);
1320 sf_base = kmem_alloc_pageable(kernel_map, nsfbufs * PAGE_SIZE);
1321 sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP, M_NOWAIT);
1322 bzero(sf_bufs, nsfbufs * sizeof(struct sf_buf));
1323 for (i = 0; i < nsfbufs; i++) {
1324 sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
1325 SLIST_INSERT_HEAD(&sf_freelist, &sf_bufs[i], free_list);
1326 }
1327}
1328
1329/*
1330 * Get an sf_buf from the freelist. Will block if none are available.
1331 */
1332static struct sf_buf *
1333sf_buf_alloc()
1334{
1335 struct sf_buf *sf;
1336 int s;
1337
1338 s = splimp();
1339 while ((sf = SLIST_FIRST(&sf_freelist)) == NULL) {
1340 sf_buf_alloc_want = 1;
1341 tsleep(&sf_freelist, PVM, "sfbufa", 0);
1342 }
1343 SLIST_REMOVE_HEAD(&sf_freelist, free_list);
1344 splx(s);
1345 sf->refcnt = 1;
1346 return (sf);
1347}
1348
1349#define dtosf(x) (&sf_bufs[((uintptr_t)(x) - (uintptr_t)sf_base) >> PAGE_SHIFT])
1350static void
1351sf_buf_ref(caddr_t addr, u_int size)
1352{
1353 struct sf_buf *sf;
1354
1355 sf = dtosf(addr);
1356 if (sf->refcnt == 0)
1357 panic("sf_buf_ref: referencing a free sf_buf");
1358 sf->refcnt++;
1359}
1360
1361/*
1362 * Lose a reference to an sf_buf. When none left, detach mapped page
1363 * and release resources back to the system.
1364 *
1365 * Must be called at splimp.
1366 */
1367static void
1368sf_buf_free(caddr_t addr, u_int size)
1369{
1370 struct sf_buf *sf;
1371 struct vm_page *m;
1372 int s;
1373
1374 sf = dtosf(addr);
1375 if (sf->refcnt == 0)
1376 panic("sf_buf_free: freeing free sf_buf");
1377 sf->refcnt--;
1378 if (sf->refcnt == 0) {
1379 pmap_qremove((vm_offset_t)addr, 1);
1380 m = sf->m;
1381 s = splvm();
1382 vm_page_unwire(m, 0);
1383 /*
1384 * Check for the object going away on us. This can
1385 * happen since we don't hold a reference to it.
1386 * If so, we're responsible for freeing the page.
1387 */
1388 if (m->wire_count == 0 && m->object == NULL)
1389 vm_page_free(m);
1390 splx(s);
1391 sf->m = NULL;
1392 SLIST_INSERT_HEAD(&sf_freelist, sf, free_list);
1393 if (sf_buf_alloc_want) {
1394 sf_buf_alloc_want = 0;
1395 wakeup(&sf_freelist);
1396 }
1397 }
1398}
1399
1400/*
1401 * sendfile(2).
1402 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
1403 * struct sf_hdtr *hdtr, off_t *sbytes, int flags)
1404 *
1405 * Send a file specified by 'fd' and starting at 'offset' to a socket
1406 * specified by 's'. Send only 'nbytes' of the file or until EOF if
1407 * nbytes == 0. Optionally add a header and/or trailer to the socket
1408 * output. If specified, write the total number of bytes sent into *sbytes.
1409 */
/*
 * Returns 0 or an errno value.  Every exit goes through 'done', so the
 * running byte count (header bytes, file bytes queued on the socket and
 * trailer bytes) is copied out to *sbytes on failure as well as on success
 * whenever that pointer is non-NULL.
 *
 * NOTE(review): 'fp' first refers to the input file and is then overwritten
 * by getsock() with the socket's file entry; after that only 'vp' and 'obj'
 * refer to the input file.
 * NOTE(review): header bytes are added to 'sbytes' before the page loop and
 * the loop compares 'sbytes' with uap->nbytes, so header bytes appear to
 * count against nbytes -- confirm that this is the intended contract.
 */
1410int
1411sendfile(struct proc *p, struct sendfile_args *uap)
1412{
1413 struct file *fp;
1414 struct filedesc *fdp = p->p_fd;
1415 struct vnode *vp;
1416 struct vm_object *obj;
1417 struct socket *so;
1418 struct mbuf *m;
1419 struct sf_buf *sf;
1420 struct vm_page *pg;
1421 struct writev_args nuap;
1422 struct sf_hdtr hdtr;
1423 off_t off, xfsize, sbytes = 0;
1424 int error = 0, s;
1425
1426 /*
1427 * Do argument checking. Must be a regular file in, stream
1428 * type and connected socket out, positive offset.
1429 */
1430 if (((u_int)uap->fd) >= fdp->fd_nfiles ||
1431 (fp = fdp->fd_ofiles[uap->fd]) == NULL ||
1432 (fp->f_flag & FREAD) == 0) {
1433 error = EBADF;
1434 goto done;
1435 }
1436 if (fp->f_type != DTYPE_VNODE) {
1437 error = EINVAL;
1438 goto done;
1439 }
1440 vp = (struct vnode *)fp->f_data;
1441 obj = vp->v_object;
1442 if (vp->v_type != VREG || obj == NULL) {
1443 error = EINVAL;
1444 goto done;
1445 }
1446 error = getsock(p->p_fd, uap->s, &fp);
1447 if (error)
1448 goto done;
1449 so = (struct socket *)fp->f_data;
1450 if (so->so_type != SOCK_STREAM) {
1451 error = EINVAL;
1452 goto done;
1453 }
1454 if ((so->so_state & SS_ISCONNECTED) == 0) {
1455 error = ENOTCONN;
1456 goto done;
1457 }
1458 if (uap->offset < 0) {
1459 error = EINVAL;
1460 goto done;
1461 }
1462
1463 /*
1464 * If specified, get the pointer to the sf_hdtr struct for
1465 * any headers/trailers.
1466 */
1467 if (uap->hdtr != NULL) {
1468 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1469 if (error)
1470 goto done;
1471 /*
1472 * Send any headers. Wimp out and use writev(2).
1473 */
1474 if (hdtr.headers != NULL) {
1475 nuap.fd = uap->s;
1476 nuap.iovp = hdtr.headers;
1477 nuap.iovcnt = hdtr.hdr_cnt;
1478 error = writev(p, &nuap);
1479 if (error)
1480 goto done;
/* writev() reports the byte count it wrote through p->p_retval[0]. */
1481 sbytes += p->p_retval[0];
1482 }
1483 }
1484
1485 /*
1486 * Protect against multiple writers to the socket.
1487 */
/*
 * NOTE(review): the sblock() result is discarded; if it can fail here, the
 * sbunlock() calls below would run without the lock held -- TODO confirm.
 */
1488 (void) sblock(&so->so_snd, M_WAITOK);
1489
1490 /*
1491 * Loop through the pages in the file, starting with the requested
1492 * offset. Get a file page (do I/O if necessary), map the file page
1493 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1494 * it on the socket.
1495 */
1496 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
1497 vm_pindex_t pindex;
1498 vm_offset_t pgoff;
1499
1500 pindex = OFF_TO_IDX(off);
1501retry_lookup:
1502 /*
1503 * Calculate the amount to transfer. Not to exceed a page,
1504 * the EOF, or the passed in nbytes.
1505 */
1506 xfsize = obj->un_pager.vnp.vnp_size - off;
1507 if (xfsize > PAGE_SIZE)
1508 xfsize = PAGE_SIZE;
1509 pgoff = (vm_offset_t)(off & PAGE_MASK);
1510 if (PAGE_SIZE - pgoff < xfsize)
1511 xfsize = PAGE_SIZE - pgoff;
1512 if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
1513 xfsize = uap->nbytes - sbytes;
/* Nothing left to send (EOF or nbytes reached): this is the loop's normal exit. */
1514 if (xfsize <= 0)
1515 break;
1516 /*
1517 * Optimize the non-blocking case by looking at the socket space
1518 * before going to the extra work of constituting the sf_buf.
1519 */
1520 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
1521 if (so->so_state & SS_CANTSENDMORE)
1522 error = EPIPE;
1523 else
1524 error = EAGAIN;
1525 sbunlock(&so->so_snd);
1526 goto done;
1527 }
1528 /*
1529 * Attempt to look up the page.
1530 *
1531 * Allocate if not found
1532 *
1533 * Wait and loop if busy.
1534 */
1535 pg = vm_page_lookup(obj, pindex);
1536
1537 if (pg == NULL) {
1538 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL);
1539 if (pg == NULL) {
1540 VM_WAIT;
1541 goto retry_lookup;
1542 }
1543 vm_page_wakeup(pg);
1544 } else if (vm_page_sleep_busy(pg, TRUE, "sfpbsy")) {
1545 goto retry_lookup;
1546 }
1547
1548 /*
1549 * Wire the page so it does not get ripped out from under
1550 * us.
1551 */
1552
1553 vm_page_wire(pg);
1554
1555 /*
1556 * If page is not valid for what we need, initiate I/O
1557 */
1558
1559 if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) {
1560 struct uio auio;
1561 struct iovec aiov;
1562 int bsize;
1563
1564 /*
1565 * Ensure that our page is still around when the I/O
1566 * completes.
1567 */
1568 vm_page_io_start(pg);
1569
1570 /*
1571 * Get the page from backing store.
1572 */
/*
 * The read uses UIO_NOCOPY with a null iov_base; presumably its only purpose
 * is to make the page valid rather than to copy data anywhere -- verify
 * against the VOP_READ/IO_VMIO contract.
 */
1573 bsize = vp->v_mount->mnt_stat.f_iosize;
1574 auio.uio_iov = &aiov;
1575 auio.uio_iovcnt = 1;
1576 aiov.iov_base = 0;
1577 aiov.iov_len = MAXBSIZE;
1578 auio.uio_resid = MAXBSIZE;
1579 auio.uio_offset = trunc_page(off);
1580 auio.uio_segflg = UIO_NOCOPY;
1581 auio.uio_rw = UIO_READ;
1582 auio.uio_procp = p;
1583 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, p);
1584 error = VOP_READ(vp, &auio, IO_VMIO | ((MAXBSIZE / bsize) << 16),
1585 p->p_ucred);
1586 VOP_UNLOCK(vp, 0, p);
1587 vm_page_flag_clear(pg, PG_ZERO);
1588 vm_page_io_finish(pg);
1589 if (error) {
1590 vm_page_unwire(pg, 0);
1591 /*
1592 * See if anyone else might know about this page.
1593 * If not and it is not valid, then free it.
1594 */
1595 if (pg->wire_count == 0 && pg->valid == 0 &&
1596 pg->busy == 0 && !(pg->flags & PG_BUSY) &&
1597 pg->hold_count == 0)
1598 vm_page_free(pg);
1599 sbunlock(&so->so_snd);
1600 goto done;
1601 }
1602 }
1603
1604 /*
1605 * Allocate a kernel virtual page and insert the physical page
1606 * into it.
1607 */
1608
1609 sf = sf_buf_alloc();
1610 sf->m = pg;
1611 pmap_qenter(sf->kva, &pg, 1);
1612 /*
1613 * Get an mbuf header and set it up as having external storage.
1614 */
/*
 * NOTE(review): 'm' is dereferenced immediately with no NULL check -- TODO
 * confirm MGETHDR(M_WAIT) cannot return NULL in this kernel.
 * From here on m_freem(m) releases the sf_buf (and with it the wired page)
 * through the ext_free hook installed below.
 */
1615 MGETHDR(m, M_WAIT, MT_DATA);
1616 m->m_ext.ext_free = sf_buf_free;
1617 m->m_ext.ext_ref = sf_buf_ref;
1618 m->m_ext.ext_buf = (void *)sf->kva;
1619 m->m_ext.ext_size = PAGE_SIZE;
1620 m->m_data = (char *) sf->kva + pgoff;
1621 m->m_flags |= M_EXT;
1622 m->m_pkthdr.len = m->m_len = xfsize;
1623 /*
1624 * Add the buffer to the socket buffer chain.
1625 */
1626 s = splnet();
1627retry_space:
1628 /*
1629 * Make sure that the socket is still able to take more data.
1630 * CANTSENDMORE being true usually means that the connection
1631 * was closed. so_error is true when an error was sensed after
1632 * a previous send.
1633 * The state is checked after the page mapping and buffer
1634 * allocation above since those operations may block and make
1635 * any socket checks stale. From this point forward, nothing
1636 * blocks before the pru_send (or more accurately, any blocking
1637 * results in a loop back to here to re-check).
1638 */
1639 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
1640 if (so->so_state & SS_CANTSENDMORE) {
1641 error = EPIPE;
1642 } else {
1643 error = so->so_error;
1644 so->so_error = 0;
1645 }
1646 m_freem(m);
1647 sbunlock(&so->so_snd);
1648 splx(s);
1649 goto done;
1650 }
1651 /*
1652 * Wait for socket space to become available. We do this just
1653 * after checking the connection state above in order to avoid
1654 * a race condition with sbwait().
1655 */
1656 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
1657 if (so->so_state & SS_NBIO) {
1658 m_freem(m);
1659 sbunlock(&so->so_snd);
1660 splx(s);
1661 error = EAGAIN;
1662 goto done;
1663 }
1664 error = sbwait(&so->so_snd);
1665 /*
1666 * An error from sbwait usually indicates that we've
1667 * been interrupted by a signal. If we've sent anything
1668 * then return bytes sent, otherwise return the error.
1669 */
1670 if (error) {
1671 m_freem(m);
1672 sbunlock(&so->so_snd);
1673 splx(s);
1674 goto done;
1675 }
1676 goto retry_space;
1677 }
/* Hand the mbuf to the protocol; it is not freed here if the send fails. */
1678 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, p);
1679 splx(s);
1680 if (error) {
1681 sbunlock(&so->so_snd);
1682 goto done;
1683 }
1684 }
1685 sbunlock(&so->so_snd);
1686
1687 /*
1688 * Send trailers. Wimp out and use writev(2).
1689 */
1690 if (uap->hdtr != NULL && hdtr.trailers != NULL) {
1691 nuap.fd = uap->s;
1692 nuap.iovp = hdtr.trailers;
1693 nuap.iovcnt = hdtr.trl_cnt;
1694 error = writev(p, &nuap);
1695 if (error)
1696 goto done;
1697 sbytes += p->p_retval[0];
1698 }
1699
1700done:
/*
 * NOTE(review): the copyout() result is ignored, so a bad sbytes pointer is
 * not reported to the caller.
 */
1701 if (uap->sbytes != NULL) {
1702 copyout(&sbytes, uap->sbytes, sizeof(off_t));
1703 }
1704 return (error);
1705}
423free2:
424 (void)soclose(so2);
425free1:
426 (void)soclose(so1);
427 return (error);
428}
429
/*
 * Common back end for the send-side system calls in this file (sendto,
 * osend, osendmsg and sendmsg all funnel into it).
 *
 * p:     calling process.
 * s:     descriptor that must refer to a socket.
 * mp:    kernel copy of the message header; msg_iov is walked directly
 *        here, so it must already point at a kernel copy of the iovec
 *        array, while the buffers, name and control data it describes are
 *        still in user space.
 * flags: passed through to the protocol's pru_sosend.
 *
 * On success the number of bytes sent is stored in p->p_retval[0].
 * Returns 0 or an errno value; EPIPE additionally posts SIGPIPE to the
 * caller.
 */
430static int
431sendit(p, s, mp, flags)
432 register struct proc *p;
433 int s;
434 register struct msghdr *mp;
435 int flags;
436{
437 struct file *fp;
438 struct uio auio;
439 register struct iovec *iov;
440 register int i;
441 struct mbuf *control;
442 struct sockaddr *to;
443 int len, error;
444 struct socket *so;
445#ifdef KTRACE
446 struct iovec *ktriov = NULL;
447#endif
448
449 error = getsock(p->p_fd, s, &fp);
450 if (error)
451 return (error);
452 auio.uio_iov = mp->msg_iov;
453 auio.uio_iovcnt = mp->msg_iovlen;
454 auio.uio_segflg = UIO_USERSPACE;
455 auio.uio_rw = UIO_WRITE;
456 auio.uio_procp = p;
457 auio.uio_offset = 0; /* XXX */
458 auio.uio_resid = 0;
459 iov = mp->msg_iov;
/*
 * Total up the request size; a running total that goes negative is
 * rejected.  Nothing has been allocated yet, so a plain return is safe.
 */
460 for (i = 0; i < mp->msg_iovlen; i++, iov++) {
461 if ((auio.uio_resid += iov->iov_len) < 0)
462 return (EINVAL);
463 }
/* Copy in the destination address, if any; released at 'bad' below. */
464 if (mp->msg_name) {
465 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
466 if (error)
467 return (error);
468 } else
469 to = 0;
470 if (mp->msg_control) {
/*
 * Control data must hold at least a cmsghdr, except for old-style
 * (MSG_COMPAT) callers, whose data gets a cmsghdr prepended below.
 */
471 if (mp->msg_controllen < sizeof(struct cmsghdr)
472#ifdef COMPAT_OLDSOCK
473 && mp->msg_flags != MSG_COMPAT
474#endif
475 ) {
476 error = EINVAL;
477 goto bad;
478 }
479 error = sockargs(&control, mp->msg_control,
480 mp->msg_controllen, MT_CONTROL);
481 if (error)
482 goto bad;
483#ifdef COMPAT_OLDSOCK
484 if (mp->msg_flags == MSG_COMPAT) {
485 register struct cmsghdr *cm;
486
/* Wrap the old-style data in an SOL_SOCKET/SCM_RIGHTS header. */
487 M_PREPEND(control, sizeof(*cm), M_WAIT);
488 if (control == 0) {
489 error = ENOBUFS;
490 goto bad;
491 } else {
492 cm = mtod(control, struct cmsghdr *);
493 cm->cmsg_len = control->m_len;
494 cm->cmsg_level = SOL_SOCKET;
495 cm->cmsg_type = SCM_RIGHTS;
496 }
497 }
498#endif
499 } else
500 control = 0;
501#ifdef KTRACE
/* Snapshot the iovec array for tracing before the send runs. */
502 if (KTRPOINT(p, KTR_GENIO)) {
503 int iovlen = auio.uio_iovcnt * sizeof (struct iovec);
504
505 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
506 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
507 }
508#endif
509 len = auio.uio_resid;
510 so = (struct socket *)fp->f_data;
/*
 * 'control' is not freed anywhere in this function after this call;
 * presumably pru_sosend takes ownership of it -- verify against the
 * sosend contract.
 */
511 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, &auio, 0, control,
512 flags, p);
513 if (error) {
/* An interrupted or would-block send that moved some data counts as success. */
514 if (auio.uio_resid != len && (error == ERESTART ||
515 error == EINTR || error == EWOULDBLOCK))
516 error = 0;
517 if (error == EPIPE)
518 psignal(p, SIGPIPE);
519 }
520 if (error == 0)
521 p->p_retval[0] = len - auio.uio_resid;
522#ifdef KTRACE
523 if (ktriov != NULL) {
524 if (error == 0)
525 ktrgenio(p->p_tracep, s, UIO_WRITE,
526 ktriov, p->p_retval[0], error);
527 FREE(ktriov, M_TEMP);
528 }
529#endif
530bad:
531 if (to)
532 FREE(to, M_SONAME);
533 return (error);
534}
535
536int
537sendto(p, uap)
538 struct proc *p;
539 register struct sendto_args /* {
540 int s;
541 caddr_t buf;
542 size_t len;
543 int flags;
544 caddr_t to;
545 int tolen;
546 } */ *uap;
547{
548 struct msghdr msg;
549 struct iovec aiov;
550
551 msg.msg_name = uap->to;
552 msg.msg_namelen = uap->tolen;
553 msg.msg_iov = &aiov;
554 msg.msg_iovlen = 1;
555 msg.msg_control = 0;
556#ifdef COMPAT_OLDSOCK
557 msg.msg_flags = 0;
558#endif
559 aiov.iov_base = uap->buf;
560 aiov.iov_len = uap->len;
561 return (sendit(p, uap->s, &msg, uap->flags));
562}
563
564#ifdef COMPAT_OLDSOCK
565int
566osend(p, uap)
567 struct proc *p;
568 register struct osend_args /* {
569 int s;
570 caddr_t buf;
571 int len;
572 int flags;
573 } */ *uap;
574{
575 struct msghdr msg;
576 struct iovec aiov;
577
578 msg.msg_name = 0;
579 msg.msg_namelen = 0;
580 msg.msg_iov = &aiov;
581 msg.msg_iovlen = 1;
582 aiov.iov_base = uap->buf;
583 aiov.iov_len = uap->len;
584 msg.msg_control = 0;
585 msg.msg_flags = 0;
586 return (sendit(p, uap->s, &msg, uap->flags));
587}
588
589int
590osendmsg(p, uap)
591 struct proc *p;
592 register struct osendmsg_args /* {
593 int s;
594 caddr_t msg;
595 int flags;
596 } */ *uap;
597{
598 struct msghdr msg;
599 struct iovec aiov[UIO_SMALLIOV], *iov;
600 int error;
601
602 error = copyin(uap->msg, (caddr_t)&msg, sizeof (struct omsghdr));
603 if (error)
604 return (error);
605 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
606 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
607 return (EMSGSIZE);
608 MALLOC(iov, struct iovec *,
609 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
610 M_WAITOK);
611 } else
612 iov = aiov;
613 error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
614 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
615 if (error)
616 goto done;
617 msg.msg_flags = MSG_COMPAT;
618 msg.msg_iov = iov;
619 error = sendit(p, uap->s, &msg, uap->flags);
620done:
621 if (iov != aiov)
622 FREE(iov, M_IOV);
623 return (error);
624}
625#endif
626
627int
628sendmsg(p, uap)
629 struct proc *p;
630 register struct sendmsg_args /* {
631 int s;
632 caddr_t msg;
633 int flags;
634 } */ *uap;
635{
636 struct msghdr msg;
637 struct iovec aiov[UIO_SMALLIOV], *iov;
638 int error;
639
640 error = copyin(uap->msg, (caddr_t)&msg, sizeof (msg));
641 if (error)
642 return (error);
643 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
644 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
645 return (EMSGSIZE);
646 MALLOC(iov, struct iovec *,
647 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
648 M_WAITOK);
649 } else
650 iov = aiov;
651 if (msg.msg_iovlen &&
652 (error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
653 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)))))
654 goto done;
655 msg.msg_iov = iov;
656#ifdef COMPAT_OLDSOCK
657 msg.msg_flags = 0;
658#endif
659 error = sendit(p, uap->s, &msg, uap->flags);
660done:
661 if (iov != aiov)
662 FREE(iov, M_IOV);
663 return (error);
664}
665
/*
 * Common back end for the receive-side system calls in this file (recvfrom,
 * orecv, orecvmsg and recvmsg all funnel into it).
 *
 * p:        calling process.
 * s:        descriptor that must refer to a socket.
 * mp:       kernel copy of the message header; msg_iov is walked directly
 *           here, so it must already point at a kernel copy of the iovec
 *           array.  msg_namelen, msg_controllen and msg_flags are updated
 *           in place to describe what was received.
 * namelenp: optional user address that receives the resulting name length
 *           as an int.
 *
 * On success the number of bytes received is stored in p->p_retval[0].
 * Returns 0 or an errno value.
 */
666static int
667recvit(p, s, mp, namelenp)
668 register struct proc *p;
669 int s;
670 register struct msghdr *mp;
671 caddr_t namelenp;
672{
673 struct file *fp;
674 struct uio auio;
675 register struct iovec *iov;
676 register int i;
677 int len, error;
678 struct mbuf *m, *control = 0;
679 caddr_t ctlbuf;
680 struct socket *so;
681 struct sockaddr *fromsa = 0;
682#ifdef KTRACE
683 struct iovec *ktriov = NULL;
684#endif
685
686 error = getsock(p->p_fd, s, &fp);
687 if (error)
688 return (error);
689 auio.uio_iov = mp->msg_iov;
690 auio.uio_iovcnt = mp->msg_iovlen;
691 auio.uio_segflg = UIO_USERSPACE;
692 auio.uio_rw = UIO_READ;
693 auio.uio_procp = p;
694 auio.uio_offset = 0; /* XXX */
695 auio.uio_resid = 0;
696 iov = mp->msg_iov;
/*
 * Total up the request size; a running total that goes negative is
 * rejected.  Nothing has been allocated yet, so a plain return is safe.
 */
697 for (i = 0; i < mp->msg_iovlen; i++, iov++) {
698 if ((auio.uio_resid += iov->iov_len) < 0)
699 return (EINVAL);
700 }
701#ifdef KTRACE
/* Snapshot the iovec array for tracing before the receive runs. */
702 if (KTRPOINT(p, KTR_GENIO)) {
703 int iovlen = auio.uio_iovcnt * sizeof (struct iovec);
704
705 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
706 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
707 }
708#endif
709 len = auio.uio_resid;
710 so = (struct socket *)fp->f_data;
/* Control data is only requested when the caller supplied a control buffer. */
711 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
712 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
713 &mp->msg_flags);
714 if (error) {
/* An interrupted or would-block receive that moved some data counts as success. */
715 if (auio.uio_resid != len && (error == ERESTART ||
716 error == EINTR || error == EWOULDBLOCK))
717 error = 0;
718 }
719#ifdef KTRACE
720 if (ktriov != NULL) {
721 if (error == 0)
722 ktrgenio(p->p_tracep, s, UIO_READ,
723 ktriov, len - auio.uio_resid, error);
724 FREE(ktriov, M_TEMP);
725 }
726#endif
727 if (error)
728 goto out;
729 p->p_retval[0] = len - auio.uio_resid;
/* Copy out the sender's address, truncated to the caller's buffer length. */
730 if (mp->msg_name) {
731 len = mp->msg_namelen;
732 if (len <= 0 || fromsa == 0)
733 len = 0;
734 else {
735#ifndef MIN
736#define MIN(a,b) ((a)>(b)?(b):(a))
737#endif
738 /* save sa_len before it is destroyed by MSG_COMPAT */
739 len = MIN(len, fromsa->sa_len);
740#ifdef COMPAT_OLDSOCK
741 if (mp->msg_flags & MSG_COMPAT)
742 ((struct osockaddr *)fromsa)->sa_family =
743 fromsa->sa_family;
744#endif
745 error = copyout(fromsa,
746 (caddr_t)mp->msg_name, (unsigned)len);
747 if (error)
748 goto out;
749 }
750 mp->msg_namelen = len;
751 if (namelenp &&
752 (error = copyout((caddr_t)&len, namelenp, sizeof (int)))) {
753#ifdef COMPAT_OLDSOCK
754 if (mp->msg_flags & MSG_COMPAT)
755 error = 0; /* old recvfrom didn't check */
756 else
757#endif
758 goto out;
759 }
760 }
761 if (mp->msg_control) {
762#ifdef COMPAT_OLDSOCK
763 /*
764 * We assume that old recvmsg calls won't receive access
765 * rights and other control info, esp. as control info
766 * is always optional and those options didn't exist in 4.3.
767 * If we receive rights, trim the cmsghdr; anything else
768 * is tossed.
769 */
770 if (control && mp->msg_flags & MSG_COMPAT) {
771 if (mtod(control, struct cmsghdr *)->cmsg_level !=
772 SOL_SOCKET ||
773 mtod(control, struct cmsghdr *)->cmsg_type !=
774 SCM_RIGHTS) {
775 mp->msg_controllen = 0;
776 goto out;
777 }
778 control->m_len -= sizeof (struct cmsghdr);
779 control->m_data += sizeof (struct cmsghdr);
780 }
781#endif
782 len = mp->msg_controllen;
783 m = control;
784 mp->msg_controllen = 0;
785 ctlbuf = (caddr_t) mp->msg_control;
786
/*
 * Copy the control mbuf chain out until the caller's buffer is full; an
 * mbuf that does not fit completely is cut short and MSG_CTRUNC is set.
 */
787 while (m && len > 0) {
788 unsigned int tocopy;
789
790 if (len >= m->m_len)
791 tocopy = m->m_len;
792 else {
793 mp->msg_flags |= MSG_CTRUNC;
794 tocopy = len;
795 }
796
797 if ((error = copyout((caddr_t)mtod(m, caddr_t),
798 ctlbuf, tocopy)) != 0)
799 goto out;
800
801 ctlbuf += tocopy;
802 len -= tocopy;
803 m = m->m_next;
804 }
/* Report how many control bytes were actually copied out. */
805 mp->msg_controllen = ctlbuf - mp->msg_control;
806 }
807out:
/* Both the address and the control chain are released here on every path. */
808 if (fromsa)
809 FREE(fromsa, M_SONAME);
810 if (control)
811 m_freem(control);
812 return (error);
813}
814
815int
816recvfrom(p, uap)
817 struct proc *p;
818 register struct recvfrom_args /* {
819 int s;
820 caddr_t buf;
821 size_t len;
822 int flags;
823 caddr_t from;
824 int *fromlenaddr;
825 } */ *uap;
826{
827 struct msghdr msg;
828 struct iovec aiov;
829 int error;
830
831 if (uap->fromlenaddr) {
832 error = copyin((caddr_t)uap->fromlenaddr,
833 (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen));
834 if (error)
835 return (error);
836 } else
837 msg.msg_namelen = 0;
838 msg.msg_name = uap->from;
839 msg.msg_iov = &aiov;
840 msg.msg_iovlen = 1;
841 aiov.iov_base = uap->buf;
842 aiov.iov_len = uap->len;
843 msg.msg_control = 0;
844 msg.msg_flags = uap->flags;
845 return (recvit(p, uap->s, &msg, (caddr_t)uap->fromlenaddr));
846}
847
848#ifdef COMPAT_OLDSOCK
849int
850orecvfrom(p, uap)
851 struct proc *p;
852 struct recvfrom_args *uap;
853{
854
855 uap->flags |= MSG_COMPAT;
856 return (recvfrom(p, uap));
857}
858#endif
859
860
861#ifdef COMPAT_OLDSOCK
862int
863orecv(p, uap)
864 struct proc *p;
865 register struct orecv_args /* {
866 int s;
867 caddr_t buf;
868 int len;
869 int flags;
870 } */ *uap;
871{
872 struct msghdr msg;
873 struct iovec aiov;
874
875 msg.msg_name = 0;
876 msg.msg_namelen = 0;
877 msg.msg_iov = &aiov;
878 msg.msg_iovlen = 1;
879 aiov.iov_base = uap->buf;
880 aiov.iov_len = uap->len;
881 msg.msg_control = 0;
882 msg.msg_flags = uap->flags;
883 return (recvit(p, uap->s, &msg, (caddr_t)0));
884}
885
886/*
887 * Old recvmsg. This code takes advantage of the fact that the old msghdr
888 * overlays the new one, missing only the flags, and with the (old) access
889 * rights where the control fields are now.
890 */
891int
892orecvmsg(p, uap)
893 struct proc *p;
894 register struct orecvmsg_args /* {
895 int s;
896 struct omsghdr *msg;
897 int flags;
898 } */ *uap;
899{
900 struct msghdr msg;
901 struct iovec aiov[UIO_SMALLIOV], *iov;
902 int error;
903
904 error = copyin((caddr_t)uap->msg, (caddr_t)&msg,
905 sizeof (struct omsghdr));
906 if (error)
907 return (error);
908 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
909 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
910 return (EMSGSIZE);
911 MALLOC(iov, struct iovec *,
912 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
913 M_WAITOK);
914 } else
915 iov = aiov;
916 msg.msg_flags = uap->flags | MSG_COMPAT;
917 error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
918 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
919 if (error)
920 goto done;
921 msg.msg_iov = iov;
922 error = recvit(p, uap->s, &msg, (caddr_t)&uap->msg->msg_namelen);
923
924 if (msg.msg_controllen && error == 0)
925 error = copyout((caddr_t)&msg.msg_controllen,
926 (caddr_t)&uap->msg->msg_accrightslen, sizeof (int));
927done:
928 if (iov != aiov)
929 FREE(iov, M_IOV);
930 return (error);
931}
932#endif
933
934int
935recvmsg(p, uap)
936 struct proc *p;
937 register struct recvmsg_args /* {
938 int s;
939 struct msghdr *msg;
940 int flags;
941 } */ *uap;
942{
943 struct msghdr msg;
944 struct iovec aiov[UIO_SMALLIOV], *uiov, *iov;
945 register int error;
946
947 error = copyin((caddr_t)uap->msg, (caddr_t)&msg, sizeof (msg));
948 if (error)
949 return (error);
950 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
951 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
952 return (EMSGSIZE);
953 MALLOC(iov, struct iovec *,
954 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
955 M_WAITOK);
956 } else
957 iov = aiov;
958#ifdef COMPAT_OLDSOCK
959 msg.msg_flags = uap->flags &~ MSG_COMPAT;
960#else
961 msg.msg_flags = uap->flags;
962#endif
963 uiov = msg.msg_iov;
964 msg.msg_iov = iov;
965 error = copyin((caddr_t)uiov, (caddr_t)iov,
966 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
967 if (error)
968 goto done;
969 error = recvit(p, uap->s, &msg, (caddr_t)0);
970 if (!error) {
971 msg.msg_iov = uiov;
972 error = copyout((caddr_t)&msg, (caddr_t)uap->msg, sizeof(msg));
973 }
974done:
975 if (iov != aiov)
976 FREE(iov, M_IOV);
977 return (error);
978}
979
980/* ARGSUSED */
981int
982shutdown(p, uap)
983 struct proc *p;
984 register struct shutdown_args /* {
985 int s;
986 int how;
987 } */ *uap;
988{
989 struct file *fp;
990 int error;
991
992 error = getsock(p->p_fd, uap->s, &fp);
993 if (error)
994 return (error);
995 return (soshutdown((struct socket *)fp->f_data, uap->how));
996}
997
998/* ARGSUSED */
999int
1000setsockopt(p, uap)
1001 struct proc *p;
1002 register struct setsockopt_args /* {
1003 int s;
1004 int level;
1005 int name;
1006 caddr_t val;
1007 int valsize;
1008 } */ *uap;
1009{
1010 struct file *fp;
1011 struct sockopt sopt;
1012 int error;
1013
1014 if (uap->val == 0 && uap->valsize != 0)
1015 return (EFAULT);
1016 if (uap->valsize < 0)
1017 return (EINVAL);
1018
1019 error = getsock(p->p_fd, uap->s, &fp);
1020 if (error)
1021 return (error);
1022
1023 sopt.sopt_dir = SOPT_SET;
1024 sopt.sopt_level = uap->level;
1025 sopt.sopt_name = uap->name;
1026 sopt.sopt_val = uap->val;
1027 sopt.sopt_valsize = uap->valsize;
1028 sopt.sopt_p = p;
1029
1030 return (sosetopt((struct socket *)fp->f_data, &sopt));
1031}
1032
1033/* ARGSUSED */
1034int
1035getsockopt(p, uap)
1036 struct proc *p;
1037 register struct getsockopt_args /* {
1038 int s;
1039 int level;
1040 int name;
1041 caddr_t val;
1042 int *avalsize;
1043 } */ *uap;
1044{
1045 int valsize, error;
1046 struct file *fp;
1047 struct sockopt sopt;
1048
1049 error = getsock(p->p_fd, uap->s, &fp);
1050 if (error)
1051 return (error);
1052 if (uap->val) {
1053 error = copyin((caddr_t)uap->avalsize, (caddr_t)&valsize,
1054 sizeof (valsize));
1055 if (error)
1056 return (error);
1057 if (valsize < 0)
1058 return (EINVAL);
1059 } else
1060 valsize = 0;
1061
1062 sopt.sopt_dir = SOPT_GET;
1063 sopt.sopt_level = uap->level;
1064 sopt.sopt_name = uap->name;
1065 sopt.sopt_val = uap->val;
1066 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
1067 sopt.sopt_p = p;
1068
1069 error = sogetopt((struct socket *)fp->f_data, &sopt);
1070 if (error == 0) {
1071 valsize = sopt.sopt_valsize;
1072 error = copyout((caddr_t)&valsize,
1073 (caddr_t)uap->avalsize, sizeof (valsize));
1074 }
1075 return (error);
1076}
1077
1078/*
1079 * Get socket name.
1080 */
1081/* ARGSUSED */
1082static int
1083getsockname1(p, uap, compat)
1084 struct proc *p;
1085 register struct getsockname_args /* {
1086 int fdes;
1087 caddr_t asa;
1088 int *alen;
1089 } */ *uap;
1090 int compat;
1091{
1092 struct file *fp;
1093 register struct socket *so;
1094 struct sockaddr *sa;
1095 int len, error;
1096
1097 error = getsock(p->p_fd, uap->fdes, &fp);
1098 if (error)
1099 return (error);
1100 error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len));
1101 if (error)
1102 return (error);
1103 so = (struct socket *)fp->f_data;
1104 sa = 0;
1105 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1106 if (error)
1107 goto bad;
1108 if (sa == 0) {
1109 len = 0;
1110 goto gotnothing;
1111 }
1112
1113 len = MIN(len, sa->sa_len);
1114#ifdef COMPAT_OLDSOCK
1115 if (compat)
1116 ((struct osockaddr *)sa)->sa_family = sa->sa_family;
1117#endif
1118 error = copyout(sa, (caddr_t)uap->asa, (u_int)len);
1119 if (error == 0)
1120gotnothing:
1121 error = copyout((caddr_t)&len, (caddr_t)uap->alen,
1122 sizeof (len));
1123bad:
1124 if (sa)
1125 FREE(sa, M_SONAME);
1126 return (error);
1127}
1128
/*
 * getsockname(2): current-format entry point; calls getsockname1()
 * with compat disabled.
 */
int
getsockname(p, uap)
	struct proc *p;
	struct getsockname_args *uap;
{
	int error;

	error = getsockname1(p, uap, 0);
	return (error);
}
1137
1138#ifdef COMPAT_OLDSOCK
/*
 * Old getsockname: calls getsockname1() with compat enabled.
 */
int
ogetsockname(p, uap)
	struct proc *p;
	struct getsockname_args *uap;
{
	int error;

	error = getsockname1(p, uap, 1);
	return (error);
}
1147#endif /* COMPAT_OLDSOCK */
1148
1149/*
1150 * Get name of peer for connected socket.
1151 */
1152/* ARGSUSED */
1153static int
1154getpeername1(p, uap, compat)
1155 struct proc *p;
1156 register struct getpeername_args /* {
1157 int fdes;
1158 caddr_t asa;
1159 int *alen;
1160 } */ *uap;
1161 int compat;
1162{
1163 struct file *fp;
1164 register struct socket *so;
1165 struct sockaddr *sa;
1166 int len, error;
1167
1168 error = getsock(p->p_fd, uap->fdes, &fp);
1169 if (error)
1170 return (error);
1171 so = (struct socket *)fp->f_data;
1172 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0)
1173 return (ENOTCONN);
1174 error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len));
1175 if (error)
1176 return (error);
1177 sa = 0;
1178 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1179 if (error)
1180 goto bad;
1181 if (sa == 0) {
1182 len = 0;
1183 goto gotnothing;
1184 }
1185 len = MIN(len, sa->sa_len);
1186#ifdef COMPAT_OLDSOCK
1187 if (compat)
1188 ((struct osockaddr *)sa)->sa_family =
1189 sa->sa_family;
1190#endif
1191 error = copyout(sa, (caddr_t)uap->asa, (u_int)len);
1192 if (error)
1193 goto bad;
1194gotnothing:
1195 error = copyout((caddr_t)&len, (caddr_t)uap->alen, sizeof (len));
1196bad:
1197 if (sa) FREE(sa, M_SONAME);
1198 return (error);
1199}
1200
/*
 * getpeername(2): current-format entry point; calls getpeername1()
 * with compat disabled.
 */
int
getpeername(p, uap)
	struct proc *p;
	struct getpeername_args *uap;
{
	int error;

	error = getpeername1(p, uap, 0);
	return (error);
}
1209
1210#ifdef COMPAT_OLDSOCK
/*
 * Old getpeername: calls getpeername1() with compat enabled.
 */
int
ogetpeername(p, uap)
	struct proc *p;
	struct ogetpeername_args *uap;
{
	struct getpeername_args *args;

	/* XXX uap should have type `getpeername_args *' to begin with. */
	args = (struct getpeername_args *)uap;
	return (getpeername1(p, args, 1));
}
1220#endif /* COMPAT_OLDSOCK */
1221
1222int
1223sockargs(mp, buf, buflen, type)
1224 struct mbuf **mp;
1225 caddr_t buf;
1226 int buflen, type;
1227{
1228 register struct sockaddr *sa;
1229 register struct mbuf *m;
1230 int error;
1231
1232 if ((u_int)buflen > MLEN) {
1233#ifdef COMPAT_OLDSOCK
1234 if (type == MT_SONAME && (u_int)buflen <= 112)
1235 buflen = MLEN; /* unix domain compat. hack */
1236 else
1237#endif
1238 return (EINVAL);
1239 }
1240 m = m_get(M_WAIT, type);
1241 if (m == NULL)
1242 return (ENOBUFS);
1243 m->m_len = buflen;
1244 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1245 if (error)
1246 (void) m_free(m);
1247 else {
1248 *mp = m;
1249 if (type == MT_SONAME) {
1250 sa = mtod(m, struct sockaddr *);
1251
1252#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1253 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1254 sa->sa_family = sa->sa_len;
1255#endif
1256 sa->sa_len = buflen;
1257 }
1258 }
1259 return (error);
1260}
1261
1262int
1263getsockaddr(namp, uaddr, len)
1264 struct sockaddr **namp;
1265 caddr_t uaddr;
1266 size_t len;
1267{
1268 struct sockaddr *sa;
1269 int error;
1270
1271 if (len > SOCK_MAXADDRLEN)
1272 return ENAMETOOLONG;
1273 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1274 error = copyin(uaddr, sa, len);
1275 if (error) {
1276 FREE(sa, M_SONAME);
1277 } else {
1278#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1279 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1280 sa->sa_family = sa->sa_len;
1281#endif
1282 sa->sa_len = len;
1283 *namp = sa;
1284 }
1285 return error;
1286}
1287
1288int
1289getsock(fdp, fdes, fpp)
1290 struct filedesc *fdp;
1291 int fdes;
1292 struct file **fpp;
1293{
1294 register struct file *fp;
1295
1296 if ((unsigned)fdes >= fdp->fd_nfiles ||
1297 (fp = fdp->fd_ofiles[fdes]) == NULL)
1298 return (EBADF);
1299 if (fp->f_type != DTYPE_SOCKET)
1300 return (ENOTSOCK);
1301 *fpp = fp;
1302 return (0);
1303}
1304
1305/*
1306 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
1307 * XXX - The sf_buf functions are currently private to sendfile(2), so have
1308 * been made static, but may be useful in the future for doing zero-copy in
1309 * other parts of the networking code.
1310 */
1311static void
1312sf_buf_init(void *arg)
1313{
1314 int i;
1315
1316 SLIST_INIT(&sf_freelist);
1317 sf_base = kmem_alloc_pageable(kernel_map, nsfbufs * PAGE_SIZE);
1318 sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP, M_NOWAIT);
1319 bzero(sf_bufs, nsfbufs * sizeof(struct sf_buf));
1320 for (i = 0; i < nsfbufs; i++) {
1321 sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
1322 SLIST_INSERT_HEAD(&sf_freelist, &sf_bufs[i], free_list);
1323 }
1324}
1325
1326/*
1327 * Get an sf_buf from the freelist. Will block if none are available.
1328 */
1329static struct sf_buf *
1330sf_buf_alloc()
1331{
1332 struct sf_buf *sf;
1333 int s;
1334
1335 s = splimp();
1336 while ((sf = SLIST_FIRST(&sf_freelist)) == NULL) {
1337 sf_buf_alloc_want = 1;
1338 tsleep(&sf_freelist, PVM, "sfbufa", 0);
1339 }
1340 SLIST_REMOVE_HEAD(&sf_freelist, free_list);
1341 splx(s);
1342 sf->refcnt = 1;
1343 return (sf);
1344}
1345
1346#define dtosf(x) (&sf_bufs[((uintptr_t)(x) - (uintptr_t)sf_base) >> PAGE_SHIFT])
1347static void
1348sf_buf_ref(caddr_t addr, u_int size)
1349{
1350 struct sf_buf *sf;
1351
1352 sf = dtosf(addr);
1353 if (sf->refcnt == 0)
1354 panic("sf_buf_ref: referencing a free sf_buf");
1355 sf->refcnt++;
1356}
1357
1358/*
1359 * Lose a reference to an sf_buf. When none left, detach mapped page
1360 * and release resources back to the system.
1361 *
1362 * Must be called at splimp.
1363 */
1364static void
1365sf_buf_free(caddr_t addr, u_int size)
1366{
1367 struct sf_buf *sf;
1368 struct vm_page *m;
1369 int s;
1370
1371 sf = dtosf(addr);
1372 if (sf->refcnt == 0)
1373 panic("sf_buf_free: freeing free sf_buf");
1374 sf->refcnt--;
1375 if (sf->refcnt == 0) {
1376 pmap_qremove((vm_offset_t)addr, 1);
1377 m = sf->m;
1378 s = splvm();
1379 vm_page_unwire(m, 0);
1380 /*
1381 * Check for the object going away on us. This can
1382 * happen since we don't hold a reference to it.
1383 * If so, we're responsible for freeing the page.
1384 */
1385 if (m->wire_count == 0 && m->object == NULL)
1386 vm_page_free(m);
1387 splx(s);
1388 sf->m = NULL;
1389 SLIST_INSERT_HEAD(&sf_freelist, sf, free_list);
1390 if (sf_buf_alloc_want) {
1391 sf_buf_alloc_want = 0;
1392 wakeup(&sf_freelist);
1393 }
1394 }
1395}
1396
1397/*
1398 * sendfile(2).
1399 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
1400 * struct sf_hdtr *hdtr, off_t *sbytes, int flags)
1401 *
1402 * Send a file specified by 'fd' and starting at 'offset' to a socket
1403 * specified by 's'. Send only 'nbytes' of the file or until EOF if
1404 * nbytes == 0. Optionally add a header and/or trailer to the socket
1405 * output. If specified, write the total number of bytes sent into *sbytes.
1406 */
1407int
1408sendfile(struct proc *p, struct sendfile_args *uap)
1409{
1410 struct file *fp;
1411 struct filedesc *fdp = p->p_fd;
1412 struct vnode *vp;
1413 struct vm_object *obj;
1414 struct socket *so;
1415 struct mbuf *m;
1416 struct sf_buf *sf;
1417 struct vm_page *pg;
1418 struct writev_args nuap;
1419 struct sf_hdtr hdtr;
1420 off_t off, xfsize, sbytes = 0;
1421 int error = 0, s;
1422
1423 /*
1424 * Do argument checking. Must be a regular file in, stream
1425 * type and connected socket out, positive offset.
1426 */
1427 if (((u_int)uap->fd) >= fdp->fd_nfiles ||
1428 (fp = fdp->fd_ofiles[uap->fd]) == NULL ||
1429 (fp->f_flag & FREAD) == 0) {
1430 error = EBADF;
1431 goto done;
1432 }
1433 if (fp->f_type != DTYPE_VNODE) {
1434 error = EINVAL;
1435 goto done;
1436 }
1437 vp = (struct vnode *)fp->f_data;
1438 obj = vp->v_object;
1439 if (vp->v_type != VREG || obj == NULL) {
1440 error = EINVAL;
1441 goto done;
1442 }
1443 error = getsock(p->p_fd, uap->s, &fp);
1444 if (error)
1445 goto done;
1446 so = (struct socket *)fp->f_data;
1447 if (so->so_type != SOCK_STREAM) {
1448 error = EINVAL;
1449 goto done;
1450 }
1451 if ((so->so_state & SS_ISCONNECTED) == 0) {
1452 error = ENOTCONN;
1453 goto done;
1454 }
1455 if (uap->offset < 0) {
1456 error = EINVAL;
1457 goto done;
1458 }
1459
1460 /*
1461 * If specified, get the pointer to the sf_hdtr struct for
1462 * any headers/trailers.
1463 */
1464 if (uap->hdtr != NULL) {
1465 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1466 if (error)
1467 goto done;
1468 /*
1469 * Send any headers. Wimp out and use writev(2).
1470 */
1471 if (hdtr.headers != NULL) {
1472 nuap.fd = uap->s;
1473 nuap.iovp = hdtr.headers;
1474 nuap.iovcnt = hdtr.hdr_cnt;
1475 error = writev(p, &nuap);
1476 if (error)
1477 goto done;
1478 sbytes += p->p_retval[0];
1479 }
1480 }
1481
1482 /*
1483 * Protect against multiple writers to the socket.
1484 */
1485 (void) sblock(&so->so_snd, M_WAITOK);
1486
1487 /*
1488 * Loop through the pages in the file, starting with the requested
1489 * offset. Get a file page (do I/O if necessary), map the file page
1490 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1491 * it on the socket.
1492 */
1493 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
1494 vm_pindex_t pindex;
1495 vm_offset_t pgoff;
1496
1497 pindex = OFF_TO_IDX(off);
1498retry_lookup:
1499 /*
1500 * Calculate the amount to transfer. Not to exceed a page,
1501 * the EOF, or the passed in nbytes.
1502 */
1503 xfsize = obj->un_pager.vnp.vnp_size - off;
1504 if (xfsize > PAGE_SIZE)
1505 xfsize = PAGE_SIZE;
1506 pgoff = (vm_offset_t)(off & PAGE_MASK);
1507 if (PAGE_SIZE - pgoff < xfsize)
1508 xfsize = PAGE_SIZE - pgoff;
1509 if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
1510 xfsize = uap->nbytes - sbytes;
1511 if (xfsize <= 0)
1512 break;
1513 /*
1514 * Optimize the non-blocking case by looking at the socket space
1515 * before going to the extra work of constituting the sf_buf.
1516 */
1517 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
1518 if (so->so_state & SS_CANTSENDMORE)
1519 error = EPIPE;
1520 else
1521 error = EAGAIN;
1522 sbunlock(&so->so_snd);
1523 goto done;
1524 }
1525 /*
1526 * Attempt to look up the page.
1527 *
1528 * Allocate if not found
1529 *
1530 * Wait and loop if busy.
1531 */
1532 pg = vm_page_lookup(obj, pindex);
1533
1534 if (pg == NULL) {
1535 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL);
1536 if (pg == NULL) {
1537 VM_WAIT;
1538 goto retry_lookup;
1539 }
1540 vm_page_wakeup(pg);
1541 } else if (vm_page_sleep_busy(pg, TRUE, "sfpbsy")) {
1542 goto retry_lookup;
1543 }
1544
1545 /*
1546 * Wire the page so it does not get ripped out from under
1547 * us.
1548 */
1549
1550 vm_page_wire(pg);
1551
1552 /*
1553 * If page is not valid for what we need, initiate I/O
1554 */
1555
1556 if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) {
1557 struct uio auio;
1558 struct iovec aiov;
1559 int bsize;
1560
1561 /*
1562 * Ensure that our page is still around when the I/O
1563 * completes.
1564 */
1565 vm_page_io_start(pg);
1566
1567 /*
1568 * Get the page from backing store.
1569 */
1570 bsize = vp->v_mount->mnt_stat.f_iosize;
1571 auio.uio_iov = &aiov;
1572 auio.uio_iovcnt = 1;
1573 aiov.iov_base = 0;
1574 aiov.iov_len = MAXBSIZE;
1575 auio.uio_resid = MAXBSIZE;
1576 auio.uio_offset = trunc_page(off);
1577 auio.uio_segflg = UIO_NOCOPY;
1578 auio.uio_rw = UIO_READ;
1579 auio.uio_procp = p;
1580 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, p);
1581 error = VOP_READ(vp, &auio, IO_VMIO | ((MAXBSIZE / bsize) << 16),
1582 p->p_ucred);
1583 VOP_UNLOCK(vp, 0, p);
1584 vm_page_flag_clear(pg, PG_ZERO);
1585 vm_page_io_finish(pg);
1586 if (error) {
1587 vm_page_unwire(pg, 0);
1588 /*
1589 * See if anyone else might know about this page.
1590 * If not and it is not valid, then free it.
1591 */
1592 if (pg->wire_count == 0 && pg->valid == 0 &&
1593 pg->busy == 0 && !(pg->flags & PG_BUSY) &&
1594 pg->hold_count == 0)
1595 vm_page_free(pg);
1596 sbunlock(&so->so_snd);
1597 goto done;
1598 }
1599 }
1600
1601 /*
1602 * Allocate a kernel virtual page and insert the physical page
1603 * into it.
1604 */
1605
1606 sf = sf_buf_alloc();
1607 sf->m = pg;
1608 pmap_qenter(sf->kva, &pg, 1);
1609 /*
1610 * Get an mbuf header and set it up as having external storage.
1611 */
1612 MGETHDR(m, M_WAIT, MT_DATA);
1613 m->m_ext.ext_free = sf_buf_free;
1614 m->m_ext.ext_ref = sf_buf_ref;
1615 m->m_ext.ext_buf = (void *)sf->kva;
1616 m->m_ext.ext_size = PAGE_SIZE;
1617 m->m_data = (char *) sf->kva + pgoff;
1618 m->m_flags |= M_EXT;
1619 m->m_pkthdr.len = m->m_len = xfsize;
1620 /*
1621 * Add the buffer to the socket buffer chain.
1622 */
1623 s = splnet();
1624retry_space:
1625 /*
1626 * Make sure that the socket is still able to take more data.
1627 * CANTSENDMORE being true usually means that the connection
1628 * was closed. so_error is true when an error was sensed after
1629 * a previous send.
1630 * The state is checked after the page mapping and buffer
1631 * allocation above since those operations may block and make
1632 * any socket checks stale. From this point forward, nothing
1633 * blocks before the pru_send (or more accurately, any blocking
1634 * results in a loop back to here to re-check).
1635 */
1636 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
1637 if (so->so_state & SS_CANTSENDMORE) {
1638 error = EPIPE;
1639 } else {
1640 error = so->so_error;
1641 so->so_error = 0;
1642 }
1643 m_freem(m);
1644 sbunlock(&so->so_snd);
1645 splx(s);
1646 goto done;
1647 }
1648 /*
1649 * Wait for socket space to become available. We do this just
1650 * after checking the connection state above in order to avoid
1651 * a race condition with sbwait().
1652 */
1653 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
1654 if (so->so_state & SS_NBIO) {
1655 m_freem(m);
1656 sbunlock(&so->so_snd);
1657 splx(s);
1658 error = EAGAIN;
1659 goto done;
1660 }
1661 error = sbwait(&so->so_snd);
1662 /*
1663 * An error from sbwait usually indicates that we've
1664 * been interrupted by a signal. If we've sent anything
1665 * then return bytes sent, otherwise return the error.
1666 */
1667 if (error) {
1668 m_freem(m);
1669 sbunlock(&so->so_snd);
1670 splx(s);
1671 goto done;
1672 }
1673 goto retry_space;
1674 }
1675 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, p);
1676 splx(s);
1677 if (error) {
1678 sbunlock(&so->so_snd);
1679 goto done;
1680 }
1681 }
1682 sbunlock(&so->so_snd);
1683
1684 /*
1685 * Send trailers. Wimp out and use writev(2).
1686 */
1687 if (uap->hdtr != NULL && hdtr.trailers != NULL) {
1688 nuap.fd = uap->s;
1689 nuap.iovp = hdtr.trailers;
1690 nuap.iovcnt = hdtr.trl_cnt;
1691 error = writev(p, &nuap);
1692 if (error)
1693 goto done;
1694 sbytes += p->p_retval[0];
1695 }
1696
1697done:
1698 if (uap->sbytes != NULL) {
1699 copyout(&sbytes, uap->sbytes, sizeof(off_t));
1700 }
1701 return (error);
1702}