uipc_socket.c: revision 50477 → revision 51381
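The only substantive change in this revision is the socket credential handling: socreate() now records the opening process's ucred directly (so_cred = p->p_ucred) and takes a reference with crhold(), where it previously shared the process's pcred and bumped p_refcnt by hand, and sodealloc() releases that reference with a single crfree(). The rest of the file is unchanged context.
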
1/*
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94
34 * $FreeBSD: head/sys/kern/uipc_socket.c 50477 1999-08-28 01:08:13Z peter $
34 * $FreeBSD: head/sys/kern/uipc_socket.c 51381 1999-09-19 02:17:02Z green $
35 */
36
37#include <sys/param.h>
38#include <sys/systm.h>
39#include <sys/fcntl.h>
40#include <sys/malloc.h>
41#include <sys/mbuf.h>
42#include <sys/domain.h>
43#include <sys/kernel.h>
44#include <sys/malloc.h>
45#include <sys/poll.h>
46#include <sys/proc.h>
47#include <sys/protosw.h>
48#include <sys/socket.h>
49#include <sys/socketvar.h>
50#include <sys/resourcevar.h>
51#include <sys/signalvar.h>
52#include <sys/sysctl.h>
53#include <sys/uio.h>
54#include <vm/vm_zone.h>
55
56#include <machine/limits.h>
57
58struct vm_zone *socket_zone;
59so_gen_t so_gencnt; /* generation count for sockets */
60
61MALLOC_DEFINE(M_SONAME, "soname", "socket name");
62MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");
63
64SYSCTL_DECL(_kern_ipc);
65
66static int somaxconn = SOMAXCONN;
67SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW,
68 &somaxconn, 0, "Maximum pending socket connection queue size");
69
70/*
71 * Socket operation routines.
72 * These routines are called by the routines in
73 * sys_socket.c or from a system process, and
74 * implement the semantics of socket operations by
75 * switching out to the protocol specific routines.
76 */
77
78/*
79 * Get a socket structure from our zone, and initialize it.
80 * We don't implement `waitok' yet (see comments in uipc_domain.c).
81 * Note that it would probably be better to allocate socket
82 * and PCB at the same time, but I'm not convinced that all
83 * the protocols can be easily modified to do this.
84 */
85struct socket *
86soalloc(waitok)
87 int waitok;
88{
89 struct socket *so;
90
91 so = zalloci(socket_zone);
92 if (so) {
93 /* XXX race condition for reentrant kernel */
94 bzero(so, sizeof *so);
95 so->so_gencnt = ++so_gencnt;
96 so->so_zone = socket_zone;
97 }
98 return so;
99}
100
101int
102socreate(dom, aso, type, proto, p)
103 int dom;
104 struct socket **aso;
105 register int type;
106 int proto;
107 struct proc *p;
108{
109 register struct protosw *prp;
110 register struct socket *so;
111 register int error;
112
113 if (proto)
114 prp = pffindproto(dom, proto, type);
115 else
116 prp = pffindtype(dom, type);
117 if (prp == 0 || prp->pr_usrreqs->pru_attach == 0)
118 return (EPROTONOSUPPORT);
119 if (prp->pr_type != type)
120 return (EPROTOTYPE);
121 so = soalloc(p != 0);
122 if (so == 0)
123 return (ENOBUFS);
124
125 TAILQ_INIT(&so->so_incomp);
126 TAILQ_INIT(&so->so_comp);
127 so->so_type = type;
128 if (p) {
129 so->so_cred = p->p_cred;
130 so->so_cred->p_refcnt++;
131 } else so->so_cred = NULL;
128 so->so_cred = p->p_ucred;
129 crhold(so->so_cred);
132 so->so_proto = prp;
133 error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
134 if (error) {
135 so->so_state |= SS_NOFDREF;
136 sofree(so);
137 return (error);
138 }
139 *aso = so;
140 return (0);
141}
142
143int
144sobind(so, nam, p)
145 struct socket *so;
146 struct sockaddr *nam;
147 struct proc *p;
148{
149 int s = splnet();
150 int error;
151
152 error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
153 splx(s);
154 return (error);
155}
156
157void
158sodealloc(so)
159 struct socket *so;
160{
159
161 so->so_gencnt = ++so_gencnt;
160 so->so_gencnt = ++so_gencnt;
162 if (so->so_cred && --so->so_cred->p_refcnt == 0) {
163 crfree(so->so_cred->pc_ucred);
164 FREE(so->so_cred, M_SUBPROC);
165 }
161 crfree(so->so_cred);
166 zfreei(so->so_zone, so);
167}
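
The new code above is the standard ucred lifetime idiom: crhold() when the pointer is stored, crfree() when the holder goes away, with the credential freed once its reference count drops to zero. A minimal sketch of the same idiom around a hypothetical holder structure (holder, holder_init, and holder_fini are illustrative names, not part of this file):

struct holder {
	struct ucred *h_cred;		/* counted reference */
};

static void
holder_init(h, p)
	struct holder *h;
	struct proc *p;
{
	h->h_cred = p->p_ucred;		/* store the pointer... */
	crhold(h->h_cred);		/* ...and pin it with a reference */
}

static void
holder_fini(h)
	struct holder *h;
{
	crfree(h->h_cred);		/* drop the reference; last one frees */
}
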
168
169int
170solisten(so, backlog, p)
171 register struct socket *so;
172 int backlog;
173 struct proc *p;
174{
175 int s, error;
176
177 s = splnet();
178 error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
179 if (error) {
180 splx(s);
181 return (error);
182 }
183 if (so->so_comp.tqh_first == NULL)
184 so->so_options |= SO_ACCEPTCONN;
185 if (backlog < 0 || backlog > somaxconn)
186 backlog = somaxconn;
187 so->so_qlimit = backlog;
188 splx(s);
189 return (0);
190}
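
solisten() clamps the requested backlog into [0, somaxconn]: a negative or oversized value from listen(2) silently becomes somaxconn, which defaults to SOMAXCONN and is tunable at run time through the kern.ipc.somaxconn sysctl declared above. A sketch of the resulting queue limits, assuming the stock SOMAXCONN of 128:

	solisten(so, -1, p);	/* so->so_qlimit == 128 (clamped up) */
	solisten(so, 1024, p);	/* so->so_qlimit == 128 (clamped down) */
	solisten(so, 5, p);	/* so->so_qlimit == 5 */
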
191
192void
193sofree(so)
194 register struct socket *so;
195{
196 struct socket *head = so->so_head;
197
198 if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
199 return;
200 if (head != NULL) {
201 if (so->so_state & SS_INCOMP) {
202 TAILQ_REMOVE(&head->so_incomp, so, so_list);
203 head->so_incqlen--;
204 } else if (so->so_state & SS_COMP) {
205 /*
206 * We must not decommission a socket that's
207 * on the accept(2) queue. If we do, then
208 * accept(2) may hang after select(2) indicated
209 * that the listening socket was ready.
210 */
211 return;
212 } else {
213 panic("sofree: not queued");
214 }
215 head->so_qlen--;
216 so->so_state &= ~SS_INCOMP;
217 so->so_head = NULL;
218 }
219 sbrelease(&so->so_snd);
220 sorflush(so);
221 sodealloc(so);
222}
223
224/*
225 * Close a socket on last file table reference removal.
226 * Initiate disconnect if connected.
227 * Free socket when disconnect complete.
228 */
229int
230soclose(so)
231 register struct socket *so;
232{
233 int s = splnet(); /* conservative */
234 int error = 0;
235
236 funsetown(so->so_sigio);
237 if (so->so_options & SO_ACCEPTCONN) {
238 struct socket *sp, *sonext;
239
240 for (sp = so->so_incomp.tqh_first; sp != NULL; sp = sonext) {
241 sonext = sp->so_list.tqe_next;
242 (void) soabort(sp);
243 }
244 for (sp = so->so_comp.tqh_first; sp != NULL; sp = sonext) {
245 sonext = sp->so_list.tqe_next;
246 /* Dequeue from so_comp since sofree() won't do it */
247 TAILQ_REMOVE(&so->so_comp, sp, so_list);
248 so->so_qlen--;
249 sp->so_state &= ~SS_COMP;
250 sp->so_head = NULL;
251 (void) soabort(sp);
252 }
253 }
254 if (so->so_pcb == 0)
255 goto discard;
256 if (so->so_state & SS_ISCONNECTED) {
257 if ((so->so_state & SS_ISDISCONNECTING) == 0) {
258 error = sodisconnect(so);
259 if (error)
260 goto drop;
261 }
262 if (so->so_options & SO_LINGER) {
263 if ((so->so_state & SS_ISDISCONNECTING) &&
264 (so->so_state & SS_NBIO))
265 goto drop;
266 while (so->so_state & SS_ISCONNECTED) {
267 error = tsleep((caddr_t)&so->so_timeo,
268 PSOCK | PCATCH, "soclos", so->so_linger * hz);
269 if (error)
270 break;
271 }
272 }
273 }
274drop:
275 if (so->so_pcb) {
276 int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
277 if (error == 0)
278 error = error2;
279 }
280discard:
281 if (so->so_state & SS_NOFDREF)
282 panic("soclose: NOFDREF");
283 so->so_state |= SS_NOFDREF;
284 sofree(so);
285 splx(s);
286 return (error);
287}
288
289/*
290 * Must be called at splnet...
291 */
292int
293soabort(so)
294 struct socket *so;
295{
296
297 return (*so->so_proto->pr_usrreqs->pru_abort)(so);
298}
299
300int
301soaccept(so, nam)
302 register struct socket *so;
303 struct sockaddr **nam;
304{
305 int s = splnet();
306 int error;
307
308 if ((so->so_state & SS_NOFDREF) == 0)
309 panic("soaccept: !NOFDREF");
310 so->so_state &= ~SS_NOFDREF;
311 if ((so->so_state & SS_ISDISCONNECTED) == 0)
312 error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
313 else {
314 if (nam)
315 *nam = 0;
316 error = 0;
317 }
318 splx(s);
319 return (error);
320}
321
322int
323soconnect(so, nam, p)
324 register struct socket *so;
325 struct sockaddr *nam;
326 struct proc *p;
327{
328 int s;
329 int error;
330
331 if (so->so_options & SO_ACCEPTCONN)
332 return (EOPNOTSUPP);
333 s = splnet();
334 /*
335 * If protocol is connection-based, can only connect once.
336 * Otherwise, if connected, try to disconnect first.
337 * This allows user to disconnect by connecting to, e.g.,
338 * a null address.
339 */
340 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
341 ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
342 (error = sodisconnect(so))))
343 error = EISCONN;
344 else
345 error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
346 splx(s);
347 return (error);
348}
349
350int
351soconnect2(so1, so2)
352 register struct socket *so1;
353 struct socket *so2;
354{
355 int s = splnet();
356 int error;
357
358 error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
359 splx(s);
360 return (error);
361}
362
363int
364sodisconnect(so)
365 register struct socket *so;
366{
367 int s = splnet();
368 int error;
369
370 if ((so->so_state & SS_ISCONNECTED) == 0) {
371 error = ENOTCONN;
372 goto bad;
373 }
374 if (so->so_state & SS_ISDISCONNECTING) {
375 error = EALREADY;
376 goto bad;
377 }
378 error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
379bad:
380 splx(s);
381 return (error);
382}
383
384#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
385/*
386 * Send on a socket.
387 * If send must go all at once and message is larger than
388 * send buffering, then hard error.
389 * Lock against other senders.
390 * If must go all at once and not enough room now, then
391 * inform user that this would block and do nothing.
392 * Otherwise, if nonblocking, send as much as possible.
393 * The data to be sent is described by "uio" if nonzero,
394 * otherwise by the mbuf chain "top" (which must be null
395 * if uio is not). Data provided in mbuf chain must be small
396 * enough to send all at once.
397 *
398 * Returns nonzero on error, timeout or signal; callers
399 * must check for short counts if EINTR/ERESTART are returned.
400 * Data and control buffers are freed on return.
401 */
402int
403sosend(so, addr, uio, top, control, flags, p)
404 register struct socket *so;
405 struct sockaddr *addr;
406 struct uio *uio;
407 struct mbuf *top;
408 struct mbuf *control;
409 int flags;
410 struct proc *p;
411{
412 struct mbuf **mp;
413 register struct mbuf *m;
414 register long space, len, resid;
415 int clen = 0, error, s, dontroute, mlen;
416 int atomic = sosendallatonce(so) || top;
417
418 if (uio)
419 resid = uio->uio_resid;
420 else
421 resid = top->m_pkthdr.len;
422 /*
423 * In theory resid should be unsigned.
424 * However, space must be signed, as it might be less than 0
425 * if we over-committed, and we must use a signed comparison
426 * of space and resid. On the other hand, a negative resid
427 * causes us to loop sending 0-length segments to the protocol.
428 *
429 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
430 * type sockets since that's an error.
431 */
432 if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
433 error = EINVAL;
434 goto out;
435 }
436
437 dontroute =
438 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
439 (so->so_proto->pr_flags & PR_ATOMIC);
440 if (p)
441 p->p_stats->p_ru.ru_msgsnd++;
442 if (control)
443 clen = control->m_len;
444#define snderr(errno) { error = errno; splx(s); goto release; }
445
446restart:
447 error = sblock(&so->so_snd, SBLOCKWAIT(flags));
448 if (error)
449 goto out;
450 do {
451 s = splnet();
452 if (so->so_state & SS_CANTSENDMORE)
453 snderr(EPIPE);
454 if (so->so_error) {
455 error = so->so_error;
456 so->so_error = 0;
457 splx(s);
458 goto release;
459 }
460 if ((so->so_state & SS_ISCONNECTED) == 0) {
461 /*
 462			 * `sendto' and `sendmsg' are allowed on a connection-
463 * based socket if it supports implied connect.
464 * Return ENOTCONN if not connected and no address is
465 * supplied.
466 */
467 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
468 (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
469 if ((so->so_state & SS_ISCONFIRMING) == 0 &&
470 !(resid == 0 && clen != 0))
471 snderr(ENOTCONN);
472 } else if (addr == 0)
473 snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ?
474 ENOTCONN : EDESTADDRREQ);
475 }
476 space = sbspace(&so->so_snd);
477 if (flags & MSG_OOB)
478 space += 1024;
479 if ((atomic && resid > so->so_snd.sb_hiwat) ||
480 clen > so->so_snd.sb_hiwat)
481 snderr(EMSGSIZE);
482 if (space < resid + clen && uio &&
483 (atomic || space < so->so_snd.sb_lowat || space < clen)) {
484 if (so->so_state & SS_NBIO)
485 snderr(EWOULDBLOCK);
486 sbunlock(&so->so_snd);
487 error = sbwait(&so->so_snd);
488 splx(s);
489 if (error)
490 goto out;
491 goto restart;
492 }
493 splx(s);
494 mp = &top;
495 space -= clen;
496 do {
497 if (uio == NULL) {
498 /*
499 * Data is prepackaged in "top".
500 */
501 resid = 0;
502 if (flags & MSG_EOR)
503 top->m_flags |= M_EOR;
504 } else do {
505 if (top == 0) {
506 MGETHDR(m, M_WAIT, MT_DATA);
507 mlen = MHLEN;
508 m->m_pkthdr.len = 0;
509 m->m_pkthdr.rcvif = (struct ifnet *)0;
510 } else {
511 MGET(m, M_WAIT, MT_DATA);
512 mlen = MLEN;
513 }
514 if (resid >= MINCLSIZE) {
515 MCLGET(m, M_WAIT);
516 if ((m->m_flags & M_EXT) == 0)
517 goto nopages;
518 mlen = MCLBYTES;
519 len = min(min(mlen, resid), space);
520 } else {
521nopages:
522 len = min(min(mlen, resid), space);
523 /*
524 * For datagram protocols, leave room
525 * for protocol headers in first mbuf.
526 */
527 if (atomic && top == 0 && len < mlen)
528 MH_ALIGN(m, len);
529 }
530 space -= len;
531 error = uiomove(mtod(m, caddr_t), (int)len, uio);
532 resid = uio->uio_resid;
533 m->m_len = len;
534 *mp = m;
535 top->m_pkthdr.len += len;
536 if (error)
537 goto release;
538 mp = &m->m_next;
539 if (resid <= 0) {
540 if (flags & MSG_EOR)
541 top->m_flags |= M_EOR;
542 break;
543 }
544 } while (space > 0 && atomic);
545 if (dontroute)
546 so->so_options |= SO_DONTROUTE;
547 s = splnet(); /* XXX */
548 /*
549 * XXX all the SS_CANTSENDMORE checks previously
 550			 * done could be out of date. We could have received
551 * a reset packet in an interrupt or maybe we slept
552 * while doing page faults in uiomove() etc. We could
553 * probably recheck again inside the splnet() protection
554 * here, but there are probably other places that this
555 * also happens. We must rethink this.
556 */
557 error = (*so->so_proto->pr_usrreqs->pru_send)(so,
558 (flags & MSG_OOB) ? PRUS_OOB :
559 /*
560 * If the user set MSG_EOF, the protocol
561 * understands this flag and nothing left to
562 * send then use PRU_SEND_EOF instead of PRU_SEND.
563 */
564 ((flags & MSG_EOF) &&
565 (so->so_proto->pr_flags & PR_IMPLOPCL) &&
566 (resid <= 0)) ?
567 PRUS_EOF :
568 /* If there is more to send set PRUS_MORETOCOME */
569 (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
570 top, addr, control, p);
571 splx(s);
572 if (dontroute)
573 so->so_options &= ~SO_DONTROUTE;
574 clen = 0;
575 control = 0;
576 top = 0;
577 mp = &top;
578 if (error)
579 goto release;
580 } while (resid && space > 0);
581 } while (resid);
582
583release:
584 sbunlock(&so->so_snd);
585out:
586 if (top)
587 m_freem(top);
588 if (control)
589 m_freem(control);
590 return (error);
591}
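
Callers hand sosend() either a uio describing user buffers (the normal write path) or a prebuilt mbuf chain in top, never both. A sketch of the uio-style call as the sendmsg(2)-like paths make it; to, auio, and control are illustrative caller variables, not names from this file:

	error = sosend(so, to, &auio, (struct mbuf *)0,
	    control, flags, p);
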
592
593/*
594 * Implement receive operations on a socket.
595 * We depend on the way that records are added to the sockbuf
596 * by sbappend*. In particular, each record (mbufs linked through m_next)
597 * must begin with an address if the protocol so specifies,
598 * followed by an optional mbuf or mbufs containing ancillary data,
599 * and then zero or more mbufs of data.
600 * In order to avoid blocking network interrupts for the entire time here,
601 * we splx() while doing the actual copy to user space.
602 * Although the sockbuf is locked, new data may still be appended,
603 * and thus we must maintain consistency of the sockbuf during that time.
604 *
605 * The caller may receive the data as a single mbuf chain by supplying
606 * an mbuf **mp0 for use in returning the chain. The uio is then used
607 * only for the count in uio_resid.
608 */
609int
610soreceive(so, psa, uio, mp0, controlp, flagsp)
611 register struct socket *so;
612 struct sockaddr **psa;
613 struct uio *uio;
614 struct mbuf **mp0;
615 struct mbuf **controlp;
616 int *flagsp;
617{
618 register struct mbuf *m, **mp;
619 register int flags, len, error, s, offset;
620 struct protosw *pr = so->so_proto;
621 struct mbuf *nextrecord;
622 int moff, type = 0;
623 int orig_resid = uio->uio_resid;
624
625 mp = mp0;
626 if (psa)
627 *psa = 0;
628 if (controlp)
629 *controlp = 0;
630 if (flagsp)
631 flags = *flagsp &~ MSG_EOR;
632 else
633 flags = 0;
634 if (flags & MSG_OOB) {
635 m = m_get(M_WAIT, MT_DATA);
636 error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
637 if (error)
638 goto bad;
639 do {
640 error = uiomove(mtod(m, caddr_t),
641 (int) min(uio->uio_resid, m->m_len), uio);
642 m = m_free(m);
643 } while (uio->uio_resid && error == 0 && m);
644bad:
645 if (m)
646 m_freem(m);
647 return (error);
648 }
649 if (mp)
650 *mp = (struct mbuf *)0;
651 if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
652 (*pr->pr_usrreqs->pru_rcvd)(so, 0);
653
654restart:
655 error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
656 if (error)
657 return (error);
658 s = splnet();
659
660 m = so->so_rcv.sb_mb;
661 /*
662 * If we have less data than requested, block awaiting more
663 * (subject to any timeout) if:
664 * 1. the current count is less than the low water mark, or
665 * 2. MSG_WAITALL is set, and it is possible to do the entire
 666	 * receive operation at once if we block (resid <= hiwat), and
 667	 * 3. MSG_DONTWAIT is not set.
668 * If MSG_WAITALL is set but resid is larger than the receive buffer,
669 * we have to do the receive in sections, and thus risk returning
670 * a short count if a timeout or signal occurs after we start.
671 */
672 if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
673 so->so_rcv.sb_cc < uio->uio_resid) &&
674 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
675 ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
676 m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
677 KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1"));
678 if (so->so_error) {
679 if (m)
680 goto dontblock;
681 error = so->so_error;
682 if ((flags & MSG_PEEK) == 0)
683 so->so_error = 0;
684 goto release;
685 }
686 if (so->so_state & SS_CANTRCVMORE) {
687 if (m)
688 goto dontblock;
689 else
690 goto release;
691 }
692 for (; m; m = m->m_next)
693 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
694 m = so->so_rcv.sb_mb;
695 goto dontblock;
696 }
697 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
698 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
699 error = ENOTCONN;
700 goto release;
701 }
702 if (uio->uio_resid == 0)
703 goto release;
704 if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
705 error = EWOULDBLOCK;
706 goto release;
707 }
708 sbunlock(&so->so_rcv);
709 error = sbwait(&so->so_rcv);
710 splx(s);
711 if (error)
712 return (error);
713 goto restart;
714 }
715dontblock:
716 if (uio->uio_procp)
717 uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
718 nextrecord = m->m_nextpkt;
719 if (pr->pr_flags & PR_ADDR) {
720 KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
721 orig_resid = 0;
722 if (psa)
723 *psa = dup_sockaddr(mtod(m, struct sockaddr *),
724 mp0 == 0);
725 if (flags & MSG_PEEK) {
726 m = m->m_next;
727 } else {
728 sbfree(&so->so_rcv, m);
729 MFREE(m, so->so_rcv.sb_mb);
730 m = so->so_rcv.sb_mb;
731 }
732 }
733 while (m && m->m_type == MT_CONTROL && error == 0) {
734 if (flags & MSG_PEEK) {
735 if (controlp)
736 *controlp = m_copy(m, 0, m->m_len);
737 m = m->m_next;
738 } else {
739 sbfree(&so->so_rcv, m);
740 if (controlp) {
741 if (pr->pr_domain->dom_externalize &&
742 mtod(m, struct cmsghdr *)->cmsg_type ==
743 SCM_RIGHTS)
744 error = (*pr->pr_domain->dom_externalize)(m);
745 *controlp = m;
746 so->so_rcv.sb_mb = m->m_next;
747 m->m_next = 0;
748 m = so->so_rcv.sb_mb;
749 } else {
750 MFREE(m, so->so_rcv.sb_mb);
751 m = so->so_rcv.sb_mb;
752 }
753 }
754 if (controlp) {
755 orig_resid = 0;
756 controlp = &(*controlp)->m_next;
757 }
758 }
759 if (m) {
760 if ((flags & MSG_PEEK) == 0)
761 m->m_nextpkt = nextrecord;
762 type = m->m_type;
763 if (type == MT_OOBDATA)
764 flags |= MSG_OOB;
765 }
766 moff = 0;
767 offset = 0;
768 while (m && uio->uio_resid > 0 && error == 0) {
769 if (m->m_type == MT_OOBDATA) {
770 if (type != MT_OOBDATA)
771 break;
772 } else if (type == MT_OOBDATA)
773 break;
774 else
775 KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER,
776 ("receive 3"));
777 so->so_state &= ~SS_RCVATMARK;
778 len = uio->uio_resid;
779 if (so->so_oobmark && len > so->so_oobmark - offset)
780 len = so->so_oobmark - offset;
781 if (len > m->m_len - moff)
782 len = m->m_len - moff;
783 /*
784 * If mp is set, just pass back the mbufs.
785 * Otherwise copy them out via the uio, then free.
 786		 * Sockbuf must be consistent here (sb_mb points to the current
 787		 * mbuf, m_nextpkt to the next record) when we drop priority;
788 * we must note any additions to the sockbuf when we
789 * block interrupts again.
790 */
791 if (mp == 0) {
792 splx(s);
793 error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
794 s = splnet();
795 if (error)
796 goto release;
797 } else
798 uio->uio_resid -= len;
799 if (len == m->m_len - moff) {
800 if (m->m_flags & M_EOR)
801 flags |= MSG_EOR;
802 if (flags & MSG_PEEK) {
803 m = m->m_next;
804 moff = 0;
805 } else {
806 nextrecord = m->m_nextpkt;
807 sbfree(&so->so_rcv, m);
808 if (mp) {
809 *mp = m;
810 mp = &m->m_next;
811 so->so_rcv.sb_mb = m = m->m_next;
812 *mp = (struct mbuf *)0;
813 } else {
814 MFREE(m, so->so_rcv.sb_mb);
815 m = so->so_rcv.sb_mb;
816 }
817 if (m)
818 m->m_nextpkt = nextrecord;
819 }
820 } else {
821 if (flags & MSG_PEEK)
822 moff += len;
823 else {
824 if (mp)
825 *mp = m_copym(m, 0, len, M_WAIT);
826 m->m_data += len;
827 m->m_len -= len;
828 so->so_rcv.sb_cc -= len;
829 }
830 }
831 if (so->so_oobmark) {
832 if ((flags & MSG_PEEK) == 0) {
833 so->so_oobmark -= len;
834 if (so->so_oobmark == 0) {
835 so->so_state |= SS_RCVATMARK;
836 break;
837 }
838 } else {
839 offset += len;
840 if (offset == so->so_oobmark)
841 break;
842 }
843 }
844 if (flags & MSG_EOR)
845 break;
846 /*
 847		 * If the MSG_WAITALL flag is set (for a non-atomic socket),
848 * we must not quit until "uio->uio_resid == 0" or an error
849 * termination. If a signal/timeout occurs, return
850 * with a short count but without error.
851 * Keep sockbuf locked against other readers.
852 */
853 while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
854 !sosendallatonce(so) && !nextrecord) {
855 if (so->so_error || so->so_state & SS_CANTRCVMORE)
856 break;
857 error = sbwait(&so->so_rcv);
858 if (error) {
859 sbunlock(&so->so_rcv);
860 splx(s);
861 return (0);
862 }
863 m = so->so_rcv.sb_mb;
864 if (m)
865 nextrecord = m->m_nextpkt;
866 }
867 }
868
869 if (m && pr->pr_flags & PR_ATOMIC) {
870 flags |= MSG_TRUNC;
871 if ((flags & MSG_PEEK) == 0)
872 (void) sbdroprecord(&so->so_rcv);
873 }
874 if ((flags & MSG_PEEK) == 0) {
875 if (m == 0)
876 so->so_rcv.sb_mb = nextrecord;
877 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
878 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
879 }
880 if (orig_resid == uio->uio_resid && orig_resid &&
881 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
882 sbunlock(&so->so_rcv);
883 splx(s);
884 goto restart;
885 }
886
887 if (flagsp)
888 *flagsp |= flags;
889release:
890 sbunlock(&so->so_rcv);
891 splx(s);
892 return (error);
893}
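
The matching receive call passes pointers that soreceive() fills in. A sketch of the recvmsg(2)-style invocation; fromsa, auio, control, and flags are illustrative caller variables:

	error = soreceive(so, &fromsa, &auio, (struct mbuf **)0,
	    &control, &flags);
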
894
895int
896soshutdown(so, how)
897 register struct socket *so;
898 register int how;
899{
900 register struct protosw *pr = so->so_proto;
901
902 how++;
903 if (how & FREAD)
904 sorflush(so);
905 if (how & FWRITE)
906 return ((*pr->pr_usrreqs->pru_shutdown)(so));
907 return (0);
908}
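
The how++ above maps shutdown(2)'s argument (0 = no more receives, 1 = no more sends, 2 = both) onto the FREAD/FWRITE bits, assuming their usual values of 0x1 and 0x2:

	/*
	 * how   how + 1   bits set        action
	 *  0       1      FREAD           sorflush() only
	 *  1       2      FWRITE          pru_shutdown() only
	 *  2       3      FREAD|FWRITE    both
	 */
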
909
910void
911sorflush(so)
912 register struct socket *so;
913{
914 register struct sockbuf *sb = &so->so_rcv;
915 register struct protosw *pr = so->so_proto;
916 register int s;
917 struct sockbuf asb;
918
919 sb->sb_flags |= SB_NOINTR;
920 (void) sblock(sb, M_WAITOK);
921 s = splimp();
922 socantrcvmore(so);
923 sbunlock(sb);
924 asb = *sb;
925 bzero((caddr_t)sb, sizeof (*sb));
926 splx(s);
927 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
928 (*pr->pr_domain->dom_dispose)(asb.sb_mb);
929 sbrelease(&asb);
930}
931
932/*
933 * Perhaps this routine, and sooptcopyout(), below, ought to come in
934 * an additional variant to handle the case where the option value needs
935 * to be some kind of integer, but not a specific size.
936 * In addition to their use here, these functions are also called by the
937 * protocol-level pr_ctloutput() routines.
938 */
939int
940sooptcopyin(sopt, buf, len, minlen)
941 struct sockopt *sopt;
942 void *buf;
943 size_t len;
944 size_t minlen;
945{
946 size_t valsize;
947
948 /*
949 * If the user gives us more than we wanted, we ignore it,
950 * but if we don't get the minimum length the caller
951 * wants, we return EINVAL. On success, sopt->sopt_valsize
952 * is set to however much we actually retrieved.
953 */
954 if ((valsize = sopt->sopt_valsize) < minlen)
955 return EINVAL;
956 if (valsize > len)
957 sopt->sopt_valsize = valsize = len;
958
959 if (sopt->sopt_p != 0)
960 return (copyin(sopt->sopt_val, buf, valsize));
961
962 bcopy(sopt->sopt_val, buf, valsize);
963 return 0;
964}
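
Both helpers are used by sosetopt() and sogetopt() below, as well as by protocol-level pr_ctloutput() routines. A typical call for a fixed-size integer option mirrors the cases later in this file:

	int optval;
	int error;

	error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
	if (error)
		return (error);
	/* sopt->sopt_valsize now reflects how much was actually taken */
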
965
966int
967sosetopt(so, sopt)
968 struct socket *so;
969 struct sockopt *sopt;
970{
971 int error, optval;
972 struct linger l;
973 struct timeval tv;
974 u_long val;
975
976 error = 0;
977 if (sopt->sopt_level != SOL_SOCKET) {
978 if (so->so_proto && so->so_proto->pr_ctloutput)
979 return ((*so->so_proto->pr_ctloutput)
980 (so, sopt));
981 error = ENOPROTOOPT;
982 } else {
983 switch (sopt->sopt_name) {
984 case SO_LINGER:
985 error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
986 if (error)
987 goto bad;
988
989 so->so_linger = l.l_linger;
990 if (l.l_onoff)
991 so->so_options |= SO_LINGER;
992 else
993 so->so_options &= ~SO_LINGER;
994 break;
995
996 case SO_DEBUG:
997 case SO_KEEPALIVE:
998 case SO_DONTROUTE:
999 case SO_USELOOPBACK:
1000 case SO_BROADCAST:
1001 case SO_REUSEADDR:
1002 case SO_REUSEPORT:
1003 case SO_OOBINLINE:
1004 case SO_TIMESTAMP:
1005 error = sooptcopyin(sopt, &optval, sizeof optval,
1006 sizeof optval);
1007 if (error)
1008 goto bad;
1009 if (optval)
1010 so->so_options |= sopt->sopt_name;
1011 else
1012 so->so_options &= ~sopt->sopt_name;
1013 break;
1014
1015 case SO_SNDBUF:
1016 case SO_RCVBUF:
1017 case SO_SNDLOWAT:
1018 case SO_RCVLOWAT:
1019 error = sooptcopyin(sopt, &optval, sizeof optval,
1020 sizeof optval);
1021 if (error)
1022 goto bad;
1023
1024 /*
1025 * Values < 1 make no sense for any of these
1026 * options, so disallow them.
1027 */
1028 if (optval < 1) {
1029 error = EINVAL;
1030 goto bad;
1031 }
1032
1033 switch (sopt->sopt_name) {
1034 case SO_SNDBUF:
1035 case SO_RCVBUF:
1036 if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
1037 &so->so_snd : &so->so_rcv,
1038 (u_long) optval) == 0) {
1039 error = ENOBUFS;
1040 goto bad;
1041 }
1042 break;
1043
1044 /*
1045 * Make sure the low-water is never greater than
1046 * the high-water.
1047 */
1048 case SO_SNDLOWAT:
1049 so->so_snd.sb_lowat =
1050 (optval > so->so_snd.sb_hiwat) ?
1051 so->so_snd.sb_hiwat : optval;
1052 break;
1053 case SO_RCVLOWAT:
1054 so->so_rcv.sb_lowat =
1055 (optval > so->so_rcv.sb_hiwat) ?
1056 so->so_rcv.sb_hiwat : optval;
1057 break;
1058 }
1059 break;
1060
1061 case SO_SNDTIMEO:
1062 case SO_RCVTIMEO:
1063 error = sooptcopyin(sopt, &tv, sizeof tv,
1064 sizeof tv);
1065 if (error)
1066 goto bad;
1067
1068 /* assert(hz > 0); */
1069 if (tv.tv_sec < 0 || tv.tv_sec > SHRT_MAX / hz ||
1070 tv.tv_usec < 0 || tv.tv_usec >= 1000000) {
1071 error = EDOM;
1072 goto bad;
1073 }
1074 /* assert(tick > 0); */
1075 /* assert(ULONG_MAX - SHRT_MAX >= 1000000); */
1076 val = (u_long)(tv.tv_sec * hz) + tv.tv_usec / tick;
1077 if (val > SHRT_MAX) {
1078 error = EDOM;
1079 goto bad;
1080 }
1081
1082 switch (sopt->sopt_name) {
1083 case SO_SNDTIMEO:
1084 so->so_snd.sb_timeo = val;
1085 break;
1086 case SO_RCVTIMEO:
1087 so->so_rcv.sb_timeo = val;
1088 break;
1089 }
1090 break;
1091
1092 default:
1093 error = ENOPROTOOPT;
1094 break;
1095 }
1096 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
1097 (void) ((*so->so_proto->pr_ctloutput)
1098 (so, sopt));
1099 }
1100 }
1101bad:
1102 return (error);
1103}
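
The SO_SNDTIMEO/SO_RCVTIMEO path converts the user's timeval into clock ticks before storing it in sb_timeo. A worked example, assuming the common i386 settings of hz = 100 and tick = 10000 (microseconds per tick):

	struct timeval tv = { 1, 500000 };	/* 1.5 seconds */
	u_long val;

	val = (u_long)(tv.tv_sec * hz) + tv.tv_usec / tick;
	/* val == 100 + 50 == 150 ticks; values above SHRT_MAX draw EDOM */
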
1104
1105/* Helper routine for getsockopt */
1106int
1107sooptcopyout(sopt, buf, len)
1108 struct sockopt *sopt;
1109 void *buf;
1110 size_t len;
1111{
1112 int error;
1113 size_t valsize;
1114
1115 error = 0;
1116
1117 /*
1118 * Documented get behavior is that we always return a value,
1119 * possibly truncated to fit in the user's buffer.
1120 * Traditional behavior is that we always tell the user
1121 * precisely how much we copied, rather than something useful
1122 * like the total amount we had available for her.
1123 * Note that this interface is not idempotent; the entire answer must
 1124	 * be generated ahead of time.
1125 */
1126 valsize = min(len, sopt->sopt_valsize);
1127 sopt->sopt_valsize = valsize;
1128 if (sopt->sopt_val != 0) {
1129 if (sopt->sopt_p != 0)
1130 error = copyout(buf, sopt->sopt_val, valsize);
1131 else
1132 bcopy(buf, sopt->sopt_val, valsize);
1133 }
1134 return error;
1135}
1136
1137int
1138sogetopt(so, sopt)
1139 struct socket *so;
1140 struct sockopt *sopt;
1141{
1142 int error, optval;
1143 struct linger l;
1144 struct timeval tv;
1145
1146 error = 0;
1147 if (sopt->sopt_level != SOL_SOCKET) {
1148 if (so->so_proto && so->so_proto->pr_ctloutput) {
1149 return ((*so->so_proto->pr_ctloutput)
1150 (so, sopt));
1151 } else
1152 return (ENOPROTOOPT);
1153 } else {
1154 switch (sopt->sopt_name) {
1155 case SO_LINGER:
1156 l.l_onoff = so->so_options & SO_LINGER;
1157 l.l_linger = so->so_linger;
1158 error = sooptcopyout(sopt, &l, sizeof l);
1159 break;
1160
1161 case SO_USELOOPBACK:
1162 case SO_DONTROUTE:
1163 case SO_DEBUG:
1164 case SO_KEEPALIVE:
1165 case SO_REUSEADDR:
1166 case SO_REUSEPORT:
1167 case SO_BROADCAST:
1168 case SO_OOBINLINE:
1169 case SO_TIMESTAMP:
1170 optval = so->so_options & sopt->sopt_name;
1171integer:
1172 error = sooptcopyout(sopt, &optval, sizeof optval);
1173 break;
1174
1175 case SO_TYPE:
1176 optval = so->so_type;
1177 goto integer;
1178
1179 case SO_ERROR:
1180 optval = so->so_error;
1181 so->so_error = 0;
1182 goto integer;
1183
1184 case SO_SNDBUF:
1185 optval = so->so_snd.sb_hiwat;
1186 goto integer;
1187
1188 case SO_RCVBUF:
1189 optval = so->so_rcv.sb_hiwat;
1190 goto integer;
1191
1192 case SO_SNDLOWAT:
1193 optval = so->so_snd.sb_lowat;
1194 goto integer;
1195
1196 case SO_RCVLOWAT:
1197 optval = so->so_rcv.sb_lowat;
1198 goto integer;
1199
1200 case SO_SNDTIMEO:
1201 case SO_RCVTIMEO:
1202 optval = (sopt->sopt_name == SO_SNDTIMEO ?
1203 so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
1204
1205 tv.tv_sec = optval / hz;
1206 tv.tv_usec = (optval % hz) * tick;
1207 error = sooptcopyout(sopt, &tv, sizeof tv);
1208 break;
1209
1210 default:
1211 error = ENOPROTOOPT;
1212 break;
1213 }
1214 return (error);
1215 }
1216}
1217
1218void
1219sohasoutofband(so)
1220 register struct socket *so;
1221{
1222 if (so->so_sigio != NULL)
1223 pgsigio(so->so_sigio, SIGURG, 0);
1224 selwakeup(&so->so_rcv.sb_sel);
1225}
1226
1227int
1228sopoll(struct socket *so, int events, struct ucred *cred, struct proc *p)
1229{
1230 int revents = 0;
1231 int s = splnet();
1232
1233 if (events & (POLLIN | POLLRDNORM))
1234 if (soreadable(so))
1235 revents |= events & (POLLIN | POLLRDNORM);
1236
1237 if (events & (POLLOUT | POLLWRNORM))
1238 if (sowriteable(so))
1239 revents |= events & (POLLOUT | POLLWRNORM);
1240
1241 if (events & (POLLPRI | POLLRDBAND))
1242 if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
1243 revents |= events & (POLLPRI | POLLRDBAND);
1244
1245 if (revents == 0) {
1246 if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
1247 selrecord(p, &so->so_rcv.sb_sel);
1248 so->so_rcv.sb_flags |= SB_SEL;
1249 }
1250
1251 if (events & (POLLOUT | POLLWRNORM)) {
1252 selrecord(p, &so->so_snd.sb_sel);
1253 so->so_snd.sb_flags |= SB_SEL;
1254 }
1255 }
1256
1257 splx(s);
1258 return (revents);
1259}
162 zfreei(so->so_zone, so);
163}
164
165int
166solisten(so, backlog, p)
167 register struct socket *so;
168 int backlog;
169 struct proc *p;
170{
171 int s, error;
172
173 s = splnet();
174 error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
175 if (error) {
176 splx(s);
177 return (error);
178 }
179 if (so->so_comp.tqh_first == NULL)
180 so->so_options |= SO_ACCEPTCONN;
181 if (backlog < 0 || backlog > somaxconn)
182 backlog = somaxconn;
183 so->so_qlimit = backlog;
184 splx(s);
185 return (0);
186}
187
188void
189sofree(so)
190 register struct socket *so;
191{
192 struct socket *head = so->so_head;
193
194 if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
195 return;
196 if (head != NULL) {
197 if (so->so_state & SS_INCOMP) {
198 TAILQ_REMOVE(&head->so_incomp, so, so_list);
199 head->so_incqlen--;
200 } else if (so->so_state & SS_COMP) {
201 /*
202 * We must not decommission a socket that's
203 * on the accept(2) queue. If we do, then
204 * accept(2) may hang after select(2) indicated
205 * that the listening socket was ready.
206 */
207 return;
208 } else {
209 panic("sofree: not queued");
210 }
211 head->so_qlen--;
212 so->so_state &= ~SS_INCOMP;
213 so->so_head = NULL;
214 }
215 sbrelease(&so->so_snd);
216 sorflush(so);
217 sodealloc(so);
218}
219
220/*
221 * Close a socket on last file table reference removal.
222 * Initiate disconnect if connected.
223 * Free socket when disconnect complete.
224 */
225int
226soclose(so)
227 register struct socket *so;
228{
229 int s = splnet(); /* conservative */
230 int error = 0;
231
232 funsetown(so->so_sigio);
233 if (so->so_options & SO_ACCEPTCONN) {
234 struct socket *sp, *sonext;
235
236 for (sp = so->so_incomp.tqh_first; sp != NULL; sp = sonext) {
237 sonext = sp->so_list.tqe_next;
238 (void) soabort(sp);
239 }
240 for (sp = so->so_comp.tqh_first; sp != NULL; sp = sonext) {
241 sonext = sp->so_list.tqe_next;
242 /* Dequeue from so_comp since sofree() won't do it */
243 TAILQ_REMOVE(&so->so_comp, sp, so_list);
244 so->so_qlen--;
245 sp->so_state &= ~SS_COMP;
246 sp->so_head = NULL;
247 (void) soabort(sp);
248 }
249 }
250 if (so->so_pcb == 0)
251 goto discard;
252 if (so->so_state & SS_ISCONNECTED) {
253 if ((so->so_state & SS_ISDISCONNECTING) == 0) {
254 error = sodisconnect(so);
255 if (error)
256 goto drop;
257 }
258 if (so->so_options & SO_LINGER) {
259 if ((so->so_state & SS_ISDISCONNECTING) &&
260 (so->so_state & SS_NBIO))
261 goto drop;
262 while (so->so_state & SS_ISCONNECTED) {
263 error = tsleep((caddr_t)&so->so_timeo,
264 PSOCK | PCATCH, "soclos", so->so_linger * hz);
265 if (error)
266 break;
267 }
268 }
269 }
270drop:
271 if (so->so_pcb) {
272 int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
273 if (error == 0)
274 error = error2;
275 }
276discard:
277 if (so->so_state & SS_NOFDREF)
278 panic("soclose: NOFDREF");
279 so->so_state |= SS_NOFDREF;
280 sofree(so);
281 splx(s);
282 return (error);
283}
284
285/*
286 * Must be called at splnet...
287 */
288int
289soabort(so)
290 struct socket *so;
291{
292
293 return (*so->so_proto->pr_usrreqs->pru_abort)(so);
294}
295
296int
297soaccept(so, nam)
298 register struct socket *so;
299 struct sockaddr **nam;
300{
301 int s = splnet();
302 int error;
303
304 if ((so->so_state & SS_NOFDREF) == 0)
305 panic("soaccept: !NOFDREF");
306 so->so_state &= ~SS_NOFDREF;
307 if ((so->so_state & SS_ISDISCONNECTED) == 0)
308 error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
309 else {
310 if (nam)
311 *nam = 0;
312 error = 0;
313 }
314 splx(s);
315 return (error);
316}
317
318int
319soconnect(so, nam, p)
320 register struct socket *so;
321 struct sockaddr *nam;
322 struct proc *p;
323{
324 int s;
325 int error;
326
327 if (so->so_options & SO_ACCEPTCONN)
328 return (EOPNOTSUPP);
329 s = splnet();
330 /*
331 * If protocol is connection-based, can only connect once.
332 * Otherwise, if connected, try to disconnect first.
333 * This allows user to disconnect by connecting to, e.g.,
334 * a null address.
335 */
336 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
337 ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
338 (error = sodisconnect(so))))
339 error = EISCONN;
340 else
341 error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
342 splx(s);
343 return (error);
344}
345
346int
347soconnect2(so1, so2)
348 register struct socket *so1;
349 struct socket *so2;
350{
351 int s = splnet();
352 int error;
353
354 error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
355 splx(s);
356 return (error);
357}
358
359int
360sodisconnect(so)
361 register struct socket *so;
362{
363 int s = splnet();
364 int error;
365
366 if ((so->so_state & SS_ISCONNECTED) == 0) {
367 error = ENOTCONN;
368 goto bad;
369 }
370 if (so->so_state & SS_ISDISCONNECTING) {
371 error = EALREADY;
372 goto bad;
373 }
374 error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
375bad:
376 splx(s);
377 return (error);
378}
379
380#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
381/*
382 * Send on a socket.
383 * If send must go all at once and message is larger than
384 * send buffering, then hard error.
385 * Lock against other senders.
386 * If must go all at once and not enough room now, then
387 * inform user that this would block and do nothing.
388 * Otherwise, if nonblocking, send as much as possible.
389 * The data to be sent is described by "uio" if nonzero,
390 * otherwise by the mbuf chain "top" (which must be null
391 * if uio is not). Data provided in mbuf chain must be small
392 * enough to send all at once.
393 *
394 * Returns nonzero on error, timeout or signal; callers
395 * must check for short counts if EINTR/ERESTART are returned.
396 * Data and control buffers are freed on return.
397 */
398int
399sosend(so, addr, uio, top, control, flags, p)
400 register struct socket *so;
401 struct sockaddr *addr;
402 struct uio *uio;
403 struct mbuf *top;
404 struct mbuf *control;
405 int flags;
406 struct proc *p;
407{
408 struct mbuf **mp;
409 register struct mbuf *m;
410 register long space, len, resid;
411 int clen = 0, error, s, dontroute, mlen;
412 int atomic = sosendallatonce(so) || top;
413
414 if (uio)
415 resid = uio->uio_resid;
416 else
417 resid = top->m_pkthdr.len;
418 /*
419 * In theory resid should be unsigned.
420 * However, space must be signed, as it might be less than 0
421 * if we over-committed, and we must use a signed comparison
422 * of space and resid. On the other hand, a negative resid
423 * causes us to loop sending 0-length segments to the protocol.
424 *
425 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
426 * type sockets since that's an error.
427 */
428 if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
429 error = EINVAL;
430 goto out;
431 }
432
433 dontroute =
434 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
435 (so->so_proto->pr_flags & PR_ATOMIC);
436 if (p)
437 p->p_stats->p_ru.ru_msgsnd++;
438 if (control)
439 clen = control->m_len;
440#define snderr(errno) { error = errno; splx(s); goto release; }
441
442restart:
443 error = sblock(&so->so_snd, SBLOCKWAIT(flags));
444 if (error)
445 goto out;
446 do {
447 s = splnet();
448 if (so->so_state & SS_CANTSENDMORE)
449 snderr(EPIPE);
450 if (so->so_error) {
451 error = so->so_error;
452 so->so_error = 0;
453 splx(s);
454 goto release;
455 }
456 if ((so->so_state & SS_ISCONNECTED) == 0) {
457 /*
458 * `sendto' and `sendmsg' is allowed on a connection-
459 * based socket if it supports implied connect.
460 * Return ENOTCONN if not connected and no address is
461 * supplied.
462 */
463 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
464 (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
465 if ((so->so_state & SS_ISCONFIRMING) == 0 &&
466 !(resid == 0 && clen != 0))
467 snderr(ENOTCONN);
468 } else if (addr == 0)
469 snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ?
470 ENOTCONN : EDESTADDRREQ);
471 }
472 space = sbspace(&so->so_snd);
473 if (flags & MSG_OOB)
474 space += 1024;
475 if ((atomic && resid > so->so_snd.sb_hiwat) ||
476 clen > so->so_snd.sb_hiwat)
477 snderr(EMSGSIZE);
478 if (space < resid + clen && uio &&
479 (atomic || space < so->so_snd.sb_lowat || space < clen)) {
480 if (so->so_state & SS_NBIO)
481 snderr(EWOULDBLOCK);
482 sbunlock(&so->so_snd);
483 error = sbwait(&so->so_snd);
484 splx(s);
485 if (error)
486 goto out;
487 goto restart;
488 }
489 splx(s);
490 mp = &top;
491 space -= clen;
492 do {
493 if (uio == NULL) {
494 /*
495 * Data is prepackaged in "top".
496 */
497 resid = 0;
498 if (flags & MSG_EOR)
499 top->m_flags |= M_EOR;
500 } else do {
501 if (top == 0) {
502 MGETHDR(m, M_WAIT, MT_DATA);
503 mlen = MHLEN;
504 m->m_pkthdr.len = 0;
505 m->m_pkthdr.rcvif = (struct ifnet *)0;
506 } else {
507 MGET(m, M_WAIT, MT_DATA);
508 mlen = MLEN;
509 }
510 if (resid >= MINCLSIZE) {
511 MCLGET(m, M_WAIT);
512 if ((m->m_flags & M_EXT) == 0)
513 goto nopages;
514 mlen = MCLBYTES;
515 len = min(min(mlen, resid), space);
516 } else {
517nopages:
518 len = min(min(mlen, resid), space);
519 /*
520 * For datagram protocols, leave room
521 * for protocol headers in first mbuf.
522 */
523 if (atomic && top == 0 && len < mlen)
524 MH_ALIGN(m, len);
525 }
526 space -= len;
527 error = uiomove(mtod(m, caddr_t), (int)len, uio);
528 resid = uio->uio_resid;
529 m->m_len = len;
530 *mp = m;
531 top->m_pkthdr.len += len;
532 if (error)
533 goto release;
534 mp = &m->m_next;
535 if (resid <= 0) {
536 if (flags & MSG_EOR)
537 top->m_flags |= M_EOR;
538 break;
539 }
540 } while (space > 0 && atomic);
541 if (dontroute)
542 so->so_options |= SO_DONTROUTE;
543 s = splnet(); /* XXX */
544 /*
545 * XXX all the SS_CANTSENDMORE checks previously
546 * done could be out of date. We could have recieved
547 * a reset packet in an interrupt or maybe we slept
548 * while doing page faults in uiomove() etc. We could
549 * probably recheck again inside the splnet() protection
550 * here, but there are probably other places that this
551 * also happens. We must rethink this.
552 */
553 error = (*so->so_proto->pr_usrreqs->pru_send)(so,
554 (flags & MSG_OOB) ? PRUS_OOB :
555 /*
556 * If the user set MSG_EOF, the protocol
557 * understands this flag and nothing left to
558 * send then use PRU_SEND_EOF instead of PRU_SEND.
559 */
560 ((flags & MSG_EOF) &&
561 (so->so_proto->pr_flags & PR_IMPLOPCL) &&
562 (resid <= 0)) ?
563 PRUS_EOF :
564 /* If there is more to send set PRUS_MORETOCOME */
565 (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
566 top, addr, control, p);
567 splx(s);
568 if (dontroute)
569 so->so_options &= ~SO_DONTROUTE;
570 clen = 0;
571 control = 0;
572 top = 0;
573 mp = &top;
574 if (error)
575 goto release;
576 } while (resid && space > 0);
577 } while (resid);
578
579release:
580 sbunlock(&so->so_snd);
581out:
582 if (top)
583 m_freem(top);
584 if (control)
585 m_freem(control);
586 return (error);
587}
588
589/*
590 * Implement receive operations on a socket.
591 * We depend on the way that records are added to the sockbuf
592 * by sbappend*. In particular, each record (mbufs linked through m_next)
593 * must begin with an address if the protocol so specifies,
594 * followed by an optional mbuf or mbufs containing ancillary data,
595 * and then zero or more mbufs of data.
596 * In order to avoid blocking network interrupts for the entire time here,
597 * we splx() while doing the actual copy to user space.
598 * Although the sockbuf is locked, new data may still be appended,
599 * and thus we must maintain consistency of the sockbuf during that time.
600 *
601 * The caller may receive the data as a single mbuf chain by supplying
602 * an mbuf **mp0 for use in returning the chain. The uio is then used
603 * only for the count in uio_resid.
604 */
605int
606soreceive(so, psa, uio, mp0, controlp, flagsp)
607 register struct socket *so;
608 struct sockaddr **psa;
609 struct uio *uio;
610 struct mbuf **mp0;
611 struct mbuf **controlp;
612 int *flagsp;
613{
614 register struct mbuf *m, **mp;
615 register int flags, len, error, s, offset;
616 struct protosw *pr = so->so_proto;
617 struct mbuf *nextrecord;
618 int moff, type = 0;
619 int orig_resid = uio->uio_resid;
620
621 mp = mp0;
622 if (psa)
623 *psa = 0;
624 if (controlp)
625 *controlp = 0;
626 if (flagsp)
627 flags = *flagsp &~ MSG_EOR;
628 else
629 flags = 0;
630 if (flags & MSG_OOB) {
631 m = m_get(M_WAIT, MT_DATA);
632 error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
633 if (error)
634 goto bad;
635 do {
636 error = uiomove(mtod(m, caddr_t),
637 (int) min(uio->uio_resid, m->m_len), uio);
638 m = m_free(m);
639 } while (uio->uio_resid && error == 0 && m);
640bad:
641 if (m)
642 m_freem(m);
643 return (error);
644 }
645 if (mp)
646 *mp = (struct mbuf *)0;
647 if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
648 (*pr->pr_usrreqs->pru_rcvd)(so, 0);
649
650restart:
651 error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
652 if (error)
653 return (error);
654 s = splnet();
655
656 m = so->so_rcv.sb_mb;
657 /*
658 * If we have less data than requested, block awaiting more
659 * (subject to any timeout) if:
660 * 1. the current count is less than the low water mark, or
661 * 2. MSG_WAITALL is set, and it is possible to do the entire
662 * receive operation at once if we block (resid <= hiwat).
663 * 3. MSG_DONTWAIT is not set
664 * If MSG_WAITALL is set but resid is larger than the receive buffer,
665 * we have to do the receive in sections, and thus risk returning
666 * a short count if a timeout or signal occurs after we start.
667 */
668 if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
669 so->so_rcv.sb_cc < uio->uio_resid) &&
670 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
671 ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
672 m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
673 KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1"));
674 if (so->so_error) {
675 if (m)
676 goto dontblock;
677 error = so->so_error;
678 if ((flags & MSG_PEEK) == 0)
679 so->so_error = 0;
680 goto release;
681 }
682 if (so->so_state & SS_CANTRCVMORE) {
683 if (m)
684 goto dontblock;
685 else
686 goto release;
687 }
688 for (; m; m = m->m_next)
689 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
690 m = so->so_rcv.sb_mb;
691 goto dontblock;
692 }
693 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
694 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
695 error = ENOTCONN;
696 goto release;
697 }
698 if (uio->uio_resid == 0)
699 goto release;
700 if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
701 error = EWOULDBLOCK;
702 goto release;
703 }
704 sbunlock(&so->so_rcv);
705 error = sbwait(&so->so_rcv);
706 splx(s);
707 if (error)
708 return (error);
709 goto restart;
710 }
711dontblock:
712 if (uio->uio_procp)
713 uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
714 nextrecord = m->m_nextpkt;
715 if (pr->pr_flags & PR_ADDR) {
716 KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
717 orig_resid = 0;
718 if (psa)
719 *psa = dup_sockaddr(mtod(m, struct sockaddr *),
720 mp0 == 0);
721 if (flags & MSG_PEEK) {
722 m = m->m_next;
723 } else {
724 sbfree(&so->so_rcv, m);
725 MFREE(m, so->so_rcv.sb_mb);
726 m = so->so_rcv.sb_mb;
727 }
728 }
729 while (m && m->m_type == MT_CONTROL && error == 0) {
730 if (flags & MSG_PEEK) {
731 if (controlp)
732 *controlp = m_copy(m, 0, m->m_len);
733 m = m->m_next;
734 } else {
735 sbfree(&so->so_rcv, m);
736 if (controlp) {
737 if (pr->pr_domain->dom_externalize &&
738 mtod(m, struct cmsghdr *)->cmsg_type ==
739 SCM_RIGHTS)
740 error = (*pr->pr_domain->dom_externalize)(m);
741 *controlp = m;
742 so->so_rcv.sb_mb = m->m_next;
743 m->m_next = 0;
744 m = so->so_rcv.sb_mb;
745 } else {
746 MFREE(m, so->so_rcv.sb_mb);
747 m = so->so_rcv.sb_mb;
748 }
749 }
750 if (controlp) {
751 orig_resid = 0;
752 controlp = &(*controlp)->m_next;
753 }
754 }
755 if (m) {
756 if ((flags & MSG_PEEK) == 0)
757 m->m_nextpkt = nextrecord;
758 type = m->m_type;
759 if (type == MT_OOBDATA)
760 flags |= MSG_OOB;
761 }
762 moff = 0;
763 offset = 0;
764 while (m && uio->uio_resid > 0 && error == 0) {
765 if (m->m_type == MT_OOBDATA) {
766 if (type != MT_OOBDATA)
767 break;
768 } else if (type == MT_OOBDATA)
769 break;
770 else
771 KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER,
772 ("receive 3"));
773 so->so_state &= ~SS_RCVATMARK;
774 len = uio->uio_resid;
775 if (so->so_oobmark && len > so->so_oobmark - offset)
776 len = so->so_oobmark - offset;
777 if (len > m->m_len - moff)
778 len = m->m_len - moff;
779 /*
780 * If mp is set, just pass back the mbufs.
781 * Otherwise copy them out via the uio, then free.
782 * Sockbuf must be consistent here (points to current mbuf,
783 * it points to next record) when we drop priority;
784 * we must note any additions to the sockbuf when we
785 * block interrupts again.
786 */
787 if (mp == 0) {
788 splx(s);
789 error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
790 s = splnet();
791 if (error)
792 goto release;
793 } else
794 uio->uio_resid -= len;
795 if (len == m->m_len - moff) {
796 if (m->m_flags & M_EOR)
797 flags |= MSG_EOR;
798 if (flags & MSG_PEEK) {
799 m = m->m_next;
800 moff = 0;
801 } else {
802 nextrecord = m->m_nextpkt;
803 sbfree(&so->so_rcv, m);
804 if (mp) {
805 *mp = m;
806 mp = &m->m_next;
807 so->so_rcv.sb_mb = m = m->m_next;
808 *mp = (struct mbuf *)0;
809 } else {
810 MFREE(m, so->so_rcv.sb_mb);
811 m = so->so_rcv.sb_mb;
812 }
813 if (m)
814 m->m_nextpkt = nextrecord;
815 }
816 } else {
817 if (flags & MSG_PEEK)
818 moff += len;
819 else {
820 if (mp)
821 *mp = m_copym(m, 0, len, M_WAIT);
822 m->m_data += len;
823 m->m_len -= len;
824 so->so_rcv.sb_cc -= len;
825 }
826 }
827 if (so->so_oobmark) {
828 if ((flags & MSG_PEEK) == 0) {
829 so->so_oobmark -= len;
830 if (so->so_oobmark == 0) {
831 so->so_state |= SS_RCVATMARK;
832 break;
833 }
834 } else {
835 offset += len;
836 if (offset == so->so_oobmark)
837 break;
838 }
839 }
840 if (flags & MSG_EOR)
841 break;
842 /*
843 * If the MSG_WAITALL flag is set (for non-atomic socket),
844 * we must not quit until "uio->uio_resid == 0" or an error
845 * termination. If a signal/timeout occurs, return
846 * with a short count but without error.
847 * Keep sockbuf locked against other readers.
848 */
849 while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
850 !sosendallatonce(so) && !nextrecord) {
851 if (so->so_error || so->so_state & SS_CANTRCVMORE)
852 break;
853 error = sbwait(&so->so_rcv);
854 if (error) {
855 sbunlock(&so->so_rcv);
856 splx(s);
857 return (0);
858 }
859 m = so->so_rcv.sb_mb;
860 if (m)
861 nextrecord = m->m_nextpkt;
862 }
863 }
864
865 if (m && pr->pr_flags & PR_ATOMIC) {
866 flags |= MSG_TRUNC;
867 if ((flags & MSG_PEEK) == 0)
868 (void) sbdroprecord(&so->so_rcv);
869 }
870 if ((flags & MSG_PEEK) == 0) {
871 if (m == 0)
872 so->so_rcv.sb_mb = nextrecord;
873 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
874 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
875 }
876 if (orig_resid == uio->uio_resid && orig_resid &&
877 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
878 sbunlock(&so->so_rcv);
879 splx(s);
880 goto restart;
881 }
882
883 if (flagsp)
884 *flagsp |= flags;
885release:
886 sbunlock(&so->so_rcv);
887 splx(s);
888 return (error);
889}
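
/*
 * Illustrative userland sketch (not part of this file) of the receive
 * flags handled above; s and buf are hypothetical:
 *
 *	char buf[512];
 *	ssize_t n;
 *
 *	n = recv(s, buf, sizeof buf, MSG_PEEK);		(inspect, don't consume)
 *	n = recv(s, buf, sizeof buf, MSG_WAITALL);	(block for the full count)
 */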
890
891int
892soshutdown(so, how)
893 register struct socket *so;
894 register int how;
895{
896 register struct protosw *pr = so->so_proto;
897
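	/*
	 * how is 0 (disallow further receives), 1 (further sends) or
	 * 2 (both); adding one maps it onto the FREAD (0x0001) and
	 * FWRITE (0x0002) bits: 1 = FREAD, 2 = FWRITE, 3 = FREAD|FWRITE.
	 */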
898 how++;
899 if (how & FREAD)
900 sorflush(so);
901 if (how & FWRITE)
902 return ((*pr->pr_usrreqs->pru_shutdown)(so));
903 return (0);
904}
905
906void
907sorflush(so)
908 register struct socket *so;
909{
910 register struct sockbuf *sb = &so->so_rcv;
911 register struct protosw *pr = so->so_proto;
912 register int s;
913 struct sockbuf asb;
914
915 sb->sb_flags |= SB_NOINTR;
916 (void) sblock(sb, M_WAITOK);
917 s = splimp();
918 socantrcvmore(so);
919 sbunlock(sb);
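	/*
	 * Snapshot the sockbuf, then clear the original in place so the
	 * socket is left with an empty buffer; the snapshot's mbuf chain
	 * (and any rights it carries) is disposed of below, after
	 * priority has been restored.
	 */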
920 asb = *sb;
921 bzero((caddr_t)sb, sizeof (*sb));
922 splx(s);
923 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
924 (*pr->pr_domain->dom_dispose)(asb.sb_mb);
925 sbrelease(&asb);
926}
927
928/*
929 * Perhaps this routine, and sooptcopyout(), below, ought to come in
930 * an additional variant to handle the case where the option value needs
931 * to be some kind of integer, but not a specific size.
932 * In addition to their use here, these functions are also called by the
933 * protocol-level pr_ctloutput() routines.
934 */
935int
936sooptcopyin(sopt, buf, len, minlen)
937 struct sockopt *sopt;
938 void *buf;
939 size_t len;
940 size_t minlen;
941{
942 size_t valsize;
943
944 /*
945 * If the user gives us more than we wanted, we ignore it,
946 * but if we don't get the minimum length the caller
947 * wants, we return EINVAL. On success, sopt->sopt_valsize
948 * is set to however much we actually retrieved.
949 */
950 if ((valsize = sopt->sopt_valsize) < minlen)
951 return EINVAL;
952 if (valsize > len)
953 sopt->sopt_valsize = valsize = len;
954
955 if (sopt->sopt_p != 0)
956 return (copyin(sopt->sopt_val, buf, valsize));
957
958 bcopy(sopt->sopt_val, buf, valsize);
959 return 0;
960}
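
/*
 * A typical caller, such as one of the SO_* cases in sosetopt() below
 * or a protocol's pr_ctloutput() handler, uses sooptcopyin() like this
 * (an illustrative sketch, not additional code in this file):
 *
 *	int optval;
 *
 *	error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
 *	if (error)
 *		return (error);
 */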
961
962int
963sosetopt(so, sopt)
964 struct socket *so;
965 struct sockopt *sopt;
966{
967 int error, optval;
968 struct linger l;
969 struct timeval tv;
970 u_long val;
971
972 error = 0;
973 if (sopt->sopt_level != SOL_SOCKET) {
974 if (so->so_proto && so->so_proto->pr_ctloutput)
975 return ((*so->so_proto->pr_ctloutput)
976 (so, sopt));
977 error = ENOPROTOOPT;
978 } else {
979 switch (sopt->sopt_name) {
980 case SO_LINGER:
981 error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
982 if (error)
983 goto bad;
984
985 so->so_linger = l.l_linger;
986 if (l.l_onoff)
987 so->so_options |= SO_LINGER;
988 else
989 so->so_options &= ~SO_LINGER;
990 break;
991
992 case SO_DEBUG:
993 case SO_KEEPALIVE:
994 case SO_DONTROUTE:
995 case SO_USELOOPBACK:
996 case SO_BROADCAST:
997 case SO_REUSEADDR:
998 case SO_REUSEPORT:
999 case SO_OOBINLINE:
1000 case SO_TIMESTAMP:
1001 error = sooptcopyin(sopt, &optval, sizeof optval,
1002 sizeof optval);
1003 if (error)
1004 goto bad;
1005 if (optval)
1006 so->so_options |= sopt->sopt_name;
1007 else
1008 so->so_options &= ~sopt->sopt_name;
1009 break;
1010
1011 case SO_SNDBUF:
1012 case SO_RCVBUF:
1013 case SO_SNDLOWAT:
1014 case SO_RCVLOWAT:
1015 error = sooptcopyin(sopt, &optval, sizeof optval,
1016 sizeof optval);
1017 if (error)
1018 goto bad;
1019
1020 /*
1021 * Values < 1 make no sense for any of these
1022 * options, so disallow them.
1023 */
1024 if (optval < 1) {
1025 error = EINVAL;
1026 goto bad;
1027 }
1028
1029 switch (sopt->sopt_name) {
1030 case SO_SNDBUF:
1031 case SO_RCVBUF:
1032 if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
1033 &so->so_snd : &so->so_rcv,
1034 (u_long) optval) == 0) {
1035 error = ENOBUFS;
1036 goto bad;
1037 }
1038 break;
1039
1040 /*
1041 * Make sure the low-water is never greater than
1042 * the high-water.
1043 */
1044 case SO_SNDLOWAT:
1045 so->so_snd.sb_lowat =
1046 (optval > so->so_snd.sb_hiwat) ?
1047 so->so_snd.sb_hiwat : optval;
1048 break;
1049 case SO_RCVLOWAT:
1050 so->so_rcv.sb_lowat =
1051 (optval > so->so_rcv.sb_hiwat) ?
1052 so->so_rcv.sb_hiwat : optval;
1053 break;
1054 }
1055 break;
1056
1057 case SO_SNDTIMEO:
1058 case SO_RCVTIMEO:
1059 error = sooptcopyin(sopt, &tv, sizeof tv,
1060 sizeof tv);
1061 if (error)
1062 goto bad;
1063
1064 /* assert(hz > 0); */
1065 if (tv.tv_sec < 0 || tv.tv_sec > SHRT_MAX / hz ||
1066 tv.tv_usec < 0 || tv.tv_usec >= 1000000) {
1067 error = EDOM;
1068 goto bad;
1069 }
1070 /* assert(tick > 0); */
1071 /* assert(ULONG_MAX - SHRT_MAX >= 1000000); */
1072 val = (u_long)(tv.tv_sec * hz) + tv.tv_usec / tick;
1073 if (val > SHRT_MAX) {
1074 error = EDOM;
1075 goto bad;
1076 }
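			/*
			 * Worked example: with hz = 100, tick is 10000
			 * (usec per tick) and timeouts are stored in clock
			 * ticks, so the largest representable timeout is
			 * SHRT_MAX (32767) ticks, about 327.67 seconds;
			 * hence the tv_sec bound of SHRT_MAX / hz above.
			 */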
1077
1078 switch (sopt->sopt_name) {
1079 case SO_SNDTIMEO:
1080 so->so_snd.sb_timeo = val;
1081 break;
1082 case SO_RCVTIMEO:
1083 so->so_rcv.sb_timeo = val;
1084 break;
1085 }
1086 break;
1087
1088 default:
1089 error = ENOPROTOOPT;
1090 break;
1091 }
1092 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
1093 (void) ((*so->so_proto->pr_ctloutput)
1094 (so, sopt));
1095 }
1096 }
1097bad:
1098 return (error);
1099}
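
/*
 * Illustrative userland sketch (not part of this file): setting a
 * five-second receive timeout, which the SO_RCVTIMEO case above
 * converts into ticks for so_rcv.sb_timeo; s is a hypothetical socket:
 *
 *	struct timeval tv;
 *
 *	tv.tv_sec = 5;
 *	tv.tv_usec = 0;
 *	if (setsockopt(s, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof tv) < 0)
 *		err(1, "setsockopt");
 */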
1100
1101/* Helper routine for getsockopt */
1102int
1103sooptcopyout(sopt, buf, len)
1104 struct sockopt *sopt;
1105 void *buf;
1106 size_t len;
1107{
1108 int error;
1109 size_t valsize;
1110
1111 error = 0;
1112
1113 /*
1114 * Documented get behavior is that we always return a value,
1115 * possibly truncated to fit in the user's buffer.
1116 * Traditional behavior is that we always tell the user
1117 * precisely how much we copied, rather than something useful
1118 * like the total amount we had available for her.
1119 * Note that this interface is not idempotent; the entire answer must
1120	 * be generated ahead of time.
1121 */
1122 valsize = min(len, sopt->sopt_valsize);
1123 sopt->sopt_valsize = valsize;
1124 if (sopt->sopt_val != 0) {
1125 if (sopt->sopt_p != 0)
1126 error = copyout(buf, sopt->sopt_val, valsize);
1127 else
1128 bcopy(buf, sopt->sopt_val, valsize);
1129 }
1130 return error;
1131}
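
/*
 * Note the truncation semantics above: if the caller supplies, say, a
 * one-byte buffer for a four-byte integer option, only the first byte
 * is copied out, sopt->sopt_valsize reports 1, and no error is
 * returned.
 */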
1132
1133int
1134sogetopt(so, sopt)
1135 struct socket *so;
1136 struct sockopt *sopt;
1137{
1138 int error, optval;
1139 struct linger l;
1140 struct timeval tv;
1141
1142 error = 0;
1143 if (sopt->sopt_level != SOL_SOCKET) {
1144 if (so->so_proto && so->so_proto->pr_ctloutput) {
1145 return ((*so->so_proto->pr_ctloutput)
1146 (so, sopt));
1147 } else
1148 return (ENOPROTOOPT);
1149 } else {
1150 switch (sopt->sopt_name) {
1151 case SO_LINGER:
1152 l.l_onoff = so->so_options & SO_LINGER;
1153 l.l_linger = so->so_linger;
1154 error = sooptcopyout(sopt, &l, sizeof l);
1155 break;
1156
1157 case SO_USELOOPBACK:
1158 case SO_DONTROUTE:
1159 case SO_DEBUG:
1160 case SO_KEEPALIVE:
1161 case SO_REUSEADDR:
1162 case SO_REUSEPORT:
1163 case SO_BROADCAST:
1164 case SO_OOBINLINE:
1165 case SO_TIMESTAMP:
1166 optval = so->so_options & sopt->sopt_name;
1167integer:
1168 error = sooptcopyout(sopt, &optval, sizeof optval);
1169 break;
1170
1171 case SO_TYPE:
1172 optval = so->so_type;
1173 goto integer;
1174
1175 case SO_ERROR:
1176 optval = so->so_error;
1177 so->so_error = 0;
1178 goto integer;
1179
1180 case SO_SNDBUF:
1181 optval = so->so_snd.sb_hiwat;
1182 goto integer;
1183
1184 case SO_RCVBUF:
1185 optval = so->so_rcv.sb_hiwat;
1186 goto integer;
1187
1188 case SO_SNDLOWAT:
1189 optval = so->so_snd.sb_lowat;
1190 goto integer;
1191
1192 case SO_RCVLOWAT:
1193 optval = so->so_rcv.sb_lowat;
1194 goto integer;
1195
1196 case SO_SNDTIMEO:
1197 case SO_RCVTIMEO:
1198 optval = (sopt->sopt_name == SO_SNDTIMEO ?
1199 so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
1200
1201 tv.tv_sec = optval / hz;
1202 tv.tv_usec = (optval % hz) * tick;
1203 error = sooptcopyout(sopt, &tv, sizeof tv);
1204 break;
1205
1206 default:
1207 error = ENOPROTOOPT;
1208 break;
1209 }
1210 return (error);
1211 }
1212}
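
/*
 * Illustrative userland sketch (not part of this file): SO_ERROR is
 * typically used to collect the deferred result of a non-blocking
 * connect(); note that the SO_ERROR case above clears so_error, so the
 * read is destructive.  s is a hypothetical socket:
 *
 *	int error, len = sizeof error;
 *
 *	if (getsockopt(s, SOL_SOCKET, SO_ERROR, &error, &len) == 0 &&
 *	    error != 0)
 *		errno = error;
 */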
1213
1214void
1215sohasoutofband(so)
1216 register struct socket *so;
1217{
1218 if (so->so_sigio != NULL)
1219 pgsigio(so->so_sigio, SIGURG, 0);
1220 selwakeup(&so->so_rcv.sb_sel);
1221}
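
/*
 * Illustrative userland sketch (not part of this file): a process
 * claims ownership of the socket in order to receive the SIGURG
 * posted above; urg_handler is a hypothetical signal handler:
 *
 *	signal(SIGURG, urg_handler);
 *	fcntl(s, F_SETOWN, getpid());
 */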
1222
1223int
1224sopoll(struct socket *so, int events, struct ucred *cred, struct proc *p)
1225{
1226 int revents = 0;
1227 int s = splnet();
1228
1229 if (events & (POLLIN | POLLRDNORM))
1230 if (soreadable(so))
1231 revents |= events & (POLLIN | POLLRDNORM);
1232
1233 if (events & (POLLOUT | POLLWRNORM))
1234 if (sowriteable(so))
1235 revents |= events & (POLLOUT | POLLWRNORM);
1236
1237 if (events & (POLLPRI | POLLRDBAND))
1238 if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
1239 revents |= events & (POLLPRI | POLLRDBAND);
1240
1241 if (revents == 0) {
1242 if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
1243 selrecord(p, &so->so_rcv.sb_sel);
1244 so->so_rcv.sb_flags |= SB_SEL;
1245 }
1246
1247 if (events & (POLLOUT | POLLWRNORM)) {
1248 selrecord(p, &so->so_snd.sb_sel);
1249 so->so_snd.sb_flags |= SB_SEL;
1250 }
1251 }
1252
1253 splx(s);
1254 return (revents);
1255}
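
/*
 * Illustrative userland sketch (not part of this file): POLLPRI
 * reports the out-of-band state tested above via so_oobmark and
 * SS_RCVATMARK; s is a hypothetical socket:
 *
 *	struct pollfd pfd;
 *
 *	pfd.fd = s;
 *	pfd.events = POLLIN | POLLPRI;
 *	if (poll(&pfd, 1, INFTIM) > 0 && (pfd.revents & POLLPRI))
 *		handle urgent data (pending, or receiver at the mark);
 */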