Deleted Added
full compact
uipc_socket.c (46014) uipc_socket.c (46381)
1/*
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94
1/*
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94
34 * $Id: uipc_socket.c,v 1.55 1999/02/16 10:49:49 dfr Exp $
34 * $Id: uipc_socket.c,v 1.56 1999/04/24 18:22:34 ache Exp $
35 */
36
37#include <sys/param.h>
38#include <sys/systm.h>
39#include <sys/proc.h>
40#include <sys/fcntl.h>
41#include <sys/malloc.h>
42#include <sys/mbuf.h>
43#include <sys/domain.h>
44#include <sys/kernel.h>
45#include <sys/poll.h>
46#include <sys/protosw.h>
47#include <sys/socket.h>
48#include <sys/socketvar.h>
49#include <sys/resourcevar.h>
50#include <sys/signalvar.h>
51#include <sys/sysctl.h>
52#include <sys/uio.h>
53#include <vm/vm_zone.h>
54
55#include <machine/limits.h>
56
57struct vm_zone *socket_zone;
58so_gen_t so_gencnt; /* generation count for sockets */
59
60MALLOC_DEFINE(M_SONAME, "soname", "socket name");
61MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");
62
63SYSCTL_DECL(_kern_ipc);
64
65static int somaxconn = SOMAXCONN;
35 */
36
37#include <sys/param.h>
38#include <sys/systm.h>
39#include <sys/proc.h>
40#include <sys/fcntl.h>
41#include <sys/malloc.h>
42#include <sys/mbuf.h>
43#include <sys/domain.h>
44#include <sys/kernel.h>
45#include <sys/poll.h>
46#include <sys/protosw.h>
47#include <sys/socket.h>
48#include <sys/socketvar.h>
49#include <sys/resourcevar.h>
50#include <sys/signalvar.h>
51#include <sys/sysctl.h>
52#include <sys/uio.h>
53#include <vm/vm_zone.h>
54
55#include <machine/limits.h>
56
57struct vm_zone *socket_zone;
58so_gen_t so_gencnt; /* generation count for sockets */
59
60MALLOC_DEFINE(M_SONAME, "soname", "socket name");
61MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");
62
63SYSCTL_DECL(_kern_ipc);
64
65static int somaxconn = SOMAXCONN;
66SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn,
67 0, "");
66SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW,
67 &somaxconn, 0, "Maximum pending socket connection queue size");
68
69/*
70 * Socket operation routines.
71 * These routines are called by the routines in
72 * sys_socket.c or from a system process, and
73 * implement the semantics of socket operations by
74 * switching out to the protocol specific routines.
75 */
76
77/*
78 * Get a socket structure from our zone, and initialize it.
79 * We don't implement `waitok' yet (see comments in uipc_domain.c).
80 * Note that it would probably be better to allocate socket
81 * and PCB at the same time, but I'm not convinced that all
82 * the protocols can be easily modified to do this.
83 */
84struct socket *
85soalloc(waitok)
86 int waitok;
87{
88 struct socket *so;
89
90 so = zalloci(socket_zone);
91 if (so) {
92 /* XXX race condition for reentrant kernel */
93 bzero(so, sizeof *so);
94 so->so_gencnt = ++so_gencnt;
95 so->so_zone = socket_zone;
96 }
97 return so;
98}
99
100int
101socreate(dom, aso, type, proto, p)
102 int dom;
103 struct socket **aso;
104 register int type;
105 int proto;
106 struct proc *p;
107{
108 register struct protosw *prp;
109 register struct socket *so;
110 register int error;
111
112 if (proto)
113 prp = pffindproto(dom, proto, type);
114 else
115 prp = pffindtype(dom, type);
116 if (prp == 0 || prp->pr_usrreqs->pru_attach == 0)
117 return (EPROTONOSUPPORT);
118 if (prp->pr_type != type)
119 return (EPROTOTYPE);
120 so = soalloc(p != 0);
121 if (so == 0)
122 return (ENOBUFS);
123
124 TAILQ_INIT(&so->so_incomp);
125 TAILQ_INIT(&so->so_comp);
126 so->so_type = type;
127 if (p != 0)
128 so->so_uid = p->p_ucred->cr_uid;
129 so->so_proto = prp;
130 error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
131 if (error) {
132 so->so_state |= SS_NOFDREF;
133 sofree(so);
134 return (error);
135 }
136 *aso = so;
137 return (0);
138}
139
140int
141sobind(so, nam, p)
142 struct socket *so;
143 struct sockaddr *nam;
144 struct proc *p;
145{
146 int s = splnet();
147 int error;
148
149 error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
150 splx(s);
151 return (error);
152}
153
154void
155sodealloc(so)
156 struct socket *so;
157{
158 so->so_gencnt = ++so_gencnt;
159 zfreei(so->so_zone, so);
160}
161
162int
163solisten(so, backlog, p)
164 register struct socket *so;
165 int backlog;
166 struct proc *p;
167{
168 int s, error;
169
170 s = splnet();
171 error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
172 if (error) {
173 splx(s);
174 return (error);
175 }
176 if (so->so_comp.tqh_first == NULL)
177 so->so_options |= SO_ACCEPTCONN;
178 if (backlog < 0 || backlog > somaxconn)
179 backlog = somaxconn;
180 so->so_qlimit = backlog;
181 splx(s);
182 return (0);
183}
184
185void
186sofree(so)
187 register struct socket *so;
188{
189 struct socket *head = so->so_head;
190
191 if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
192 return;
193 if (head != NULL) {
194 if (so->so_state & SS_INCOMP) {
195 TAILQ_REMOVE(&head->so_incomp, so, so_list);
196 head->so_incqlen--;
197 } else if (so->so_state & SS_COMP) {
198 /*
199 * We must not decommission a socket that's
200 * on the accept(2) queue. If we do, then
201 * accept(2) may hang after select(2) indicated
202 * that the listening socket was ready.
203 */
204 return;
205 } else {
206 panic("sofree: not queued");
207 }
208 head->so_qlen--;
209 so->so_state &= ~SS_INCOMP;
210 so->so_head = NULL;
211 }
212 sbrelease(&so->so_snd);
213 sorflush(so);
214 sodealloc(so);
215}
216
217/*
218 * Close a socket on last file table reference removal.
219 * Initiate disconnect if connected.
220 * Free socket when disconnect complete.
221 */
222int
223soclose(so)
224 register struct socket *so;
225{
226 int s = splnet(); /* conservative */
227 int error = 0;
228
229 funsetown(so->so_sigio);
230 if (so->so_options & SO_ACCEPTCONN) {
231 struct socket *sp, *sonext;
232
233 for (sp = so->so_incomp.tqh_first; sp != NULL; sp = sonext) {
234 sonext = sp->so_list.tqe_next;
235 (void) soabort(sp);
236 }
237 for (sp = so->so_comp.tqh_first; sp != NULL; sp = sonext) {
238 sonext = sp->so_list.tqe_next;
239 /* Dequeue from so_comp since sofree() won't do it */
240 TAILQ_REMOVE(&so->so_comp, sp, so_list);
241 so->so_qlen--;
242 sp->so_state &= ~SS_COMP;
243 sp->so_head = NULL;
244 (void) soabort(sp);
245 }
246 }
247 if (so->so_pcb == 0)
248 goto discard;
249 if (so->so_state & SS_ISCONNECTED) {
250 if ((so->so_state & SS_ISDISCONNECTING) == 0) {
251 error = sodisconnect(so);
252 if (error)
253 goto drop;
254 }
255 if (so->so_options & SO_LINGER) {
256 if ((so->so_state & SS_ISDISCONNECTING) &&
257 (so->so_state & SS_NBIO))
258 goto drop;
259 while (so->so_state & SS_ISCONNECTED) {
260 error = tsleep((caddr_t)&so->so_timeo,
261 PSOCK | PCATCH, "soclos", so->so_linger * hz);
262 if (error)
263 break;
264 }
265 }
266 }
267drop:
268 if (so->so_pcb) {
269 int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
270 if (error == 0)
271 error = error2;
272 }
273discard:
274 if (so->so_state & SS_NOFDREF)
275 panic("soclose: NOFDREF");
276 so->so_state |= SS_NOFDREF;
277 sofree(so);
278 splx(s);
279 return (error);
280}
281
282/*
283 * Must be called at splnet...
284 */
285int
286soabort(so)
287 struct socket *so;
288{
289
290 return (*so->so_proto->pr_usrreqs->pru_abort)(so);
291}
292
293int
294soaccept(so, nam)
295 register struct socket *so;
296 struct sockaddr **nam;
297{
298 int s = splnet();
299 int error;
300
301 if ((so->so_state & SS_NOFDREF) == 0)
302 panic("soaccept: !NOFDREF");
303 so->so_state &= ~SS_NOFDREF;
304 if ((so->so_state & SS_ISDISCONNECTED) == 0)
305 error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
306 else {
307 if (nam)
308 *nam = 0;
309 error = 0;
310 }
311 splx(s);
312 return (error);
313}
314
315int
316soconnect(so, nam, p)
317 register struct socket *so;
318 struct sockaddr *nam;
319 struct proc *p;
320{
321 int s;
322 int error;
323
324 if (so->so_options & SO_ACCEPTCONN)
325 return (EOPNOTSUPP);
326 s = splnet();
327 /*
328 * If protocol is connection-based, can only connect once.
329 * Otherwise, if connected, try to disconnect first.
330 * This allows user to disconnect by connecting to, e.g.,
331 * a null address.
332 */
333 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
334 ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
335 (error = sodisconnect(so))))
336 error = EISCONN;
337 else
338 error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
339 splx(s);
340 return (error);
341}
342
343int
344soconnect2(so1, so2)
345 register struct socket *so1;
346 struct socket *so2;
347{
348 int s = splnet();
349 int error;
350
351 error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
352 splx(s);
353 return (error);
354}
355
356int
357sodisconnect(so)
358 register struct socket *so;
359{
360 int s = splnet();
361 int error;
362
363 if ((so->so_state & SS_ISCONNECTED) == 0) {
364 error = ENOTCONN;
365 goto bad;
366 }
367 if (so->so_state & SS_ISDISCONNECTING) {
368 error = EALREADY;
369 goto bad;
370 }
371 error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
372bad:
373 splx(s);
374 return (error);
375}
376
377#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
378/*
379 * Send on a socket.
380 * If send must go all at once and message is larger than
381 * send buffering, then hard error.
382 * Lock against other senders.
383 * If must go all at once and not enough room now, then
384 * inform user that this would block and do nothing.
385 * Otherwise, if nonblocking, send as much as possible.
386 * The data to be sent is described by "uio" if nonzero,
387 * otherwise by the mbuf chain "top" (which must be null
388 * if uio is not). Data provided in mbuf chain must be small
389 * enough to send all at once.
390 *
391 * Returns nonzero on error, timeout or signal; callers
392 * must check for short counts if EINTR/ERESTART are returned.
393 * Data and control buffers are freed on return.
394 */
/*
 * sosend() -- transmit data on a socket (see the block comment above).
 * The data comes either from `uio' or from the prepackaged mbuf chain
 * `top' (exactly one of the two); `control' carries optional ancillary
 * data and `addr' an optional destination.  Returns 0 or an errno;
 * both `top' and `control' are always consumed/freed on return.
 */
395int
396sosend(so, addr, uio, top, control, flags, p)
397 register struct socket *so;
398 struct sockaddr *addr;
399 struct uio *uio;
400 struct mbuf *top;
401 struct mbuf *control;
402 int flags;
403 struct proc *p;
404{
405 struct mbuf **mp;
406 register struct mbuf *m;
407 register long space, len, resid;
408 int clen = 0, error, s, dontroute, mlen;
409 int atomic = sosendallatonce(so) || top;
410
411 if (uio)
412 resid = uio->uio_resid;
413 else
414 resid = top->m_pkthdr.len;
415 /*
416 * In theory resid should be unsigned.
417 * However, space must be signed, as it might be less than 0
418 * if we over-committed, and we must use a signed comparison
419 * of space and resid. On the other hand, a negative resid
420 * causes us to loop sending 0-length segments to the protocol.
421 *
422 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
423 * type sockets since that's an error.
424 */
425 if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
426 error = EINVAL;
427 goto out;
428 }
429
430 dontroute =
431 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
432 (so->so_proto->pr_flags & PR_ATOMIC);
433 if (p)
434 p->p_stats->p_ru.ru_msgsnd++;
435 if (control)
436 clen = control->m_len;
/* snderr: record the error, drop splnet, and release the sockbuf lock. */
437#define snderr(errno) { error = errno; splx(s); goto release; }
438
439restart:
440 error = sblock(&so->so_snd, SBLOCKWAIT(flags));
441 if (error)
442 goto out;
/* Outer loop: one pass per protocol-send; runs until resid drained. */
443 do {
444 s = splnet();
445 if (so->so_state & SS_CANTSENDMORE)
446 snderr(EPIPE);
447 if (so->so_error) {
448 error = so->so_error;
449 so->so_error = 0;
450 splx(s);
451 goto release;
452 }
453 if ((so->so_state & SS_ISCONNECTED) == 0) {
454 /*
455 * `sendto' and `sendmsg' is allowed on a connection-
456 * based socket if it supports implied connect.
457 * Return ENOTCONN if not connected and no address is
458 * supplied.
459 */
460 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
461 (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
462 if ((so->so_state & SS_ISCONFIRMING) == 0 &&
463 !(resid == 0 && clen != 0))
464 snderr(ENOTCONN);
465 } else if (addr == 0)
466 snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ?
467 ENOTCONN : EDESTADDRREQ);
468 }
469 space = sbspace(&so->so_snd);
/* OOB data is allowed to slightly overrun the send buffer. */
470 if (flags & MSG_OOB)
471 space += 1024;
472 if ((atomic && resid > so->so_snd.sb_hiwat) ||
473 clen > so->so_snd.sb_hiwat)
474 snderr(EMSGSIZE);
/* Not enough room now: block (or EWOULDBLOCK) and retry from the top. */
475 if (space < resid + clen && uio &&
476 (atomic || space < so->so_snd.sb_lowat || space < clen)) {
477 if (so->so_state & SS_NBIO)
478 snderr(EWOULDBLOCK);
479 sbunlock(&so->so_snd);
480 error = sbwait(&so->so_snd);
481 splx(s);
482 if (error)
483 goto out;
484 goto restart;
485 }
486 splx(s);
487 mp = &top;
488 space -= clen;
/* Inner loop: fill mbufs from uio until space/resid is exhausted. */
489 do {
490 if (uio == NULL) {
491 /*
492 * Data is prepackaged in "top".
493 */
494 resid = 0;
495 if (flags & MSG_EOR)
496 top->m_flags |= M_EOR;
497 } else do {
498 if (top == 0) {
499 MGETHDR(m, M_WAIT, MT_DATA);
500 mlen = MHLEN;
501 m->m_pkthdr.len = 0;
502 m->m_pkthdr.rcvif = (struct ifnet *)0;
503 } else {
504 MGET(m, M_WAIT, MT_DATA);
505 mlen = MLEN;
506 }
507 if (resid >= MINCLSIZE) {
508 MCLGET(m, M_WAIT);
509 if ((m->m_flags & M_EXT) == 0)
510 goto nopages;
511 mlen = MCLBYTES;
512 len = min(min(mlen, resid), space);
513 } else {
514nopages:
515 len = min(min(mlen, resid), space);
516 /*
517 * For datagram protocols, leave room
518 * for protocol headers in first mbuf.
519 */
520 if (atomic && top == 0 && len < mlen)
521 MH_ALIGN(m, len);
522 }
523 space -= len;
524 error = uiomove(mtod(m, caddr_t), (int)len, uio);
525 resid = uio->uio_resid;
526 m->m_len = len;
527 *mp = m;
528 top->m_pkthdr.len += len;
529 if (error)
530 goto release;
531 mp = &m->m_next;
532 if (resid <= 0) {
533 if (flags & MSG_EOR)
534 top->m_flags |= M_EOR;
535 break;
536 }
537 } while (space > 0 && atomic);
538 if (dontroute)
539 so->so_options |= SO_DONTROUTE;
540 s = splnet(); /* XXX */
541 error = (*so->so_proto->pr_usrreqs->pru_send)(so,
542 (flags & MSG_OOB) ? PRUS_OOB :
543 /*
544 * If the user set MSG_EOF, the protocol
545 * understands this flag and nothing left to
546 * send then use PRU_SEND_EOF instead of PRU_SEND.
547 */
548 ((flags & MSG_EOF) &&
549 (so->so_proto->pr_flags & PR_IMPLOPCL) &&
550 (resid <= 0)) ?
551 PRUS_EOF :
552 /* If there is more to send set PRUS_MORETOCOME */
553 (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
554 top, addr, control, p);
555 splx(s);
556 if (dontroute)
557 so->so_options &= ~SO_DONTROUTE;
/* pru_send consumed top/control; reset so they are not freed again. */
558 clen = 0;
559 control = 0;
560 top = 0;
561 mp = &top;
562 if (error)
563 goto release;
564 } while (resid && space > 0);
565 } while (resid);
566
567release:
568 sbunlock(&so->so_snd);
569out:
570 if (top)
571 m_freem(top);
572 if (control)
573 m_freem(control);
574 return (error);
575}
576
577/*
578 * Implement receive operations on a socket.
579 * We depend on the way that records are added to the sockbuf
580 * by sbappend*. In particular, each record (mbufs linked through m_next)
581 * must begin with an address if the protocol so specifies,
582 * followed by an optional mbuf or mbufs containing ancillary data,
583 * and then zero or more mbufs of data.
584 * In order to avoid blocking network interrupts for the entire time here,
585 * we splx() while doing the actual copy to user space.
586 * Although the sockbuf is locked, new data may still be appended,
587 * and thus we must maintain consistency of the sockbuf during that time.
588 *
589 * The caller may receive the data as a single mbuf chain by supplying
590 * an mbuf **mp0 for use in returning the chain. The uio is then used
591 * only for the count in uio_resid.
592 */
/*
 * soreceive() -- receive data from a socket (see the block comment
 * above).  Fills `uio' (or hands back an mbuf chain through `mp0'),
 * optionally returning the sender address via `psa', ancillary data
 * via `controlp', and result flags (MSG_EOR/MSG_TRUNC/MSG_OOB) via
 * `flagsp'.  Returns 0 or an errno.
 */
593int
594soreceive(so, psa, uio, mp0, controlp, flagsp)
595 register struct socket *so;
596 struct sockaddr **psa;
597 struct uio *uio;
598 struct mbuf **mp0;
599 struct mbuf **controlp;
600 int *flagsp;
601{
602 register struct mbuf *m, **mp;
603 register int flags, len, error, s, offset;
604 struct protosw *pr = so->so_proto;
605 struct mbuf *nextrecord;
606 int moff, type = 0;
607 int orig_resid = uio->uio_resid;
608
609 mp = mp0;
610 if (psa)
611 *psa = 0;
612 if (controlp)
613 *controlp = 0;
614 if (flagsp)
615 flags = *flagsp &~ MSG_EOR;
616 else
617 flags = 0;
/* Out-of-band data takes a completely separate, simpler path. */
618 if (flags & MSG_OOB) {
619 m = m_get(M_WAIT, MT_DATA);
620 error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
621 if (error)
622 goto bad;
623 do {
624 error = uiomove(mtod(m, caddr_t),
625 (int) min(uio->uio_resid, m->m_len), uio);
626 m = m_free(m);
627 } while (uio->uio_resid && error == 0 && m);
628bad:
629 if (m)
630 m_freem(m);
631 return (error);
632 }
633 if (mp)
634 *mp = (struct mbuf *)0;
635 if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
636 (*pr->pr_usrreqs->pru_rcvd)(so, 0);
637
638restart:
639 error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
640 if (error)
641 return (error);
642 s = splnet();
643
644 m = so->so_rcv.sb_mb;
645 /*
646 * If we have less data than requested, block awaiting more
647 * (subject to any timeout) if:
648 * 1. the current count is less than the low water mark, or
649 * 2. MSG_WAITALL is set, and it is possible to do the entire
650 * receive operation at once if we block (resid <= hiwat).
651 * 3. MSG_DONTWAIT is not set
652 * If MSG_WAITALL is set but resid is larger than the receive buffer,
653 * we have to do the receive in sections, and thus risk returning
654 * a short count if a timeout or signal occurs after we start.
655 */
656 if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
657 so->so_rcv.sb_cc < uio->uio_resid) &&
658 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
659 ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
660 m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
661 KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1"));
662 if (so->so_error) {
663 if (m)
664 goto dontblock;
665 error = so->so_error;
666 if ((flags & MSG_PEEK) == 0)
667 so->so_error = 0;
668 goto release;
669 }
670 if (so->so_state & SS_CANTRCVMORE) {
671 if (m)
672 goto dontblock;
673 else
674 goto release;
675 }
/* A record boundary or OOB mark already queued means we need not block. */
676 for (; m; m = m->m_next)
677 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
678 m = so->so_rcv.sb_mb;
679 goto dontblock;
680 }
681 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
682 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
683 error = ENOTCONN;
684 goto release;
685 }
686 if (uio->uio_resid == 0)
687 goto release;
688 if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
689 error = EWOULDBLOCK;
690 goto release;
691 }
692 sbunlock(&so->so_rcv);
693 error = sbwait(&so->so_rcv);
694 splx(s);
695 if (error)
696 return (error);
697 goto restart;
698 }
699dontblock:
700 if (uio->uio_procp)
701 uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
702 nextrecord = m->m_nextpkt;
/* PR_ADDR protocols put the sender's address first in each record. */
703 if (pr->pr_flags & PR_ADDR) {
704 KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
705 orig_resid = 0;
706 if (psa)
707 *psa = dup_sockaddr(mtod(m, struct sockaddr *),
708 mp0 == 0);
709 if (flags & MSG_PEEK) {
710 m = m->m_next;
711 } else {
712 sbfree(&so->so_rcv, m);
713 MFREE(m, so->so_rcv.sb_mb);
714 m = so->so_rcv.sb_mb;
715 }
716 }
/* Next come any ancillary-data (control) mbufs. */
717 while (m && m->m_type == MT_CONTROL && error == 0) {
718 if (flags & MSG_PEEK) {
719 if (controlp)
720 *controlp = m_copy(m, 0, m->m_len);
721 m = m->m_next;
722 } else {
723 sbfree(&so->so_rcv, m);
724 if (controlp) {
725 if (pr->pr_domain->dom_externalize &&
726 mtod(m, struct cmsghdr *)->cmsg_type ==
727 SCM_RIGHTS)
728 error = (*pr->pr_domain->dom_externalize)(m);
729 *controlp = m;
730 so->so_rcv.sb_mb = m->m_next;
731 m->m_next = 0;
732 m = so->so_rcv.sb_mb;
733 } else {
734 MFREE(m, so->so_rcv.sb_mb);
735 m = so->so_rcv.sb_mb;
736 }
737 }
738 if (controlp) {
739 orig_resid = 0;
740 controlp = &(*controlp)->m_next;
741 }
742 }
743 if (m) {
744 if ((flags & MSG_PEEK) == 0)
745 m->m_nextpkt = nextrecord;
746 type = m->m_type;
747 if (type == MT_OOBDATA)
748 flags |= MSG_OOB;
749 }
750 moff = 0;
751 offset = 0;
/* Main loop: copy data mbufs out until resid is satisfied. */
752 while (m && uio->uio_resid > 0 && error == 0) {
753 if (m->m_type == MT_OOBDATA) {
754 if (type != MT_OOBDATA)
755 break;
756 } else if (type == MT_OOBDATA)
757 break;
758 else
759 KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER,
760 ("receive 3"));
761 so->so_state &= ~SS_RCVATMARK;
762 len = uio->uio_resid;
/* Never read past the out-of-band mark in one gulp. */
763 if (so->so_oobmark && len > so->so_oobmark - offset)
764 len = so->so_oobmark - offset;
765 if (len > m->m_len - moff)
766 len = m->m_len - moff;
767 /*
768 * If mp is set, just pass back the mbufs.
769 * Otherwise copy them out via the uio, then free.
770 * Sockbuf must be consistent here (points to current mbuf,
771 * it points to next record) when we drop priority;
772 * we must note any additions to the sockbuf when we
773 * block interrupts again.
774 */
775 if (mp == 0) {
776 splx(s);
777 error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
778 s = splnet();
779 if (error)
780 goto release;
781 } else
782 uio->uio_resid -= len;
/* Consumed the whole mbuf: advance (and possibly free) it. */
783 if (len == m->m_len - moff) {
784 if (m->m_flags & M_EOR)
785 flags |= MSG_EOR;
786 if (flags & MSG_PEEK) {
787 m = m->m_next;
788 moff = 0;
789 } else {
790 nextrecord = m->m_nextpkt;
791 sbfree(&so->so_rcv, m);
792 if (mp) {
793 *mp = m;
794 mp = &m->m_next;
795 so->so_rcv.sb_mb = m = m->m_next;
796 *mp = (struct mbuf *)0;
797 } else {
798 MFREE(m, so->so_rcv.sb_mb);
799 m = so->so_rcv.sb_mb;
800 }
801 if (m)
802 m->m_nextpkt = nextrecord;
803 }
804 } else {
805 if (flags & MSG_PEEK)
806 moff += len;
807 else {
808 if (mp)
809 *mp = m_copym(m, 0, len, M_WAIT);
810 m->m_data += len;
811 m->m_len -= len;
812 so->so_rcv.sb_cc -= len;
813 }
814 }
815 if (so->so_oobmark) {
816 if ((flags & MSG_PEEK) == 0) {
817 so->so_oobmark -= len;
818 if (so->so_oobmark == 0) {
819 so->so_state |= SS_RCVATMARK;
820 break;
821 }
822 } else {
823 offset += len;
824 if (offset == so->so_oobmark)
825 break;
826 }
827 }
828 if (flags & MSG_EOR)
829 break;
830 /*
831 * If the MSG_WAITALL flag is set (for non-atomic socket),
832 * we must not quit until "uio->uio_resid == 0" or an error
833 * termination. If a signal/timeout occurs, return
834 * with a short count but without error.
835 * Keep sockbuf locked against other readers.
836 */
837 while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
838 !sosendallatonce(so) && !nextrecord) {
839 if (so->so_error || so->so_state & SS_CANTRCVMORE)
840 break;
841 error = sbwait(&so->so_rcv);
842 if (error) {
843 sbunlock(&so->so_rcv);
844 splx(s);
845 return (0);
846 }
847 m = so->so_rcv.sb_mb;
848 if (m)
849 nextrecord = m->m_nextpkt;
850 }
851 }
852
/* An atomic-protocol record we didn't fully consume is truncated. */
853 if (m && pr->pr_flags & PR_ATOMIC) {
854 flags |= MSG_TRUNC;
855 if ((flags & MSG_PEEK) == 0)
856 (void) sbdroprecord(&so->so_rcv);
857 }
858 if ((flags & MSG_PEEK) == 0) {
859 if (m == 0)
860 so->so_rcv.sb_mb = nextrecord;
861 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
862 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
863 }
/* Got nothing at all and the socket is still open: go wait again. */
864 if (orig_resid == uio->uio_resid && orig_resid &&
865 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
866 sbunlock(&so->so_rcv);
867 splx(s);
868 goto restart;
869 }
870
871 if (flagsp)
872 *flagsp |= flags;
873release:
874 sbunlock(&so->so_rcv);
875 splx(s);
876 return (error);
877}
878
879int
880soshutdown(so, how)
881 register struct socket *so;
882 register int how;
883{
884 register struct protosw *pr = so->so_proto;
885
886 how++;
887 if (how & FREAD)
888 sorflush(so);
889 if (how & FWRITE)
890 return ((*pr->pr_usrreqs->pru_shutdown)(so));
891 return (0);
892}
893
894void
895sorflush(so)
896 register struct socket *so;
897{
898 register struct sockbuf *sb = &so->so_rcv;
899 register struct protosw *pr = so->so_proto;
900 register int s;
901 struct sockbuf asb;
902
903 sb->sb_flags |= SB_NOINTR;
904 (void) sblock(sb, M_WAITOK);
905 s = splimp();
906 socantrcvmore(so);
907 sbunlock(sb);
908 asb = *sb;
909 bzero((caddr_t)sb, sizeof (*sb));
910 splx(s);
911 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
912 (*pr->pr_domain->dom_dispose)(asb.sb_mb);
913 sbrelease(&asb);
914}
915
916/*
917 * Perhaps this routine, and sooptcopyout(), below, ought to come in
918 * an additional variant to handle the case where the option value needs
919 * to be some kind of integer, but not a specific size.
920 * In addition to their use here, these functions are also called by the
921 * protocol-level pr_ctloutput() routines.
922 */
923int
924sooptcopyin(sopt, buf, len, minlen)
925 struct sockopt *sopt;
926 void *buf;
927 size_t len;
928 size_t minlen;
929{
930 size_t valsize;
931
932 /*
933 * If the user gives us more than we wanted, we ignore it,
934 * but if we don't get the minimum length the caller
935 * wants, we return EINVAL. On success, sopt->sopt_valsize
936 * is set to however much we actually retrieved.
937 */
938 if ((valsize = sopt->sopt_valsize) < minlen)
939 return EINVAL;
940 if (valsize > len)
941 sopt->sopt_valsize = valsize = len;
942
943 if (sopt->sopt_p != 0)
944 return (copyin(sopt->sopt_val, buf, valsize));
945
946 bcopy(sopt->sopt_val, buf, valsize);
947 return 0;
948}
949
950int
951sosetopt(so, sopt)
952 struct socket *so;
953 struct sockopt *sopt;
954{
955 int error, optval;
956 struct linger l;
957 struct timeval tv;
958 short val;
959
960 error = 0;
961 if (sopt->sopt_level != SOL_SOCKET) {
962 if (so->so_proto && so->so_proto->pr_ctloutput)
963 return ((*so->so_proto->pr_ctloutput)
964 (so, sopt));
965 error = ENOPROTOOPT;
966 } else {
967 switch (sopt->sopt_name) {
968 case SO_LINGER:
969 error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
970 if (error)
971 goto bad;
972
973 so->so_linger = l.l_linger;
974 if (l.l_onoff)
975 so->so_options |= SO_LINGER;
976 else
977 so->so_options &= ~SO_LINGER;
978 break;
979
980 case SO_DEBUG:
981 case SO_KEEPALIVE:
982 case SO_DONTROUTE:
983 case SO_USELOOPBACK:
984 case SO_BROADCAST:
985 case SO_REUSEADDR:
986 case SO_REUSEPORT:
987 case SO_OOBINLINE:
988 case SO_TIMESTAMP:
989 error = sooptcopyin(sopt, &optval, sizeof optval,
990 sizeof optval);
991 if (error)
992 goto bad;
993 if (optval)
994 so->so_options |= sopt->sopt_name;
995 else
996 so->so_options &= ~sopt->sopt_name;
997 break;
998
999 case SO_SNDBUF:
1000 case SO_RCVBUF:
1001 case SO_SNDLOWAT:
1002 case SO_RCVLOWAT:
1003 error = sooptcopyin(sopt, &optval, sizeof optval,
1004 sizeof optval);
1005 if (error)
1006 goto bad;
1007
1008 /*
1009 * Values < 1 make no sense for any of these
1010 * options, so disallow them.
1011 */
1012 if (optval < 1) {
1013 error = EINVAL;
1014 goto bad;
1015 }
1016
1017 switch (sopt->sopt_name) {
1018 case SO_SNDBUF:
1019 case SO_RCVBUF:
1020 if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
1021 &so->so_snd : &so->so_rcv,
1022 (u_long) optval) == 0) {
1023 error = ENOBUFS;
1024 goto bad;
1025 }
1026 break;
1027
1028 /*
1029 * Make sure the low-water is never greater than
1030 * the high-water.
1031 */
1032 case SO_SNDLOWAT:
1033 so->so_snd.sb_lowat =
1034 (optval > so->so_snd.sb_hiwat) ?
1035 so->so_snd.sb_hiwat : optval;
1036 break;
1037 case SO_RCVLOWAT:
1038 so->so_rcv.sb_lowat =
1039 (optval > so->so_rcv.sb_hiwat) ?
1040 so->so_rcv.sb_hiwat : optval;
1041 break;
1042 }
1043 break;
1044
1045 case SO_SNDTIMEO:
1046 case SO_RCVTIMEO:
1047 error = sooptcopyin(sopt, &tv, sizeof tv,
1048 sizeof tv);
1049 if (error)
1050 goto bad;
1051
1052 if (tv.tv_sec * hz + tv.tv_usec / tick > SHRT_MAX) {
1053 error = EDOM;
1054 goto bad;
1055 }
1056 val = tv.tv_sec * hz + tv.tv_usec / tick;
1057
1058 switch (sopt->sopt_name) {
1059 case SO_SNDTIMEO:
1060 so->so_snd.sb_timeo = val;
1061 break;
1062 case SO_RCVTIMEO:
1063 so->so_rcv.sb_timeo = val;
1064 break;
1065 }
1066 break;
1067
1068 default:
1069 error = ENOPROTOOPT;
1070 break;
1071 }
1072 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
1073 (void) ((*so->so_proto->pr_ctloutput)
1074 (so, sopt));
1075 }
1076 }
1077bad:
1078 return (error);
1079}
1080
1081/* Helper routine for getsockopt */
1082int
1083sooptcopyout(sopt, buf, len)
1084 struct sockopt *sopt;
1085 void *buf;
1086 size_t len;
1087{
1088 int error;
1089 size_t valsize;
1090
1091 error = 0;
1092
1093 /*
1094 * Documented get behavior is that we always return a value,
1095 * possibly truncated to fit in the user's buffer.
1096 * Traditional behavior is that we always tell the user
1097 * precisely how much we copied, rather than something useful
1098 * like the total amount we had available for her.
1099 * Note that this interface is not idempotent; the entire answer must
1100 * generated ahead of time.
1101 */
1102 valsize = min(len, sopt->sopt_valsize);
1103 sopt->sopt_valsize = valsize;
1104 if (sopt->sopt_val != 0) {
1105 if (sopt->sopt_p != 0)
1106 error = copyout(buf, sopt->sopt_val, valsize);
1107 else
1108 bcopy(buf, sopt->sopt_val, valsize);
1109 }
1110 return error;
1111}
1112
/*
 * Handle getsockopt(2).  Options at a level other than SOL_SOCKET are
 * passed straight through to the protocol's pr_ctloutput routine.
 * Socket-level values are copied out to the caller via sooptcopyout().
 * Returns 0 or an errno value.
 */
int
sogetopt(so, sopt)
	struct socket *so;
	struct sockopt *sopt;
{
	int error, optval;
	struct linger l;
	struct timeval tv;

	error = 0;
	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)
				  (so, sopt));
		} else
			return (ENOPROTOOPT);
	} else {
		switch (sopt->sopt_name) {
		case SO_LINGER:
			l.l_onoff = so->so_options & SO_LINGER;
			l.l_linger = so->so_linger;
			error = sooptcopyout(sopt, &l, sizeof l);
			break;

		/*
		 * Boolean options report the raw so_options bit
		 * (nonzero when set), not a normalized 0/1.
		 */
		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			optval = so->so_options & sopt->sopt_name;
integer:
			/* Common exit for all integer-valued options. */
			error = sooptcopyout(sopt, &optval, sizeof optval);
			break;

		case SO_TYPE:
			optval = so->so_type;
			goto integer;

		case SO_ERROR:
			/* Reading the pending error clears it. */
			optval = so->so_error;
			so->so_error = 0;
			goto integer;

		case SO_SNDBUF:
			optval = so->so_snd.sb_hiwat;
			goto integer;

		case SO_RCVBUF:
			optval = so->so_rcv.sb_hiwat;
			goto integer;

		case SO_SNDLOWAT:
			optval = so->so_snd.sb_lowat;
			goto integer;

		case SO_RCVLOWAT:
			optval = so->so_rcv.sb_lowat;
			goto integer;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			/* Convert the tick-count timeout back to a timeval. */
			optval = (sopt->sopt_name == SO_SNDTIMEO ?
				  so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			tv.tv_sec = optval / hz;
			tv.tv_usec = (optval % hz) * tick;
			error = sooptcopyout(sopt, &tv, sizeof tv);
			break;

		default:
			error = ENOPROTOOPT;
			break;
		}
		return (error);
	}
}
1193
/*
 * Urgent (out-of-band) data has arrived on this socket: post SIGURG
 * to the registered async-I/O owner (if any) and wake up any
 * select()/poll() waiters on the receive buffer.
 */
void
sohasoutofband(so)
	register struct socket *so;
{
	if (so->so_sigio != NULL)
		pgsigio(so->so_sigio, SIGURG, 0);
	selwakeup(&so->so_rcv.sb_sel);
}
1202
1203int
1204sopoll(struct socket *so, int events, struct ucred *cred, struct proc *p)
1205{
1206 int revents = 0;
1207 int s = splnet();
1208
1209 if (events & (POLLIN | POLLRDNORM))
1210 if (soreadable(so))
1211 revents |= events & (POLLIN | POLLRDNORM);
1212
1213 if (events & (POLLOUT | POLLWRNORM))
1214 if (sowriteable(so))
1215 revents |= events & (POLLOUT | POLLWRNORM);
1216
1217 if (events & (POLLPRI | POLLRDBAND))
1218 if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
1219 revents |= events & (POLLPRI | POLLRDBAND);
1220
1221 if (revents == 0) {
1222 if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
1223 selrecord(p, &so->so_rcv.sb_sel);
1224 so->so_rcv.sb_flags |= SB_SEL;
1225 }
1226
1227 if (events & (POLLOUT | POLLWRNORM)) {
1228 selrecord(p, &so->so_snd.sb_sel);
1229 so->so_snd.sb_flags |= SB_SEL;
1230 }
1231 }
1232
1233 splx(s);
1234 return (revents);
1235}
68
69/*
70 * Socket operation routines.
71 * These routines are called by the routines in
72 * sys_socket.c or from a system process, and
73 * implement the semantics of socket operations by
74 * switching out to the protocol specific routines.
75 */
76
77/*
78 * Get a socket structure from our zone, and initialize it.
79 * We don't implement `waitok' yet (see comments in uipc_domain.c).
80 * Note that it would probably be better to allocate socket
81 * and PCB at the same time, but I'm not convinced that all
82 * the protocols can be easily modified to do this.
83 */
84struct socket *
85soalloc(waitok)
86 int waitok;
87{
88 struct socket *so;
89
90 so = zalloci(socket_zone);
91 if (so) {
92 /* XXX race condition for reentrant kernel */
93 bzero(so, sizeof *so);
94 so->so_gencnt = ++so_gencnt;
95 so->so_zone = socket_zone;
96 }
97 return so;
98}
99
/*
 * Create a new socket in domain `dom' of the given `type'.  A nonzero
 * `proto' selects a specific protocol; otherwise the domain's default
 * protocol for `type' is used.  On success the newly attached socket
 * is returned through `aso'; on failure an errno value is returned and
 * nothing is allocated.
 */
int
socreate(dom, aso, type, proto, p)
	int dom;
	struct socket **aso;
	register int type;
	int proto;
	struct proc *p;
{
	register struct protosw *prp;
	register struct socket *so;
	register int error;

	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == 0 || prp->pr_usrreqs->pru_attach == 0)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	so = soalloc(p != 0);
	if (so == 0)
		return (ENOBUFS);

	TAILQ_INIT(&so->so_incomp);
	TAILQ_INIT(&so->so_comp);
	so->so_type = type;
	if (p != 0)
		so->so_uid = p->p_ucred->cr_uid;
	so->so_proto = prp;
	error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
	if (error) {
		/*
		 * Mark the half-constructed socket as having no file
		 * reference so sofree() will actually release it.
		 */
		so->so_state |= SS_NOFDREF;
		sofree(so);
		return (error);
	}
	*aso = so;
	return (0);
}
139
140int
141sobind(so, nam, p)
142 struct socket *so;
143 struct sockaddr *nam;
144 struct proc *p;
145{
146 int s = splnet();
147 int error;
148
149 error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
150 splx(s);
151 return (error);
152}
153
/*
 * Return a socket structure to its zone.  The generation count is
 * bumped first so that stale references to this slot can be detected.
 * NOTE(review): the gencnt role is inferred from its use in soalloc();
 * confirm against consumers of so_gencnt.
 */
void
sodealloc(so)
	struct socket *so;
{
	so->so_gencnt = ++so_gencnt;
	zfreei(so->so_zone, so);
}
161
/*
 * Put a socket into the listening state with the given backlog.
 * The protocol is consulted first (pru_listen); SO_ACCEPTCONN is only
 * set when no completed connections are already queued.  Out-of-range
 * backlogs are clamped to somaxconn.
 */
int
solisten(so, backlog, p)
	register struct socket *so;
	int backlog;
	struct proc *p;
{
	int s, error;

	s = splnet();
	error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
	if (error) {
		splx(s);
		return (error);
	}
	if (so->so_comp.tqh_first == NULL)
		so->so_options |= SO_ACCEPTCONN;
	/* Negative or oversized backlogs become the system maximum. */
	if (backlog < 0 || backlog > somaxconn)
		backlog = somaxconn;
	so->so_qlimit = backlog;
	splx(s);
	return (0);
}
184
/*
 * Release a socket's buffers and storage, but only once it has neither
 * a protocol control block nor a file-descriptor reference.  A socket
 * still queued on a listening socket is unlinked from the incomplete
 * queue; one on the completed (accept) queue is deliberately left
 * untouched.
 */
void
sofree(so)
	register struct socket *so;
{
	struct socket *head = so->so_head;

	/* Not yet fully detached: somebody still needs this socket. */
	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (head != NULL) {
		if (so->so_state & SS_INCOMP) {
			TAILQ_REMOVE(&head->so_incomp, so, so_list);
			head->so_incqlen--;
		} else if (so->so_state & SS_COMP) {
			/*
			 * We must not decommission a socket that's
			 * on the accept(2) queue. If we do, then
			 * accept(2) may hang after select(2) indicated
			 * that the listening socket was ready.
			 */
			return;
		} else {
			panic("sofree: not queued");
		}
		head->so_qlen--;
		so->so_state &= ~SS_INCOMP;
		so->so_head = NULL;
	}
	sbrelease(&so->so_snd);
	sorflush(so);
	sodealloc(so);
}
216
/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 * For listening sockets, every pending connection (complete or not) is
 * aborted first.  With SO_LINGER set (and blocking I/O), we sleep until
 * the disconnect finishes or the linger interval expires.
 */
int
soclose(so)
	register struct socket *so;
{
	int s = splnet();		/* conservative */
	int error = 0;

	funsetown(so->so_sigio);
	if (so->so_options & SO_ACCEPTCONN) {
		struct socket *sp, *sonext;

		/* Abort every not-yet-completed connection. */
		for (sp = so->so_incomp.tqh_first; sp != NULL; sp = sonext) {
			sonext = sp->so_list.tqe_next;
			(void) soabort(sp);
		}
		for (sp = so->so_comp.tqh_first; sp != NULL; sp = sonext) {
			sonext = sp->so_list.tqe_next;
			/* Dequeue from so_comp since sofree() won't do it */
			TAILQ_REMOVE(&so->so_comp, sp, so_list);
			so->so_qlen--;
			sp->so_state &= ~SS_COMP;
			sp->so_head = NULL;
			(void) soabort(sp);
		}
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			/* Non-blocking linger: don't wait, just drop. */
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep((caddr_t)&so->so_timeo,
				    PSOCK | PCATCH, "soclos", so->so_linger * hz);
				if (error)
					break;
			}
		}
	}
drop:
	if (so->so_pcb) {
		int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
		/* Preserve any earlier error over the detach result. */
		if (error == 0)
			error = error2;
	}
discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
	sofree(so);
	splx(s);
	return (error);
}
281
/*
 * Forcibly tear down a (usually embryonic) connection via the
 * protocol's pru_abort entry.
 * Must be called at splnet...
 */
int
soabort(so)
	struct socket *so;
{

	return (*so->so_proto->pr_usrreqs->pru_abort)(so);
}
292
/*
 * Accept a connection: clear SS_NOFDREF (the socket now has a file
 * reference) and ask the protocol for the peer's address.  If the
 * connection already disconnected, succeed but return no address.
 */
int
soaccept(so, nam)
	register struct socket *so;
	struct sockaddr **nam;
{
	int s = splnet();
	int error;

	/* Caller must hand us a socket that still lacks an fd ref. */
	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept: !NOFDREF");
	so->so_state &= ~SS_NOFDREF;
	if ((so->so_state & SS_ISDISCONNECTED) == 0)
		error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
	else {
		if (nam)
			*nam = 0;
		error = 0;
	}
	splx(s);
	return (error);
}
314
/*
 * Initiate a connection to `nam'.  Listening sockets cannot connect.
 * Returns 0 or an errno value; the connection typically completes
 * asynchronously.
 */
int
soconnect(so, nam, p)
	register struct socket *so;
	struct sockaddr *nam;
	struct proc *p;
{
	int s;
	int error;

	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	s = splnet();
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		/* Note: sodisconnect() may have set error; EISCONN wins. */
		error = EISCONN;
	else
		error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
	splx(s);
	return (error);
}
342
343int
344soconnect2(so1, so2)
345 register struct socket *so1;
346 struct socket *so2;
347{
348 int s = splnet();
349 int error;
350
351 error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
352 splx(s);
353 return (error);
354}
355
356int
357sodisconnect(so)
358 register struct socket *so;
359{
360 int s = splnet();
361 int error;
362
363 if ((so->so_state & SS_ISCONNECTED) == 0) {
364 error = ENOTCONN;
365 goto bad;
366 }
367 if (so->so_state & SS_ISDISCONNECTING) {
368 error = EALREADY;
369 goto bad;
370 }
371 error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
372bad:
373 splx(s);
374 return (error);
375}
376
#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not). Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
int
sosend(so, addr, uio, top, control, flags, p)
	register struct socket *so;
	struct sockaddr *addr;
	struct uio *uio;
	struct mbuf *top;
	struct mbuf *control;
	int flags;
	struct proc *p;
{
	struct mbuf **mp;
	register struct mbuf *m;
	register long space, len, resid;
	int clen = 0, error, s, dontroute, mlen;
	int atomic = sosendallatonce(so) || top;

	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid. On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 *
	 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
	 * type sockets since that's an error.
	 */
	if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
		error = EINVAL;
		goto out;
	}

	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	if (p)
		p->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;
/* Common error exit: record errno, drop spl, release the sockbuf lock. */
#define	snderr(errno)	{ error = errno; splx(s); goto release; }

restart:
	error = sblock(&so->so_snd, SBLOCKWAIT(flags));
	if (error)
		goto out;
	do {
		s = splnet();
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error) {
			/* Report and clear any asynchronous error. */
			error = so->so_error;
			so->so_error = 0;
			splx(s);
			goto release;
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			/*
			 * `sendto' and `sendmsg' is allowed on a connection-
			 * based socket if it supports implied connect.
			 * Return ENOTCONN if not connected and no address is
			 * supplied.
			 */
			if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
			    (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0)
				snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ?
				   ENOTCONN : EDESTADDRREQ);
		}
		space = sbspace(&so->so_snd);
		/* OOB data gets a little extra headroom beyond the hiwat. */
		if (flags & MSG_OOB)
			space += 1024;
		if ((atomic && resid > so->so_snd.sb_hiwat) ||
		    clen > so->so_snd.sb_hiwat)
			snderr(EMSGSIZE);
		if (space < resid + clen && uio &&
		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
			/* Not enough room: block (or fail if non-blocking). */
			if (so->so_state & SS_NBIO)
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(&so->so_snd);
			splx(s);
			if (error)
				goto out;
			goto restart;
		}
		splx(s);
		mp = &top;
		space -= clen;
		do {
		    if (uio == NULL) {
			/*
			 * Data is prepackaged in "top".
			 */
			resid = 0;
			if (flags & MSG_EOR)
				top->m_flags |= M_EOR;
		    } else do {
			/* Build the outgoing mbuf chain from the uio. */
			if (top == 0) {
				MGETHDR(m, M_WAIT, MT_DATA);
				mlen = MHLEN;
				m->m_pkthdr.len = 0;
				m->m_pkthdr.rcvif = (struct ifnet *)0;
			} else {
				MGET(m, M_WAIT, MT_DATA);
				mlen = MLEN;
			}
			if (resid >= MINCLSIZE) {
				MCLGET(m, M_WAIT);
				if ((m->m_flags & M_EXT) == 0)
					goto nopages;
				mlen = MCLBYTES;
				len = min(min(mlen, resid), space);
			} else {
nopages:
				len = min(min(mlen, resid), space);
				/*
				 * For datagram protocols, leave room
				 * for protocol headers in first mbuf.
				 */
				if (atomic && top == 0 && len < mlen)
					MH_ALIGN(m, len);
			}
			space -= len;
			error = uiomove(mtod(m, caddr_t), (int)len, uio);
			resid = uio->uio_resid;
			m->m_len = len;
			*mp = m;
			top->m_pkthdr.len += len;
			if (error)
				goto release;
			mp = &m->m_next;
			if (resid <= 0) {
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
				break;
			}
		    } while (space > 0 && atomic);
		    if (dontroute)
			    so->so_options |= SO_DONTROUTE;
		    s = splnet();				/* XXX */
		    error = (*so->so_proto->pr_usrreqs->pru_send)(so,
			(flags & MSG_OOB) ? PRUS_OOB :
			/*
			 * If the user set MSG_EOF, the protocol
			 * understands this flag and nothing left to
			 * send then use PRU_SEND_EOF instead of PRU_SEND.
			 */
			((flags & MSG_EOF) &&
			 (so->so_proto->pr_flags & PR_IMPLOPCL) &&
			 (resid <= 0)) ?
				PRUS_EOF :
			/* If there is more to send set PRUS_MORETOCOME */
			(resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
			top, addr, control, p);
		    splx(s);
		    if (dontroute)
			    so->so_options &= ~SO_DONTROUTE;
		    /* Ownership of top/control passed to the protocol. */
		    clen = 0;
		    control = 0;
		    top = 0;
		    mp = &top;
		    if (error)
			goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	sbunlock(&so->so_snd);
out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	return (error);
}
576
577/*
578 * Implement receive operations on a socket.
579 * We depend on the way that records are added to the sockbuf
580 * by sbappend*. In particular, each record (mbufs linked through m_next)
581 * must begin with an address if the protocol so specifies,
582 * followed by an optional mbuf or mbufs containing ancillary data,
583 * and then zero or more mbufs of data.
584 * In order to avoid blocking network interrupts for the entire time here,
585 * we splx() while doing the actual copy to user space.
586 * Although the sockbuf is locked, new data may still be appended,
587 * and thus we must maintain consistency of the sockbuf during that time.
588 *
589 * The caller may receive the data as a single mbuf chain by supplying
590 * an mbuf **mp0 for use in returning the chain. The uio is then used
591 * only for the count in uio_resid.
592 */
int
soreceive(so, psa, uio, mp0, controlp, flagsp)
	register struct socket *so;
	struct sockaddr **psa;
	struct uio *uio;
	struct mbuf **mp0;
	struct mbuf **controlp;
	int *flagsp;
{
	register struct mbuf *m, **mp;
	register int flags, len, error, s, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type = 0;
	int orig_resid = uio->uio_resid;

	mp = mp0;
	if (psa)
		*psa = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	if (flags & MSG_OOB) {
		/* Out-of-band data bypasses the receive buffer entirely. */
		m = m_get(M_WAIT, MT_DATA);
		error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreqs->pru_rcvd)(so, 0);

restart:
	error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
	if (error)
		return (error);
	s = splnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark, or
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat).
	 *   3. MSG_DONTWAIT is not set
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
		KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1"));
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		/* A record boundary or OOB mark lets us return early. */
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
dontblock:
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
	nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
		/* Record starts with the sender's address; peel it off. */
		KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
		orig_resid = 0;
		if (psa)
			*psa = dup_sockaddr(mtod(m, struct sockaddr *),
					    mp0 == 0);
		if (flags & MSG_PEEK) {
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			MFREE(m, so->so_rcv.sb_mb);
			m = so->so_rcv.sb_mb;
		}
	}
	/* Then any ancillary (control) mbufs. */
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (controlp) {
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}
	if (m) {
		if ((flags & MSG_PEEK) == 0)
			m->m_nextpkt = nextrecord;
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	}
	moff = 0;
	offset = 0;
	/* Main copy loop: one pass per data mbuf. */
	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
		else
			KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER,
				("receive 3"));
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		/* Never read past the out-of-band mark in one chunk. */
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == 0) {
			splx(s);
			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
			s = splnet();
			if (error)
				goto release;
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			/* Consumed the whole mbuf; advance or unlink it. */
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				if (m)
					m->m_nextpkt = nextrecord;
			}
		} else {
			/* Partial mbuf: adjust offsets/pointers in place. */
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			error = sbwait(&so->so_rcv);
			if (error) {
				/*
				 * Short count, no error: per the comment
				 * above, signal/timeout is not reported.
				 */
				sbunlock(&so->so_rcv);
				splx(s);
				return (0);
			}
			m = so->so_rcv.sb_mb;
			if (m)
				nextrecord = m->m_nextpkt;
		}
	}

	if (m && pr->pr_flags & PR_ATOMIC) {
		/* Datagram not fully consumed: drop the rest, flag it. */
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0)
			so->so_rcv.sb_mb = nextrecord;
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreqs->pru_rcvd)(so, flags);
	}
	/* Got nothing at all and nothing terminal happened: try again. */
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (flagsp)
		*flagsp |= flags;
release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}
878
/*
 * Shut down one or both halves of a connection.  `how' is the
 * shutdown(2) argument; incrementing it maps the traditional 0/1/2
 * encoding onto the FREAD/FWRITE bits.
 * NOTE(review): relies on FREAD==1 and FWRITE==2 -- confirm against
 * sys/fcntl.h before changing.
 */
int
soshutdown(so, how)
	register struct socket *so;
	register int how;
{
	register struct protosw *pr = so->so_proto;

	how++;
	if (how & FREAD)
		sorflush(so);
	if (how & FWRITE)
		return ((*pr->pr_usrreqs->pru_shutdown)(so));
	return (0);
}
893
/*
 * Flush the receive side of a socket: mark it unable to receive more,
 * steal the whole sockbuf into a local copy (so interrupts see an
 * empty, zeroed buffer), dispose of any in-flight access rights, and
 * free the stolen mbufs.
 */
void
sorflush(so)
	register struct socket *so;
{
	register struct sockbuf *sb = &so->so_rcv;
	register struct protosw *pr = so->so_proto;
	register int s;
	struct sockbuf asb;

	/* SB_NOINTR: the lock acquisition below must not be interrupted. */
	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK);
	s = splimp();
	socantrcvmore(so);
	sbunlock(sb);
	/* Snapshot the sockbuf, then present an empty one to the world. */
	asb = *sb;
	bzero((caddr_t)sb, sizeof (*sb));
	splx(s);
	/* Close any file descriptors riding in SCM_RIGHTS messages. */
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}
915
916/*
917 * Perhaps this routine, and sooptcopyout(), below, ought to come in
918 * an additional variant to handle the case where the option value needs
919 * to be some kind of integer, but not a specific size.
920 * In addition to their use here, these functions are also called by the
921 * protocol-level pr_ctloutput() routines.
922 */
923int
924sooptcopyin(sopt, buf, len, minlen)
925 struct sockopt *sopt;
926 void *buf;
927 size_t len;
928 size_t minlen;
929{
930 size_t valsize;
931
932 /*
933 * If the user gives us more than we wanted, we ignore it,
934 * but if we don't get the minimum length the caller
935 * wants, we return EINVAL. On success, sopt->sopt_valsize
936 * is set to however much we actually retrieved.
937 */
938 if ((valsize = sopt->sopt_valsize) < minlen)
939 return EINVAL;
940 if (valsize > len)
941 sopt->sopt_valsize = valsize = len;
942
943 if (sopt->sopt_p != 0)
944 return (copyin(sopt->sopt_val, buf, valsize));
945
946 bcopy(sopt->sopt_val, buf, valsize);
947 return 0;
948}
949
/*
 * Handle setsockopt(2).  Options at a level other than SOL_SOCKET are
 * passed through to the protocol's pr_ctloutput routine; socket-level
 * options are applied here and then also offered to the protocol so it
 * can react (e.g. to buffer size changes).  Returns 0 or an errno.
 */
int
sosetopt(so, sopt)
	struct socket *so;
	struct sockopt *sopt;
{
	int error, optval;
	struct linger l;
	struct timeval tv;
	short val;

	error = 0;
	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
				  (so, sopt));
		error = ENOPROTOOPT;
	} else {
		switch (sopt->sopt_name) {
		case SO_LINGER:
			error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
			if (error)
				goto bad;

			so->so_linger = l.l_linger;
			if (l.l_onoff)
				so->so_options |= SO_LINGER;
			else
				so->so_options &= ~SO_LINGER;
			break;

		/* Simple boolean options: set or clear the option bit. */
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;
			if (optval)
				so->so_options |= sopt->sopt_name;
			else
				so->so_options &= ~sopt->sopt_name;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;

			/*
			 * Values < 1 make no sense for any of these
			 * options, so disallow them.
			 */
			if (optval < 1) {
				error = EINVAL;
				goto bad;
			}

			switch (sopt->sopt_name) {
			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
					      &so->so_snd : &so->so_rcv,
					      (u_long) optval) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			/*
			 * Make sure the low-water is never greater than
			 * the high-water.
			 */
			case SO_SNDLOWAT:
				so->so_snd.sb_lowat =
				    (optval > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : optval;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat =
				    (optval > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : optval;
				break;
			}
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			error = sooptcopyin(sopt, &tv, sizeof tv,
					    sizeof tv);
			if (error)
				goto bad;

			/*
			 * Timeouts are stored in ticks in a short.
			 * NOTE(review): tv_sec * hz can itself overflow a
			 * long for very large tv_sec before this check
			 * triggers -- worth verifying the input range.
			 */
			if (tv.tv_sec * hz + tv.tv_usec / tick > SHRT_MAX) {
				error = EDOM;
				goto bad;
			}
			val = tv.tv_sec * hz + tv.tv_usec / tick;

			switch (sopt->sopt_name) {
			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;

		default:
			error = ENOPROTOOPT;
			break;
		}
		/* Let the protocol observe/refine the new setting. */
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)
				  (so, sopt));
		}
	}
bad:
	return (error);
}
1080
/*
 * Helper routine for getsockopt(2): copy an option value back to the
 * caller, truncated to the user's buffer size.
 */
int
sooptcopyout(sopt, buf, len)
	struct sockopt *sopt;
	void *buf;
	size_t len;
{
	int error;
	size_t valsize;

	error = 0;

	/*
	 * Documented get behavior is that we always return a value,
	 * possibly truncated to fit in the user's buffer.
	 * Traditional behavior is that we always tell the user
	 * precisely how much we copied, rather than something useful
	 * like the total amount we had available for her.
	 * Note that this interface is not idempotent; the entire answer
	 * must be generated ahead of time.
	 */
	valsize = min(len, sopt->sopt_valsize);
	sopt->sopt_valsize = valsize;
	if (sopt->sopt_val != 0) {
		if (sopt->sopt_p != 0)
			error = copyout(buf, sopt->sopt_val, valsize);
		else
			bcopy(buf, sopt->sopt_val, valsize);
	}
	return error;
}
1112
/*
 * Handle getsockopt(2).  Options at a level other than SOL_SOCKET are
 * passed straight through to the protocol's pr_ctloutput routine.
 * Socket-level values are copied out to the caller via sooptcopyout().
 * Returns 0 or an errno value.
 */
int
sogetopt(so, sopt)
	struct socket *so;
	struct sockopt *sopt;
{
	int error, optval;
	struct linger l;
	struct timeval tv;

	error = 0;
	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)
				  (so, sopt));
		} else
			return (ENOPROTOOPT);
	} else {
		switch (sopt->sopt_name) {
		case SO_LINGER:
			l.l_onoff = so->so_options & SO_LINGER;
			l.l_linger = so->so_linger;
			error = sooptcopyout(sopt, &l, sizeof l);
			break;

		/*
		 * Boolean options report the raw so_options bit
		 * (nonzero when set), not a normalized 0/1.
		 */
		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			optval = so->so_options & sopt->sopt_name;
integer:
			/* Common exit for all integer-valued options. */
			error = sooptcopyout(sopt, &optval, sizeof optval);
			break;

		case SO_TYPE:
			optval = so->so_type;
			goto integer;

		case SO_ERROR:
			/* Reading the pending error clears it. */
			optval = so->so_error;
			so->so_error = 0;
			goto integer;

		case SO_SNDBUF:
			optval = so->so_snd.sb_hiwat;
			goto integer;

		case SO_RCVBUF:
			optval = so->so_rcv.sb_hiwat;
			goto integer;

		case SO_SNDLOWAT:
			optval = so->so_snd.sb_lowat;
			goto integer;

		case SO_RCVLOWAT:
			optval = so->so_rcv.sb_lowat;
			goto integer;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			/* Convert the tick-count timeout back to a timeval. */
			optval = (sopt->sopt_name == SO_SNDTIMEO ?
				  so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			tv.tv_sec = optval / hz;
			tv.tv_usec = (optval % hz) * tick;
			error = sooptcopyout(sopt, &tv, sizeof tv);
			break;

		default:
			error = ENOPROTOOPT;
			break;
		}
		return (error);
	}
}
1193
/*
 * Urgent (out-of-band) data has arrived on this socket: post SIGURG
 * to the registered async-I/O owner (if any) and wake up any
 * select()/poll() waiters on the receive buffer.
 */
void
sohasoutofband(so)
	register struct socket *so;
{
	if (so->so_sigio != NULL)
		pgsigio(so->so_sigio, SIGURG, 0);
	selwakeup(&so->so_rcv.sb_sel);
}
1202
/*
 * poll(2) backend for sockets: report which of the requested events
 * are currently true; if none are, record this process for selwakeup()
 * on the relevant buffer(s).  Runs at splnet to keep socket state
 * stable while we look.
 */
int
sopoll(struct socket *so, int events, struct ucred *cred, struct proc *p)
{
	int revents = 0;
	int s = splnet();

	if (events & (POLLIN | POLLRDNORM))
		if (soreadable(so))
			revents |= events & (POLLIN | POLLRDNORM);

	if (events & (POLLOUT | POLLWRNORM))
		if (sowriteable(so))
			revents |= events & (POLLOUT | POLLWRNORM);

	if (events & (POLLPRI | POLLRDBAND))
		if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
			revents |= events & (POLLPRI | POLLRDBAND);

	if (revents == 0) {
		/* Nothing ready: arrange to be woken when something is. */
		if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
			selrecord(p, &so->so_rcv.sb_sel);
			so->so_rcv.sb_flags |= SB_SEL;
		}

		if (events & (POLLOUT | POLLWRNORM)) {
			selrecord(p, &so->so_snd.sb_sel);
			so->so_snd.sb_flags |= SB_SEL;
		}
	}

	splx(s);
	return (revents);
}