uipc_socket.c revision 188146
1/*- 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993 3 * The Regents of the University of California. 4 * Copyright (c) 2004 The FreeBSD Foundation 5 * Copyright (c) 2004-2008 Robert N. M. Watson 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94 33 */ 34 35/* 36 * Comments on the socket life cycle: 37 * 38 * soalloc() sets of socket layer state for a socket, called only by 39 * socreate() and sonewconn(). Socket layer private. 
40 * 41 * sodealloc() tears down socket layer state for a socket, called only by 42 * sofree() and sonewconn(). Socket layer private. 43 * 44 * pru_attach() associates protocol layer state with an allocated socket; 45 * called only once, may fail, aborting socket allocation. This is called 46 * from socreate() and sonewconn(). Socket layer private. 47 * 48 * pru_detach() disassociates protocol layer state from an attached socket, 49 * and will be called exactly once for sockets in which pru_attach() has 50 * been successfully called. If pru_attach() returned an error, 51 * pru_detach() will not be called. Socket layer private. 52 * 53 * pru_abort() and pru_close() notify the protocol layer that the last 54 * consumer of a socket is starting to tear down the socket, and that the 55 * protocol should terminate the connection. Historically, pru_abort() also 56 * detached protocol state from the socket state, but this is no longer the 57 * case. 58 * 59 * socreate() creates a socket and attaches protocol state. This is a public 60 * interface that may be used by socket layer consumers to create new 61 * sockets. 62 * 63 * sonewconn() creates a socket and attaches protocol state. This is a 64 * public interface that may be used by protocols to create new sockets when 65 * a new connection is received and will be available for accept() on a 66 * listen socket. 67 * 68 * soclose() destroys a socket after possibly waiting for it to disconnect. 69 * This is a public interface that socket consumers should use to close and 70 * release a socket when done with it. 71 * 72 * soabort() destroys a socket without waiting for it to disconnect (used 73 * only for incoming connections that are already partially or fully 74 * connected). 
This is used internally by the socket layer when clearing 75 * listen socket queues (due to overflow or close on the listen socket), but 76 * is also a public interface protocols may use to abort connections in 77 * their incomplete listen queues should they no longer be required. Sockets 78 * placed in completed connection listen queues should not be aborted for 79 * reasons described in the comment above the soclose() implementation. This 80 * is not a general purpose close routine, and except in the specific 81 * circumstances described here, should not be used. 82 * 83 * sofree() will free a socket and its protocol state if all references on 84 * the socket have been released, and is the public interface to attempt to 85 * free a socket when a reference is removed. This is a socket layer private 86 * interface. 87 * 88 * NOTE: In addition to socreate() and soclose(), which provide a single 89 * socket reference to the consumer to be managed as required, there are two 90 * calls to explicitly manage socket references, soref(), and sorele(). 91 * Currently, these are generally required only when transitioning a socket 92 * from a listen queue to a file descriptor, in order to prevent garbage 93 * collection of the socket at an untimely moment. For a number of reasons, 94 * these interfaces are not preferred, and should be avoided. 
95 */ 96 97#include <sys/cdefs.h> 98__FBSDID("$FreeBSD: head/sys/kern/uipc_socket.c 188146 2009-02-05 14:15:18Z jamie $"); 99 100#include "opt_inet.h" 101#include "opt_inet6.h" 102#include "opt_mac.h" 103#include "opt_zero.h" 104#include "opt_compat.h" 105 106#include <sys/param.h> 107#include <sys/systm.h> 108#include <sys/fcntl.h> 109#include <sys/limits.h> 110#include <sys/lock.h> 111#include <sys/mac.h> 112#include <sys/malloc.h> 113#include <sys/mbuf.h> 114#include <sys/mutex.h> 115#include <sys/domain.h> 116#include <sys/file.h> /* for struct knote */ 117#include <sys/kernel.h> 118#include <sys/event.h> 119#include <sys/eventhandler.h> 120#include <sys/poll.h> 121#include <sys/proc.h> 122#include <sys/protosw.h> 123#include <sys/socket.h> 124#include <sys/socketvar.h> 125#include <sys/resourcevar.h> 126#include <net/route.h> 127#include <sys/signalvar.h> 128#include <sys/stat.h> 129#include <sys/sx.h> 130#include <sys/sysctl.h> 131#include <sys/uio.h> 132#include <sys/jail.h> 133 134#include <security/mac/mac_framework.h> 135 136#include <vm/uma.h> 137 138#ifdef COMPAT_IA32 139#include <sys/mount.h> 140#include <sys/sysent.h> 141#include <compat/freebsd32/freebsd32.h> 142#endif 143 144static int soreceive_rcvoob(struct socket *so, struct uio *uio, 145 int flags); 146 147static void filt_sordetach(struct knote *kn); 148static int filt_soread(struct knote *kn, long hint); 149static void filt_sowdetach(struct knote *kn); 150static int filt_sowrite(struct knote *kn, long hint); 151static int filt_solisten(struct knote *kn, long hint); 152 153static struct filterops solisten_filtops = 154 { 1, NULL, filt_sordetach, filt_solisten }; 155static struct filterops soread_filtops = 156 { 1, NULL, filt_sordetach, filt_soread }; 157static struct filterops sowrite_filtops = 158 { 1, NULL, filt_sowdetach, filt_sowrite }; 159 160uma_zone_t socket_zone; 161so_gen_t so_gencnt; /* generation count for sockets */ 162 163int maxsockets; 164 165MALLOC_DEFINE(M_SONAME, "soname", 
"socket name"); 166MALLOC_DEFINE(M_PCB, "pcb", "protocol control block"); 167 168static int somaxconn = SOMAXCONN; 169static int sysctl_somaxconn(SYSCTL_HANDLER_ARGS); 170/* XXX: we dont have SYSCTL_USHORT */ 171SYSCTL_PROC(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLTYPE_UINT | CTLFLAG_RW, 172 0, sizeof(int), sysctl_somaxconn, "I", "Maximum pending socket connection " 173 "queue size"); 174static int numopensockets; 175SYSCTL_INT(_kern_ipc, OID_AUTO, numopensockets, CTLFLAG_RD, 176 &numopensockets, 0, "Number of open sockets"); 177#ifdef ZERO_COPY_SOCKETS 178/* These aren't static because they're used in other files. */ 179int so_zero_copy_send = 1; 180int so_zero_copy_receive = 1; 181SYSCTL_NODE(_kern_ipc, OID_AUTO, zero_copy, CTLFLAG_RD, 0, 182 "Zero copy controls"); 183SYSCTL_INT(_kern_ipc_zero_copy, OID_AUTO, receive, CTLFLAG_RW, 184 &so_zero_copy_receive, 0, "Enable zero copy receive"); 185SYSCTL_INT(_kern_ipc_zero_copy, OID_AUTO, send, CTLFLAG_RW, 186 &so_zero_copy_send, 0, "Enable zero copy send"); 187#endif /* ZERO_COPY_SOCKETS */ 188 189/* 190 * accept_mtx locks down per-socket fields relating to accept queues. See 191 * socketvar.h for an annotation of the protected fields of struct socket. 192 */ 193struct mtx accept_mtx; 194MTX_SYSINIT(accept_mtx, &accept_mtx, "accept", MTX_DEF); 195 196/* 197 * so_global_mtx protects so_gencnt, numopensockets, and the per-socket 198 * so_gencnt field. 199 */ 200static struct mtx so_global_mtx; 201MTX_SYSINIT(so_global_mtx, &so_global_mtx, "so_glabel", MTX_DEF); 202 203/* 204 * General IPC sysctl name space, used by sockets and a variety of other IPC 205 * types. 206 */ 207SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC"); 208 209/* 210 * Sysctl to get and set the maximum global sockets limit. Notify protocols 211 * of the change so that they can update their dependent limits as required. 
212 */ 213static int 214sysctl_maxsockets(SYSCTL_HANDLER_ARGS) 215{ 216 int error, newmaxsockets; 217 218 newmaxsockets = maxsockets; 219 error = sysctl_handle_int(oidp, &newmaxsockets, 0, req); 220 if (error == 0 && req->newptr) { 221 if (newmaxsockets > maxsockets) { 222 maxsockets = newmaxsockets; 223 if (maxsockets > ((maxfiles / 4) * 3)) { 224 maxfiles = (maxsockets * 5) / 4; 225 maxfilesperproc = (maxfiles * 9) / 10; 226 } 227 EVENTHANDLER_INVOKE(maxsockets_change); 228 } else 229 error = EINVAL; 230 } 231 return (error); 232} 233 234SYSCTL_PROC(_kern_ipc, OID_AUTO, maxsockets, CTLTYPE_INT|CTLFLAG_RW, 235 &maxsockets, 0, sysctl_maxsockets, "IU", 236 "Maximum number of sockets avaliable"); 237 238/* 239 * Initialise maxsockets. This SYSINIT must be run after 240 * tunable_mbinit(). 241 */ 242static void 243init_maxsockets(void *ignored) 244{ 245 246 TUNABLE_INT_FETCH("kern.ipc.maxsockets", &maxsockets); 247 maxsockets = imax(maxsockets, imax(maxfiles, nmbclusters)); 248} 249SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_maxsockets, NULL); 250 251/* 252 * Socket operation routines. These routines are called by the routines in 253 * sys_socket.c or from a system process, and implement the semantics of 254 * socket operations by switching out to the protocol specific routines. 255 */ 256 257/* 258 * Get a socket structure from our zone, and initialize it. Note that it 259 * would probably be better to allocate socket and PCB at the same time, but 260 * I'm not convinced that all the protocols can be easily modified to do 261 * this. 262 * 263 * soalloc() returns a socket with a ref count of 0. 
264 */ 265static struct socket * 266soalloc(void) 267{ 268 struct socket *so; 269 270 so = uma_zalloc(socket_zone, M_NOWAIT | M_ZERO); 271 if (so == NULL) 272 return (NULL); 273#ifdef MAC 274 if (mac_socket_init(so, M_NOWAIT) != 0) { 275 uma_zfree(socket_zone, so); 276 return (NULL); 277 } 278#endif 279 SOCKBUF_LOCK_INIT(&so->so_snd, "so_snd"); 280 SOCKBUF_LOCK_INIT(&so->so_rcv, "so_rcv"); 281 sx_init(&so->so_snd.sb_sx, "so_snd_sx"); 282 sx_init(&so->so_rcv.sb_sx, "so_rcv_sx"); 283 TAILQ_INIT(&so->so_aiojobq); 284 mtx_lock(&so_global_mtx); 285 so->so_gencnt = ++so_gencnt; 286 ++numopensockets; 287 mtx_unlock(&so_global_mtx); 288 return (so); 289} 290 291/* 292 * Free the storage associated with a socket at the socket layer, tear down 293 * locks, labels, etc. All protocol state is assumed already to have been 294 * torn down (and possibly never set up) by the caller. 295 */ 296static void 297sodealloc(struct socket *so) 298{ 299 300 KASSERT(so->so_count == 0, ("sodealloc(): so_count %d", so->so_count)); 301 KASSERT(so->so_pcb == NULL, ("sodealloc(): so_pcb != NULL")); 302 303 mtx_lock(&so_global_mtx); 304 so->so_gencnt = ++so_gencnt; 305 --numopensockets; /* Could be below, but faster here. */ 306 mtx_unlock(&so_global_mtx); 307 if (so->so_rcv.sb_hiwat) 308 (void)chgsbsize(so->so_cred->cr_uidinfo, 309 &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY); 310 if (so->so_snd.sb_hiwat) 311 (void)chgsbsize(so->so_cred->cr_uidinfo, 312 &so->so_snd.sb_hiwat, 0, RLIM_INFINITY); 313#ifdef INET 314 /* remove acccept filter if one is present. */ 315 if (so->so_accf != NULL) 316 do_setopt_accept_filter(so, NULL); 317#endif 318#ifdef MAC 319 mac_socket_destroy(so); 320#endif 321 crfree(so->so_cred); 322 sx_destroy(&so->so_snd.sb_sx); 323 sx_destroy(&so->so_rcv.sb_sx); 324 SOCKBUF_LOCK_DESTROY(&so->so_snd); 325 SOCKBUF_LOCK_DESTROY(&so->so_rcv); 326 uma_zfree(socket_zone, so); 327} 328 329/* 330 * socreate returns a socket with a ref count of 1. 
The socket should be 331 * closed with soclose(). 332 */ 333int 334socreate(int dom, struct socket **aso, int type, int proto, 335 struct ucred *cred, struct thread *td) 336{ 337 struct protosw *prp; 338 struct socket *so; 339 int error; 340 341 if (proto) 342 prp = pffindproto(dom, proto, type); 343 else 344 prp = pffindtype(dom, type); 345 346 if (prp == NULL || prp->pr_usrreqs->pru_attach == NULL || 347 prp->pr_usrreqs->pru_attach == pru_attach_notsupp) 348 return (EPROTONOSUPPORT); 349 350 if (prison_check_af(cred, prp->pr_domain->dom_family) != 0) 351 return (EPROTONOSUPPORT); 352 353 if (prp->pr_type != type) 354 return (EPROTOTYPE); 355 so = soalloc(); 356 if (so == NULL) 357 return (ENOBUFS); 358 359 TAILQ_INIT(&so->so_incomp); 360 TAILQ_INIT(&so->so_comp); 361 so->so_type = type; 362 so->so_cred = crhold(cred); 363 if ((prp->pr_domain->dom_family == PF_INET) || 364 (prp->pr_domain->dom_family == PF_ROUTE)) 365 so->so_fibnum = td->td_proc->p_fibnum; 366 else 367 so->so_fibnum = 0; 368 so->so_proto = prp; 369#ifdef MAC 370 mac_socket_create(cred, so); 371#endif 372 knlist_init(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv), 373 NULL, NULL, NULL); 374 knlist_init(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd), 375 NULL, NULL, NULL); 376 so->so_count = 1; 377 /* 378 * Auto-sizing of socket buffers is managed by the protocols and 379 * the appropriate flags must be set in the pru_attach function. 
380 */ 381 error = (*prp->pr_usrreqs->pru_attach)(so, proto, td); 382 if (error) { 383 KASSERT(so->so_count == 1, ("socreate: so_count %d", 384 so->so_count)); 385 so->so_count = 0; 386 sodealloc(so); 387 return (error); 388 } 389 *aso = so; 390 return (0); 391} 392 393#ifdef REGRESSION 394static int regression_sonewconn_earlytest = 1; 395SYSCTL_INT(_regression, OID_AUTO, sonewconn_earlytest, CTLFLAG_RW, 396 ®ression_sonewconn_earlytest, 0, "Perform early sonewconn limit test"); 397#endif 398 399/* 400 * When an attempt at a new connection is noted on a socket which accepts 401 * connections, sonewconn is called. If the connection is possible (subject 402 * to space constraints, etc.) then we allocate a new structure, propoerly 403 * linked into the data structure of the original socket, and return this. 404 * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED. 405 * 406 * Note: the ref count on the socket is 0 on return. 407 */ 408struct socket * 409sonewconn(struct socket *head, int connstatus) 410{ 411 struct socket *so; 412 int over; 413 414 ACCEPT_LOCK(); 415 over = (head->so_qlen > 3 * head->so_qlimit / 2); 416 ACCEPT_UNLOCK(); 417#ifdef REGRESSION 418 if (regression_sonewconn_earlytest && over) 419#else 420 if (over) 421#endif 422 return (NULL); 423 so = soalloc(); 424 if (so == NULL) 425 return (NULL); 426 if ((head->so_options & SO_ACCEPTFILTER) != 0) 427 connstatus = 0; 428 so->so_head = head; 429 so->so_type = head->so_type; 430 so->so_options = head->so_options &~ SO_ACCEPTCONN; 431 so->so_linger = head->so_linger; 432 so->so_state = head->so_state | SS_NOFDREF; 433 so->so_proto = head->so_proto; 434 so->so_cred = crhold(head->so_cred); 435#ifdef MAC 436 SOCK_LOCK(head); 437 mac_socket_newconn(head, so); 438 SOCK_UNLOCK(head); 439#endif 440 knlist_init(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv), 441 NULL, NULL, NULL); 442 knlist_init(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd), 443 NULL, NULL, NULL); 444 if (soreserve(so, 
head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat) || 445 (*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) { 446 sodealloc(so); 447 return (NULL); 448 } 449 so->so_rcv.sb_lowat = head->so_rcv.sb_lowat; 450 so->so_snd.sb_lowat = head->so_snd.sb_lowat; 451 so->so_rcv.sb_timeo = head->so_rcv.sb_timeo; 452 so->so_snd.sb_timeo = head->so_snd.sb_timeo; 453 so->so_rcv.sb_flags |= head->so_rcv.sb_flags & SB_AUTOSIZE; 454 so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE; 455 so->so_state |= connstatus; 456 ACCEPT_LOCK(); 457 if (connstatus) { 458 TAILQ_INSERT_TAIL(&head->so_comp, so, so_list); 459 so->so_qstate |= SQ_COMP; 460 head->so_qlen++; 461 } else { 462 /* 463 * Keep removing sockets from the head until there's room for 464 * us to insert on the tail. In pre-locking revisions, this 465 * was a simple if(), but as we could be racing with other 466 * threads and soabort() requires dropping locks, we must 467 * loop waiting for the condition to be true. 468 */ 469 while (head->so_incqlen > head->so_qlimit) { 470 struct socket *sp; 471 sp = TAILQ_FIRST(&head->so_incomp); 472 TAILQ_REMOVE(&head->so_incomp, sp, so_list); 473 head->so_incqlen--; 474 sp->so_qstate &= ~SQ_INCOMP; 475 sp->so_head = NULL; 476 ACCEPT_UNLOCK(); 477 soabort(sp); 478 ACCEPT_LOCK(); 479 } 480 TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list); 481 so->so_qstate |= SQ_INCOMP; 482 head->so_incqlen++; 483 } 484 ACCEPT_UNLOCK(); 485 if (connstatus) { 486 sorwakeup(head); 487 wakeup_one(&head->so_timeo); 488 } 489 return (so); 490} 491 492int 493sobind(struct socket *so, struct sockaddr *nam, struct thread *td) 494{ 495 496 return ((*so->so_proto->pr_usrreqs->pru_bind)(so, nam, td)); 497} 498 499/* 500 * solisten() transitions a socket from a non-listening state to a listening 501 * state, but can also be used to update the listen queue depth on an 502 * existing listen socket. 
The protocol will call back into the sockets 503 * layer using solisten_proto_check() and solisten_proto() to check and set 504 * socket-layer listen state. Call backs are used so that the protocol can 505 * acquire both protocol and socket layer locks in whatever order is required 506 * by the protocol. 507 * 508 * Protocol implementors are advised to hold the socket lock across the 509 * socket-layer test and set to avoid races at the socket layer. 510 */ 511int 512solisten(struct socket *so, int backlog, struct thread *td) 513{ 514 515 return ((*so->so_proto->pr_usrreqs->pru_listen)(so, backlog, td)); 516} 517 518int 519solisten_proto_check(struct socket *so) 520{ 521 522 SOCK_LOCK_ASSERT(so); 523 524 if (so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING | 525 SS_ISDISCONNECTING)) 526 return (EINVAL); 527 return (0); 528} 529 530void 531solisten_proto(struct socket *so, int backlog) 532{ 533 534 SOCK_LOCK_ASSERT(so); 535 536 if (backlog < 0 || backlog > somaxconn) 537 backlog = somaxconn; 538 so->so_qlimit = backlog; 539 so->so_options |= SO_ACCEPTCONN; 540} 541 542/* 543 * Attempt to free a socket. This should really be sotryfree(). 544 * 545 * sofree() will succeed if: 546 * 547 * - There are no outstanding file descriptor references or related consumers 548 * (so_count == 0). 549 * 550 * - The socket has been closed by user space, if ever open (SS_NOFDREF). 551 * 552 * - The protocol does not have an outstanding strong reference on the socket 553 * (SS_PROTOREF). 554 * 555 * - The socket is not in a completed connection queue, so a process has been 556 * notified that it is present. If it is removed, the user process may 557 * block in accept() despite select() saying the socket was ready. 558 * 559 * Otherwise, it will quietly abort so that a future call to sofree(), when 560 * conditions are right, can succeed. 
561 */ 562void 563sofree(struct socket *so) 564{ 565 struct protosw *pr = so->so_proto; 566 struct socket *head; 567 568 ACCEPT_LOCK_ASSERT(); 569 SOCK_LOCK_ASSERT(so); 570 571 if ((so->so_state & SS_NOFDREF) == 0 || so->so_count != 0 || 572 (so->so_state & SS_PROTOREF) || (so->so_qstate & SQ_COMP)) { 573 SOCK_UNLOCK(so); 574 ACCEPT_UNLOCK(); 575 return; 576 } 577 578 head = so->so_head; 579 if (head != NULL) { 580 KASSERT((so->so_qstate & SQ_COMP) != 0 || 581 (so->so_qstate & SQ_INCOMP) != 0, 582 ("sofree: so_head != NULL, but neither SQ_COMP nor " 583 "SQ_INCOMP")); 584 KASSERT((so->so_qstate & SQ_COMP) == 0 || 585 (so->so_qstate & SQ_INCOMP) == 0, 586 ("sofree: so->so_qstate is SQ_COMP and also SQ_INCOMP")); 587 TAILQ_REMOVE(&head->so_incomp, so, so_list); 588 head->so_incqlen--; 589 so->so_qstate &= ~SQ_INCOMP; 590 so->so_head = NULL; 591 } 592 KASSERT((so->so_qstate & SQ_COMP) == 0 && 593 (so->so_qstate & SQ_INCOMP) == 0, 594 ("sofree: so_head == NULL, but still SQ_COMP(%d) or SQ_INCOMP(%d)", 595 so->so_qstate & SQ_COMP, so->so_qstate & SQ_INCOMP)); 596 if (so->so_options & SO_ACCEPTCONN) { 597 KASSERT((TAILQ_EMPTY(&so->so_comp)), ("sofree: so_comp populated")); 598 KASSERT((TAILQ_EMPTY(&so->so_incomp)), ("sofree: so_comp populated")); 599 } 600 SOCK_UNLOCK(so); 601 ACCEPT_UNLOCK(); 602 603 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL) 604 (*pr->pr_domain->dom_dispose)(so->so_rcv.sb_mb); 605 if (pr->pr_usrreqs->pru_detach != NULL) 606 (*pr->pr_usrreqs->pru_detach)(so); 607 608 /* 609 * From this point on, we assume that no other references to this 610 * socket exist anywhere else in the stack. Therefore, no locks need 611 * to be acquired or held. 612 * 613 * We used to do a lot of socket buffer and socket locking here, as 614 * well as invoke sorflush() and perform wakeups. The direct call to 615 * dom_dispose() and sbrelease_internal() are an inlining of what was 616 * necessary from sorflush(). 
617 * 618 * Notice that the socket buffer and kqueue state are torn down 619 * before calling pru_detach. This means that protocols shold not 620 * assume they can perform socket wakeups, etc, in their detach code. 621 */ 622 sbdestroy(&so->so_snd, so); 623 sbdestroy(&so->so_rcv, so); 624 knlist_destroy(&so->so_rcv.sb_sel.si_note); 625 knlist_destroy(&so->so_snd.sb_sel.si_note); 626 sodealloc(so); 627} 628 629/* 630 * Close a socket on last file table reference removal. Initiate disconnect 631 * if connected. Free socket when disconnect complete. 632 * 633 * This function will sorele() the socket. Note that soclose() may be called 634 * prior to the ref count reaching zero. The actual socket structure will 635 * not be freed until the ref count reaches zero. 636 */ 637int 638soclose(struct socket *so) 639{ 640 int error = 0; 641 642 KASSERT(!(so->so_state & SS_NOFDREF), ("soclose: SS_NOFDREF on enter")); 643 644 funsetown(&so->so_sigio); 645 if (so->so_state & SS_ISCONNECTED) { 646 if ((so->so_state & SS_ISDISCONNECTING) == 0) { 647 error = sodisconnect(so); 648 if (error) 649 goto drop; 650 } 651 if (so->so_options & SO_LINGER) { 652 if ((so->so_state & SS_ISDISCONNECTING) && 653 (so->so_state & SS_NBIO)) 654 goto drop; 655 while (so->so_state & SS_ISCONNECTED) { 656 error = tsleep(&so->so_timeo, 657 PSOCK | PCATCH, "soclos", so->so_linger * hz); 658 if (error) 659 break; 660 } 661 } 662 } 663 664drop: 665 if (so->so_proto->pr_usrreqs->pru_close != NULL) 666 (*so->so_proto->pr_usrreqs->pru_close)(so); 667 if (so->so_options & SO_ACCEPTCONN) { 668 struct socket *sp; 669 ACCEPT_LOCK(); 670 while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) { 671 TAILQ_REMOVE(&so->so_incomp, sp, so_list); 672 so->so_incqlen--; 673 sp->so_qstate &= ~SQ_INCOMP; 674 sp->so_head = NULL; 675 ACCEPT_UNLOCK(); 676 soabort(sp); 677 ACCEPT_LOCK(); 678 } 679 while ((sp = TAILQ_FIRST(&so->so_comp)) != NULL) { 680 TAILQ_REMOVE(&so->so_comp, sp, so_list); 681 so->so_qlen--; 682 sp->so_qstate &= 
~SQ_COMP; 683 sp->so_head = NULL; 684 ACCEPT_UNLOCK(); 685 soabort(sp); 686 ACCEPT_LOCK(); 687 } 688 ACCEPT_UNLOCK(); 689 } 690 ACCEPT_LOCK(); 691 SOCK_LOCK(so); 692 KASSERT((so->so_state & SS_NOFDREF) == 0, ("soclose: NOFDREF")); 693 so->so_state |= SS_NOFDREF; 694 sorele(so); 695 return (error); 696} 697 698/* 699 * soabort() is used to abruptly tear down a connection, such as when a 700 * resource limit is reached (listen queue depth exceeded), or if a listen 701 * socket is closed while there are sockets waiting to be accepted. 702 * 703 * This interface is tricky, because it is called on an unreferenced socket, 704 * and must be called only by a thread that has actually removed the socket 705 * from the listen queue it was on, or races with other threads are risked. 706 * 707 * This interface will call into the protocol code, so must not be called 708 * with any socket locks held. Protocols do call it while holding their own 709 * recursible protocol mutexes, but this is something that should be subject 710 * to review in the future. 711 */ 712void 713soabort(struct socket *so) 714{ 715 716 /* 717 * In as much as is possible, assert that no references to this 718 * socket are held. This is not quite the same as asserting that the 719 * current thread is responsible for arranging for no references, but 720 * is as close as we can get for now. 
721 */ 722 KASSERT(so->so_count == 0, ("soabort: so_count")); 723 KASSERT((so->so_state & SS_PROTOREF) == 0, ("soabort: SS_PROTOREF")); 724 KASSERT(so->so_state & SS_NOFDREF, ("soabort: !SS_NOFDREF")); 725 KASSERT((so->so_state & SQ_COMP) == 0, ("soabort: SQ_COMP")); 726 KASSERT((so->so_state & SQ_INCOMP) == 0, ("soabort: SQ_INCOMP")); 727 728 if (so->so_proto->pr_usrreqs->pru_abort != NULL) 729 (*so->so_proto->pr_usrreqs->pru_abort)(so); 730 ACCEPT_LOCK(); 731 SOCK_LOCK(so); 732 sofree(so); 733} 734 735int 736soaccept(struct socket *so, struct sockaddr **nam) 737{ 738 int error; 739 740 SOCK_LOCK(so); 741 KASSERT((so->so_state & SS_NOFDREF) != 0, ("soaccept: !NOFDREF")); 742 so->so_state &= ~SS_NOFDREF; 743 SOCK_UNLOCK(so); 744 error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam); 745 return (error); 746} 747 748int 749soconnect(struct socket *so, struct sockaddr *nam, struct thread *td) 750{ 751 int error; 752 753 if (so->so_options & SO_ACCEPTCONN) 754 return (EOPNOTSUPP); 755 /* 756 * If protocol is connection-based, can only connect once. 757 * Otherwise, if connected, try to disconnect first. This allows 758 * user to disconnect by connecting to, e.g., a null address. 759 */ 760 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && 761 ((so->so_proto->pr_flags & PR_CONNREQUIRED) || 762 (error = sodisconnect(so)))) { 763 error = EISCONN; 764 } else { 765 /* 766 * Prevent accumulated error from previous connection from 767 * biting us. 
768 */ 769 so->so_error = 0; 770 error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, td); 771 } 772 773 return (error); 774} 775 776int 777soconnect2(struct socket *so1, struct socket *so2) 778{ 779 780 return ((*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2)); 781} 782 783int 784sodisconnect(struct socket *so) 785{ 786 int error; 787 788 if ((so->so_state & SS_ISCONNECTED) == 0) 789 return (ENOTCONN); 790 if (so->so_state & SS_ISDISCONNECTING) 791 return (EALREADY); 792 error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so); 793 return (error); 794} 795 796#ifdef ZERO_COPY_SOCKETS 797struct so_zerocopy_stats{ 798 int size_ok; 799 int align_ok; 800 int found_ifp; 801}; 802struct so_zerocopy_stats so_zerocp_stats = {0,0,0}; 803#include <netinet/in.h> 804#include <net/route.h> 805#include <netinet/in_pcb.h> 806#include <vm/vm.h> 807#include <vm/vm_page.h> 808#include <vm/vm_object.h> 809 810/* 811 * sosend_copyin() is only used if zero copy sockets are enabled. Otherwise 812 * sosend_dgram() and sosend_generic() use m_uiotombuf(). 813 * 814 * sosend_copyin() accepts a uio and prepares an mbuf chain holding part or 815 * all of the data referenced by the uio. If desired, it uses zero-copy. 816 * *space will be updated to reflect data copied in. 817 * 818 * NB: If atomic I/O is requested, the caller must already have checked that 819 * space can hold resid bytes. 820 * 821 * NB: In the event of an error, the caller may need to free the partial 822 * chain pointed to by *mpp. The contents of both *uio and *space may be 823 * modified even in the case of an error. 
824 */ 825static int 826sosend_copyin(struct uio *uio, struct mbuf **retmp, int atomic, long *space, 827 int flags) 828{ 829 struct mbuf *m, **mp, *top; 830 long len, resid; 831 int error; 832#ifdef ZERO_COPY_SOCKETS 833 int cow_send; 834#endif 835 836 *retmp = top = NULL; 837 mp = ⊤ 838 len = 0; 839 resid = uio->uio_resid; 840 error = 0; 841 do { 842#ifdef ZERO_COPY_SOCKETS 843 cow_send = 0; 844#endif /* ZERO_COPY_SOCKETS */ 845 if (resid >= MINCLSIZE) { 846#ifdef ZERO_COPY_SOCKETS 847 if (top == NULL) { 848 m = m_gethdr(M_WAITOK, MT_DATA); 849 m->m_pkthdr.len = 0; 850 m->m_pkthdr.rcvif = NULL; 851 } else 852 m = m_get(M_WAITOK, MT_DATA); 853 if (so_zero_copy_send && 854 resid>=PAGE_SIZE && 855 *space>=PAGE_SIZE && 856 uio->uio_iov->iov_len>=PAGE_SIZE) { 857 so_zerocp_stats.size_ok++; 858 so_zerocp_stats.align_ok++; 859 cow_send = socow_setup(m, uio); 860 len = cow_send; 861 } 862 if (!cow_send) { 863 m_clget(m, M_WAITOK); 864 len = min(min(MCLBYTES, resid), *space); 865 } 866#else /* ZERO_COPY_SOCKETS */ 867 if (top == NULL) { 868 m = m_getcl(M_WAIT, MT_DATA, M_PKTHDR); 869 m->m_pkthdr.len = 0; 870 m->m_pkthdr.rcvif = NULL; 871 } else 872 m = m_getcl(M_WAIT, MT_DATA, 0); 873 len = min(min(MCLBYTES, resid), *space); 874#endif /* ZERO_COPY_SOCKETS */ 875 } else { 876 if (top == NULL) { 877 m = m_gethdr(M_WAIT, MT_DATA); 878 m->m_pkthdr.len = 0; 879 m->m_pkthdr.rcvif = NULL; 880 881 len = min(min(MHLEN, resid), *space); 882 /* 883 * For datagram protocols, leave room 884 * for protocol headers in first mbuf. 
885 */ 886 if (atomic && m && len < MHLEN) 887 MH_ALIGN(m, len); 888 } else { 889 m = m_get(M_WAIT, MT_DATA); 890 len = min(min(MLEN, resid), *space); 891 } 892 } 893 if (m == NULL) { 894 error = ENOBUFS; 895 goto out; 896 } 897 898 *space -= len; 899#ifdef ZERO_COPY_SOCKETS 900 if (cow_send) 901 error = 0; 902 else 903#endif /* ZERO_COPY_SOCKETS */ 904 error = uiomove(mtod(m, void *), (int)len, uio); 905 resid = uio->uio_resid; 906 m->m_len = len; 907 *mp = m; 908 top->m_pkthdr.len += len; 909 if (error) 910 goto out; 911 mp = &m->m_next; 912 if (resid <= 0) { 913 if (flags & MSG_EOR) 914 top->m_flags |= M_EOR; 915 break; 916 } 917 } while (*space > 0 && atomic); 918out: 919 *retmp = top; 920 return (error); 921} 922#endif /*ZERO_COPY_SOCKETS*/ 923 924#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT) 925 926int 927sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio, 928 struct mbuf *top, struct mbuf *control, int flags, struct thread *td) 929{ 930 long space, resid; 931 int clen = 0, error, dontroute; 932#ifdef ZERO_COPY_SOCKETS 933 int atomic = sosendallatonce(so) || top; 934#endif 935 936 KASSERT(so->so_type == SOCK_DGRAM, ("sodgram_send: !SOCK_DGRAM")); 937 KASSERT(so->so_proto->pr_flags & PR_ATOMIC, 938 ("sodgram_send: !PR_ATOMIC")); 939 940 if (uio != NULL) 941 resid = uio->uio_resid; 942 else 943 resid = top->m_pkthdr.len; 944 /* 945 * In theory resid should be unsigned. However, space must be 946 * signed, as it might be less than 0 if we over-committed, and we 947 * must use a signed comparison of space and resid. On the other 948 * hand, a negative resid causes us to loop sending 0-length 949 * segments to the protocol. 950 * 951 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM 952 * type sockets since that's an error. 
953 */ 954 if (resid < 0) { 955 error = EINVAL; 956 goto out; 957 } 958 959 dontroute = 960 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0; 961 if (td != NULL) 962 td->td_ru.ru_msgsnd++; 963 if (control != NULL) 964 clen = control->m_len; 965 966 SOCKBUF_LOCK(&so->so_snd); 967 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 968 SOCKBUF_UNLOCK(&so->so_snd); 969 error = EPIPE; 970 goto out; 971 } 972 if (so->so_error) { 973 error = so->so_error; 974 so->so_error = 0; 975 SOCKBUF_UNLOCK(&so->so_snd); 976 goto out; 977 } 978 if ((so->so_state & SS_ISCONNECTED) == 0) { 979 /* 980 * `sendto' and `sendmsg' is allowed on a connection-based 981 * socket if it supports implied connect. Return ENOTCONN if 982 * not connected and no address is supplied. 983 */ 984 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && 985 (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { 986 if ((so->so_state & SS_ISCONFIRMING) == 0 && 987 !(resid == 0 && clen != 0)) { 988 SOCKBUF_UNLOCK(&so->so_snd); 989 error = ENOTCONN; 990 goto out; 991 } 992 } else if (addr == NULL) { 993 if (so->so_proto->pr_flags & PR_CONNREQUIRED) 994 error = ENOTCONN; 995 else 996 error = EDESTADDRREQ; 997 SOCKBUF_UNLOCK(&so->so_snd); 998 goto out; 999 } 1000 } 1001 1002 /* 1003 * Do we need MSG_OOB support in SOCK_DGRAM? Signs here may be a 1004 * problem and need fixing. 1005 */ 1006 space = sbspace(&so->so_snd); 1007 if (flags & MSG_OOB) 1008 space += 1024; 1009 space -= clen; 1010 SOCKBUF_UNLOCK(&so->so_snd); 1011 if (resid > space) { 1012 error = EMSGSIZE; 1013 goto out; 1014 } 1015 if (uio == NULL) { 1016 resid = 0; 1017 if (flags & MSG_EOR) 1018 top->m_flags |= M_EOR; 1019 } else { 1020#ifdef ZERO_COPY_SOCKETS 1021 error = sosend_copyin(uio, &top, atomic, &space, flags); 1022 if (error) 1023 goto out; 1024#else 1025 /* 1026 * Copy the data from userland into a mbuf chain. 1027 * If no data is to be copied in, a single empty mbuf 1028 * is returned. 
1029 */ 1030 top = m_uiotombuf(uio, M_WAITOK, space, max_hdr, 1031 (M_PKTHDR | ((flags & MSG_EOR) ? M_EOR : 0))); 1032 if (top == NULL) { 1033 error = EFAULT; /* only possible error */ 1034 goto out; 1035 } 1036 space -= resid - uio->uio_resid; 1037#endif 1038 resid = uio->uio_resid; 1039 } 1040 KASSERT(resid == 0, ("sosend_dgram: resid != 0")); 1041 /* 1042 * XXXRW: Frobbing SO_DONTROUTE here is even worse without sblock 1043 * than with. 1044 */ 1045 if (dontroute) { 1046 SOCK_LOCK(so); 1047 so->so_options |= SO_DONTROUTE; 1048 SOCK_UNLOCK(so); 1049 } 1050 /* 1051 * XXX all the SBS_CANTSENDMORE checks previously done could be out 1052 * of date. We could have recieved a reset packet in an interrupt or 1053 * maybe we slept while doing page faults in uiomove() etc. We could 1054 * probably recheck again inside the locking protection here, but 1055 * there are probably other places that this also happens. We must 1056 * rethink this. 1057 */ 1058 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 1059 (flags & MSG_OOB) ? PRUS_OOB : 1060 /* 1061 * If the user set MSG_EOF, the protocol understands this flag and 1062 * nothing left to send then use PRU_SEND_EOF instead of PRU_SEND. 1063 */ 1064 ((flags & MSG_EOF) && 1065 (so->so_proto->pr_flags & PR_IMPLOPCL) && 1066 (resid <= 0)) ? 1067 PRUS_EOF : 1068 /* If there is more to send set PRUS_MORETOCOME */ 1069 (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0, 1070 top, addr, control, td); 1071 if (dontroute) { 1072 SOCK_LOCK(so); 1073 so->so_options &= ~SO_DONTROUTE; 1074 SOCK_UNLOCK(so); 1075 } 1076 clen = 0; 1077 control = NULL; 1078 top = NULL; 1079out: 1080 if (top != NULL) 1081 m_freem(top); 1082 if (control != NULL) 1083 m_freem(control); 1084 return (error); 1085} 1086 1087/* 1088 * Send on a socket. If send must go all at once and message is larger than 1089 * send buffering, then hard error. Lock against other senders. 
If must go
 * all at once and not enough room now, then inform user that this would
 * block and do nothing.  Otherwise, if nonblocking, send as much as
 * possible.  The data to be sent is described by "uio" if nonzero, otherwise
 * by the mbuf chain "top" (which must be null if uio is not).  Data provided
 * in mbuf chain must be small enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers must check for short
 * counts if EINTR/ERESTART are returned.  Data and control buffers are freed
 * on return.
 */
int
sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio,
    struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
{
	long space, resid;
	int clen = 0, error, dontroute;
	/* Non-zero when the whole message must be sent in one piece. */
	int atomic = sosendallatonce(so) || top;

	if (uio != NULL)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/*
	 * In theory resid should be unsigned.  However, space must be
	 * signed, as it might be less than 0 if we over-committed, and we
	 * must use a signed comparison of space and resid.  On the other
	 * hand, a negative resid causes us to loop sending 0-length
	 * segments to the protocol.
	 *
	 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
	 * type sockets since that's an error.
	 */
	if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
		error = EINVAL;
		goto out;
	}

	/*
	 * SO_DONTROUTE is only toggled around the send for PR_ATOMIC
	 * protocols, and only if the option is not already set.
	 */
	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	if (td != NULL)
		td->td_ru.ru_msgsnd++;
	if (control != NULL)
		clen = control->m_len;

	/* Serialize against other senders for the whole operation. */
	error = sblock(&so->so_snd, SBLOCKWAIT(flags));
	if (error)
		goto out;

restart:
	do {
		SOCKBUF_LOCK(&so->so_snd);
		if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
			SOCKBUF_UNLOCK(&so->so_snd);
			error = EPIPE;
			goto release;
		}
		if (so->so_error) {
			/* Report and clear the pending socket error. */
			error = so->so_error;
			so->so_error = 0;
			SOCKBUF_UNLOCK(&so->so_snd);
			goto release;
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			/*
			 * `sendto' and `sendmsg' are allowed on a connection-
			 * based socket if it supports implied connect.
			 * Return ENOTCONN if not connected and no address is
			 * supplied.
			 */
			if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
			    (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0)) {
					SOCKBUF_UNLOCK(&so->so_snd);
					error = ENOTCONN;
					goto release;
				}
			} else if (addr == NULL) {
				SOCKBUF_UNLOCK(&so->so_snd);
				if (so->so_proto->pr_flags & PR_CONNREQUIRED)
					error = ENOTCONN;
				else
					error = EDESTADDRREQ;
				goto release;
			}
		}
		space = sbspace(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		/* A message that can never fit is a hard error. */
		if ((atomic && resid > so->so_snd.sb_hiwat) ||
		    clen > so->so_snd.sb_hiwat) {
			SOCKBUF_UNLOCK(&so->so_snd);
			error = EMSGSIZE;
			goto release;
		}
		/*
		 * Not enough room right now: fail with EWOULDBLOCK for
		 * non-blocking callers, otherwise wait for space and
		 * re-evaluate the socket state from the top.
		 */
		if (space < resid + clen &&
		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
			if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO)) {
				SOCKBUF_UNLOCK(&so->so_snd);
				error = EWOULDBLOCK;
				goto release;
			}
			error = sbwait(&so->so_snd);
			SOCKBUF_UNLOCK(&so->so_snd);
			if (error)
				goto release;
			goto restart;
		}
		SOCKBUF_UNLOCK(&so->so_snd);
		space -= clen;
		do {
			if (uio == NULL) {
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else {
#ifdef ZERO_COPY_SOCKETS
				error = sosend_copyin(uio, &top, atomic,
				    &space, flags);
				if (error != 0)
					goto release;
#else
				/*
				 * Copy the data from userland into a mbuf
				 * chain.  If no data is to be copied in,
				 * a single empty mbuf is returned.
				 */
				top = m_uiotombuf(uio, M_WAITOK, space,
				    (atomic ? max_hdr : 0),
				    (atomic ? M_PKTHDR : 0) |
				    ((flags & MSG_EOR) ? M_EOR : 0));
				if (top == NULL) {
					error = EFAULT; /* only possible error */
					goto release;
				}
				space -= resid - uio->uio_resid;
#endif
				resid = uio->uio_resid;
			}
			if (dontroute) {
				SOCK_LOCK(so);
				so->so_options |= SO_DONTROUTE;
				SOCK_UNLOCK(so);
			}
			/*
			 * XXX all the SBS_CANTSENDMORE checks previously
			 * done could be out of date.  We could have received
			 * a reset packet in an interrupt or maybe we slept
			 * while doing page faults in uiomove() etc.  We
			 * could probably recheck again inside the locking
			 * protection here, but there are probably other
			 * places that this also happens.  We must rethink
			 * this.
			 */
			error = (*so->so_proto->pr_usrreqs->pru_send)(so,
			    (flags & MSG_OOB) ? PRUS_OOB :
			/*
			 * If the user set MSG_EOF, the protocol understands
			 * this flag and nothing left to send then use
			 * PRU_SEND_EOF instead of PRU_SEND.
			 */
			    ((flags & MSG_EOF) &&
			     (so->so_proto->pr_flags & PR_IMPLOPCL) &&
			     (resid <= 0)) ?
				PRUS_EOF :
			/* If there is more to send set PRUS_MORETOCOME. */
			    (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
			    top, addr, control, td);
			if (dontroute) {
				SOCK_LOCK(so);
				so->so_options &= ~SO_DONTROUTE;
				SOCK_UNLOCK(so);
			}
			/*
			 * The chains were passed to pru_send; drop the local
			 * references so they are not freed again at exit.
			 */
			clen = 0;
			control = NULL;
			top = NULL;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	sbunlock(&so->so_snd);
out:
	if (top != NULL)
		m_freem(top);
	if (control != NULL)
		m_freem(control);
	return (error);
}

/*
 * Send entry point: dispatch to the pru_sosend method of the socket's
 * protocol and return its result.
 */
int
sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
    struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
{

	return (so->so_proto->pr_usrreqs->pru_sosend(so, addr, uio, top,
	    control, flags, td));
}

/*
 * The part of soreceive() that implements reading non-inline out-of-band
 * data from a socket.  For more complete comments, see soreceive(), from
 * which this code originated.
 *
 * Note that soreceive_rcvoob(), unlike the remainder of soreceive(), is
 * unable to return an mbuf chain to the caller.
 */
static int
soreceive_rcvoob(struct socket *so, struct uio *uio, int flags)
{
	struct protosw *pr = so->so_proto;
	struct mbuf *m;
	int error;

	KASSERT(flags & MSG_OOB, ("soreceive_rcvoob: (flags & MSG_OOB) == 0"));

	/* Scratch mbuf that the protocol's pru_rcvoob fills in. */
	m = m_get(M_WAIT, MT_DATA);
	error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
	if (error)
		goto bad;
	/*
	 * Copy out one mbuf at a time, freeing each after the copy, until
	 * the uio is full, a copy fails, or the chain is exhausted.
	 */
	do {
#ifdef ZERO_COPY_SOCKETS
		if (so_zero_copy_receive) {
			int disposable;

			if ((m->m_flags & M_EXT)
			 && (m->m_ext.ext_type == EXT_DISPOSABLE))
				disposable = 1;
			else
				disposable = 0;

			error = uiomoveco(mtod(m, void *),
					  min(uio->uio_resid, m->m_len),
					  uio, disposable);
		} else
#endif /* ZERO_COPY_SOCKETS */
		error = uiomove(mtod(m, void *),
		    (int) min(uio->uio_resid, m->m_len), uio);
		m = m_free(m);
	} while (uio->uio_resid && error == 0 && m);
bad:
	/* Release whatever is left of the chain. */
	if (m != NULL)
		m_freem(m);
	return (error);
}

/*
 * Following replacement or removal of the first mbuf on the first mbuf chain
 * of a socket buffer, push necessary state changes back into the socket
 * buffer so that other consumers see the values consistently.  'nextrecord'
 * is the caller's locally stored value of the original value of
 * sb->sb_mb->m_nextpkt which must be restored when the lead mbuf changes.
 * NOTE: 'nextrecord' may be NULL.
 *
 * The socket buffer mutex must be held (asserted below).
 */
static __inline void
sockbuf_pushsync(struct sockbuf *sb, struct mbuf *nextrecord)
{

	SOCKBUF_LOCK_ASSERT(sb);
	/*
	 * First, update for the new value of nextrecord.  If necessary, make
	 * it the first record.
	 */
	if (sb->sb_mb != NULL)
		sb->sb_mb->m_nextpkt = nextrecord;
	else
		sb->sb_mb = nextrecord;

	/*
	 * Now update any dependent socket buffer fields to reflect the new
	 * state.  This is an expanded inline of SB_EMPTY_FIXUP(), with the
	 * addition of a second clause that takes care of the case where
	 * sb_mb has been updated, but remains the last record.
	 */
	if (sb->sb_mb == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (sb->sb_mb->m_nextpkt == NULL)
		sb->sb_lastrecord = sb->sb_mb;
}


/*
 * Implement receive operations on a socket.  We depend on the way that
 * records are added to the sockbuf by sbappend.  In particular, each record
 * (mbufs linked through m_next) must begin with an address if the protocol
 * so specifies, followed by an optional mbuf or mbufs containing ancillary
 * data, and then zero or more mbufs of data.  In order to allow parallelism
 * between network receive and copying to user space, as well as avoid
 * sleeping with a mutex held, we release the socket buffer mutex during the
 * user space copy.  Although the sockbuf is locked, new data may still be
 * appended, and thus we must maintain consistency of the sockbuf during that
 * time.
 *
 * The caller may receive the data as a single mbuf chain by supplying an
 * mbuf **mp0 for use in returning the chain.  The uio is then used only for
 * the count in uio_resid.
 */
int
soreceive_generic(struct socket *so, struct sockaddr **psa, struct uio *uio,
    struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
	struct mbuf *m, **mp;
	int flags, len, error, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type = 0;
	/* Used at the end to detect that no data was transferred. */
	int orig_resid = uio->uio_resid;

	mp = mp0;
	if (psa != NULL)
		*psa = NULL;
	if (controlp != NULL)
		*controlp = NULL;
	if (flagsp != NULL)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	/* Out-of-band reads are handled entirely by soreceive_rcvoob(). */
	if (flags & MSG_OOB)
		return (soreceive_rcvoob(so, uio, flags));
	if (mp != NULL)
		*mp = NULL;
	if ((pr->pr_flags & PR_WANTRCVD) && (so->so_state & SS_ISCONFIRMING)
	    && uio->uio_resid)
		(*pr->pr_usrreqs->pru_rcvd)(so, 0);

	/* Serialize against other readers for the whole operation. */
	error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
	if (error)
		return (error);

restart:
	SOCKBUF_LOCK(&so->so_rcv);
	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more (subject
	 * to any timeout) if:
	 *   1. the current count is less than the low water mark, or
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat).
	 *   3. MSG_DONTWAIT is not set
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning a
	 * short count if a timeout or signal occurs after we start.
	 */
	if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) {
		KASSERT(m != NULL || !so->so_rcv.sb_cc,
		    ("receive: m == %p so->so_rcv.sb_cc == %u",
		    m, so->so_rcv.sb_cc));
		if (so->so_error) {
			/* Deliver queued data before reporting the error. */
			if (m != NULL)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			SOCKBUF_UNLOCK(&so->so_rcv);
			goto release;
		}
		SOCKBUF_LOCK_ASSERT(&so->so_rcv);
		if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
			if (m == NULL) {
				SOCKBUF_UNLOCK(&so->so_rcv);
				goto release;
			} else
				goto dontblock;
		}
		/*
		 * Do not block if the first record already contains OOB
		 * data or an end-of-record marker.
		 */
		for (; m != NULL; m = m->m_next)
			if (m->m_type == MT_OOBDATA  || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			SOCKBUF_UNLOCK(&so->so_rcv);
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0) {
			SOCKBUF_UNLOCK(&so->so_rcv);
			goto release;
		}
		if ((so->so_state & SS_NBIO) ||
		    (flags & (MSG_DONTWAIT|MSG_NBIO))) {
			SOCKBUF_UNLOCK(&so->so_rcv);
			error = EWOULDBLOCK;
			goto release;
		}
		SBLASTRECORDCHK(&so->so_rcv);
		SBLASTMBUFCHK(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		SOCKBUF_UNLOCK(&so->so_rcv);
		if (error)
			goto release;
		goto restart;
	}
dontblock:
	/*
	 * From this point onward, we maintain 'nextrecord' as a cache of the
	 * pointer to the next record in the socket buffer.  We must keep the
	 * various socket buffer pointers and local stack versions of the
	 * pointers in sync, pushing out modifications before dropping the
	 * socket buffer mutex, and re-reading them when picking it up.
	 *
	 * Otherwise, we will race with the network stack appending new data
	 * or records onto the socket buffer by using inconsistent/stale
	 * versions of the field, possibly resulting in socket buffer
	 * corruption.
	 *
	 * By holding the high-level sblock(), we prevent simultaneous
	 * readers from pulling off the front of the socket buffer.
	 */
	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
	if (uio->uio_td)
		uio->uio_td->td_ru.ru_msgrcv++;
	KASSERT(m == so->so_rcv.sb_mb, ("soreceive: m != so->so_rcv.sb_mb"));
	SBLASTRECORDCHK(&so->so_rcv);
	SBLASTMBUFCHK(&so->so_rcv);
	nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
		KASSERT(m->m_type == MT_SONAME,
		    ("m->m_type == %d", m->m_type));
		orig_resid = 0;
		if (psa != NULL)
			*psa = sodupsockaddr(mtod(m, struct sockaddr *),
			    M_NOWAIT);
		if (flags & MSG_PEEK) {
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			so->so_rcv.sb_mb = m_free(m);
			m = so->so_rcv.sb_mb;
			sockbuf_pushsync(&so->so_rcv, nextrecord);
		}
	}

	/*
	 * Process one or more MT_CONTROL mbufs present before any data mbufs
	 * in the first mbuf chain on the socket buffer.  If MSG_PEEK, we
	 * just copy the data; if !MSG_PEEK, we call into the protocol to
	 * perform externalization (or freeing if controlp == NULL).
	 */
	if (m != NULL && m->m_type == MT_CONTROL) {
		struct mbuf *cm = NULL, *cmn;
		struct mbuf **cme = &cm;

		do {
			if (flags & MSG_PEEK) {
				if (controlp != NULL) {
					*controlp = m_copy(m, 0, m->m_len);
					controlp = &(*controlp)->m_next;
				}
				m = m->m_next;
			} else {
				/* Move the control mbuf onto the 'cm' list. */
				sbfree(&so->so_rcv, m);
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = NULL;
				*cme = m;
				cme = &(*cme)->m_next;
				m = so->so_rcv.sb_mb;
			}
		} while (m != NULL && m->m_type == MT_CONTROL);
		if ((flags & MSG_PEEK) == 0)
			sockbuf_pushsync(&so->so_rcv, nextrecord);
		while (cm != NULL) {
			cmn = cm->m_next;
			cm->m_next = NULL;
			if (pr->pr_domain->dom_externalize != NULL) {
				/* Called without the sockbuf mutex held. */
				SOCKBUF_UNLOCK(&so->so_rcv);
				error = (*pr->pr_domain->dom_externalize)
				    (cm, controlp);
				SOCKBUF_LOCK(&so->so_rcv);
			} else if (controlp != NULL)
				*controlp = cm;
			else
				m_freem(cm);
			if (controlp != NULL) {
				orig_resid = 0;
				while (*controlp != NULL)
					controlp = &(*controlp)->m_next;
			}
			cm = cmn;
		}
		if (m != NULL)
			nextrecord = so->so_rcv.sb_mb->m_nextpkt;
		else
			nextrecord = so->so_rcv.sb_mb;
		orig_resid = 0;
	}
	if (m != NULL) {
		if ((flags & MSG_PEEK) == 0) {
			KASSERT(m->m_nextpkt == nextrecord,
			    ("soreceive: post-control, nextrecord !sync"));
			if (nextrecord == NULL) {
				KASSERT(so->so_rcv.sb_mb == m,
				    ("soreceive: post-control, sb_mb!=m"));
				KASSERT(so->so_rcv.sb_lastrecord == m,
				    ("soreceive: post-control, lastrecord!=m"));
			}
		}
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	} else {
		if ((flags & MSG_PEEK) == 0) {
			KASSERT(so->so_rcv.sb_mb == nextrecord,
			    ("soreceive: sb_mb != nextrecord"));
			if (so->so_rcv.sb_mb == NULL) {
				KASSERT(so->so_rcv.sb_lastrecord == NULL,
				    ("soreceive: sb_lastercord != NULL"));
			}
		}
	}
	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
	SBLASTRECORDCHK(&so->so_rcv);
	SBLASTMBUFCHK(&so->so_rcv);

	/*
	 * Now continue to read any data mbufs off of the head of the socket
	 * buffer until the read request is satisfied.  Note that 'type' is
	 * used to store the type of any mbuf reads that have happened so far
	 * such that soreceive() can stop reading if the type changes, which
	 * causes soreceive() to return only one of regular data and inline
	 * out-of-band data in a single socket receive operation.
	 */
	moff = 0;
	offset = 0;
	while (m != NULL && uio->uio_resid > 0 && error == 0) {
		/*
		 * If the type of mbuf has changed since the last mbuf
		 * examined ('type'), end the receive operation.
		 */
		SOCKBUF_LOCK_ASSERT(&so->so_rcv);
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
		else
		    KASSERT(m->m_type == MT_DATA,
			("m->m_type == %d", m->m_type));
		so->so_rcv.sb_state &= ~SBS_RCVATMARK;
		/*
		 * Limit this pass to the request size, the distance to the
		 * OOB mark, and what remains of the current mbuf.
		 */
		len = uio->uio_resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.  Otherwise copy
		 * them out via the uio, then free.  Sockbuf must be
		 * consistent here (points to current mbuf, it points to next
		 * record) when we drop priority; we must note any additions
		 * to the sockbuf when we block interrupts again.
		 */
		if (mp == NULL) {
			SOCKBUF_LOCK_ASSERT(&so->so_rcv);
			SBLASTRECORDCHK(&so->so_rcv);
			SBLASTMBUFCHK(&so->so_rcv);
			SOCKBUF_UNLOCK(&so->so_rcv);
#ifdef ZERO_COPY_SOCKETS
			if (so_zero_copy_receive) {
				int disposable;

				if ((m->m_flags & M_EXT)
				 && (m->m_ext.ext_type == EXT_DISPOSABLE))
					disposable = 1;
				else
					disposable = 0;

				error = uiomoveco(mtod(m, char *) + moff,
						  (int)len, uio,
						  disposable);
			} else
#endif /* ZERO_COPY_SOCKETS */
			error = uiomove(mtod(m, char *) + moff, (int)len, uio);
			SOCKBUF_LOCK(&so->so_rcv);
			if (error) {
				/*
				 * The MT_SONAME mbuf has already been removed
				 * from the record, so it is necessary to
				 * remove the data mbufs, if any, to preserve
				 * the invariant in the case of PR_ADDR that
				 * requires MT_SONAME mbufs at the head of
				 * each record.
				 */
				if (m && pr->pr_flags & PR_ATOMIC &&
				    ((flags & MSG_PEEK) == 0))
					(void)sbdroprecord_locked(&so->so_rcv);
				SOCKBUF_UNLOCK(&so->so_rcv);
				goto release;
			}
		} else
			uio->uio_resid -= len;
		SOCKBUF_LOCK_ASSERT(&so->so_rcv);
		if (len == m->m_len - moff) {
			/* The rest of this mbuf was consumed. */
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp != NULL) {
					/* Hand the mbuf itself to the caller. */
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = NULL;
				} else {
					so->so_rcv.sb_mb = m_free(m);
					m = so->so_rcv.sb_mb;
				}
				sockbuf_pushsync(&so->so_rcv, nextrecord);
				SBLASTRECORDCHK(&so->so_rcv);
				SBLASTMBUFCHK(&so->so_rcv);
			}
		} else {
			/* Only part of this mbuf was consumed. */
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp != NULL) {
					int copy_flag;

					if (flags & MSG_DONTWAIT)
						copy_flag = M_DONTWAIT;
					else
						copy_flag = M_WAIT;
					/*
					 * Drop the sockbuf mutex around an
					 * allocation made with M_WAIT.
					 */
					if (copy_flag == M_WAIT)
						SOCKBUF_UNLOCK(&so->so_rcv);
					*mp = m_copym(m, 0, len, copy_flag);
					if (copy_flag == M_WAIT)
						SOCKBUF_LOCK(&so->so_rcv);
					if (*mp == NULL) {
						/*
						 * m_copym() couldn't
						 * allocate an mbuf.  Adjust
						 * uio_resid back (it was
						 * adjusted down by len
						 * bytes, which we didn't end
						 * up "copying" over).
						 */
						uio->uio_resid += len;
						break;
					}
				}
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		SOCKBUF_LOCK_ASSERT(&so->so_rcv);
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_rcv.sb_state |= SBS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket), we
		 * must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return with a
		 * short count but without error.  Keep sockbuf locked
		 * against other readers.
		 */
		while (flags & MSG_WAITALL && m == NULL && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && nextrecord == NULL) {
			SOCKBUF_LOCK_ASSERT(&so->so_rcv);
			if (so->so_error || so->so_rcv.sb_state & SBS_CANTRCVMORE)
				break;
			/*
			 * Notify the protocol that some data has been
			 * drained before blocking.
			 */
			if (pr->pr_flags & PR_WANTRCVD) {
				SOCKBUF_UNLOCK(&so->so_rcv);
				(*pr->pr_usrreqs->pru_rcvd)(so, flags);
				SOCKBUF_LOCK(&so->so_rcv);
			}
			SBLASTRECORDCHK(&so->so_rcv);
			SBLASTMBUFCHK(&so->so_rcv);
			error = sbwait(&so->so_rcv);
			if (error) {
				SOCKBUF_UNLOCK(&so->so_rcv);
				goto release;
			}
			/* Re-read the head of the buffer after sleeping. */
			m = so->so_rcv.sb_mb;
			if (m != NULL)
				nextrecord = m->m_nextpkt;
		}
	}

	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
	/*
	 * For atomic protocols, anything left in the record is discarded
	 * and the truncation is reported through MSG_TRUNC.
	 */
	if (m != NULL && pr->pr_flags & PR_ATOMIC) {
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord_locked(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == NULL) {
			/*
			 * First part is an inline SB_EMPTY_FIXUP().  Second
			 * part makes sure sb_lastrecord is up-to-date if
			 * there is still data in the socket buffer.
			 */
			so->so_rcv.sb_mb = nextrecord;
			if (so->so_rcv.sb_mb == NULL) {
				so->so_rcv.sb_mbtail = NULL;
				so->so_rcv.sb_lastrecord = NULL;
			} else if (nextrecord->m_nextpkt == NULL)
				so->so_rcv.sb_lastrecord = nextrecord;
		}
		SBLASTRECORDCHK(&so->so_rcv);
		SBLASTMBUFCHK(&so->so_rcv);
		/*
		 * If soreceive() is being done from the socket callback,
		 * then don't need to generate ACK to peer to update window,
		 * since ACK will be generated on return to TCP.
1820 */ 1821 if (!(flags & MSG_SOCALLBCK) && 1822 (pr->pr_flags & PR_WANTRCVD)) { 1823 SOCKBUF_UNLOCK(&so->so_rcv); 1824 (*pr->pr_usrreqs->pru_rcvd)(so, flags); 1825 SOCKBUF_LOCK(&so->so_rcv); 1826 } 1827 } 1828 SOCKBUF_LOCK_ASSERT(&so->so_rcv); 1829 if (orig_resid == uio->uio_resid && orig_resid && 1830 (flags & MSG_EOR) == 0 && (so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0) { 1831 SOCKBUF_UNLOCK(&so->so_rcv); 1832 goto restart; 1833 } 1834 SOCKBUF_UNLOCK(&so->so_rcv); 1835 1836 if (flagsp != NULL) 1837 *flagsp |= flags; 1838release: 1839 sbunlock(&so->so_rcv); 1840 return (error); 1841} 1842 1843/* 1844 * Optimized version of soreceive() for simple datagram cases from userspace. 1845 * Unlike in the stream case, we're able to drop a datagram if copyout() 1846 * fails, and because we handle datagrams atomically, we don't need to use a 1847 * sleep lock to prevent I/O interlacing. 1848 */ 1849int 1850soreceive_dgram(struct socket *so, struct sockaddr **psa, struct uio *uio, 1851 struct mbuf **mp0, struct mbuf **controlp, int *flagsp) 1852{ 1853 struct mbuf *m, *m2; 1854 int flags, len, error; 1855 struct protosw *pr = so->so_proto; 1856 struct mbuf *nextrecord; 1857 1858 if (psa != NULL) 1859 *psa = NULL; 1860 if (controlp != NULL) 1861 *controlp = NULL; 1862 if (flagsp != NULL) 1863 flags = *flagsp &~ MSG_EOR; 1864 else 1865 flags = 0; 1866 1867 /* 1868 * For any complicated cases, fall back to the full 1869 * soreceive_generic(). 1870 */ 1871 if (mp0 != NULL || (flags & MSG_PEEK) || (flags & MSG_OOB)) 1872 return (soreceive_generic(so, psa, uio, mp0, controlp, 1873 flagsp)); 1874 1875 /* 1876 * Enforce restrictions on use. 
1877 */ 1878 KASSERT((pr->pr_flags & PR_WANTRCVD) == 0, 1879 ("soreceive_dgram: wantrcvd")); 1880 KASSERT(pr->pr_flags & PR_ATOMIC, ("soreceive_dgram: !atomic")); 1881 KASSERT((so->so_rcv.sb_state & SBS_RCVATMARK) == 0, 1882 ("soreceive_dgram: SBS_RCVATMARK")); 1883 KASSERT((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0, 1884 ("soreceive_dgram: P_CONNREQUIRED")); 1885 1886 /* 1887 * Loop blocking while waiting for a datagram. 1888 */ 1889 SOCKBUF_LOCK(&so->so_rcv); 1890 while ((m = so->so_rcv.sb_mb) == NULL) { 1891 KASSERT(so->so_rcv.sb_cc == 0, 1892 ("soreceive_dgram: sb_mb NULL but sb_cc %u", 1893 so->so_rcv.sb_cc)); 1894 if (so->so_error) { 1895 error = so->so_error; 1896 so->so_error = 0; 1897 SOCKBUF_UNLOCK(&so->so_rcv); 1898 return (error); 1899 } 1900 if (so->so_rcv.sb_state & SBS_CANTRCVMORE || 1901 uio->uio_resid == 0) { 1902 SOCKBUF_UNLOCK(&so->so_rcv); 1903 return (0); 1904 } 1905 if ((so->so_state & SS_NBIO) || 1906 (flags & (MSG_DONTWAIT|MSG_NBIO))) { 1907 SOCKBUF_UNLOCK(&so->so_rcv); 1908 return (EWOULDBLOCK); 1909 } 1910 SBLASTRECORDCHK(&so->so_rcv); 1911 SBLASTMBUFCHK(&so->so_rcv); 1912 error = sbwait(&so->so_rcv); 1913 if (error) { 1914 SOCKBUF_UNLOCK(&so->so_rcv); 1915 return (error); 1916 } 1917 } 1918 SOCKBUF_LOCK_ASSERT(&so->so_rcv); 1919 1920 if (uio->uio_td) 1921 uio->uio_td->td_ru.ru_msgrcv++; 1922 SBLASTRECORDCHK(&so->so_rcv); 1923 SBLASTMBUFCHK(&so->so_rcv); 1924 nextrecord = m->m_nextpkt; 1925 if (nextrecord == NULL) { 1926 KASSERT(so->so_rcv.sb_lastrecord == m, 1927 ("soreceive_dgram: lastrecord != m")); 1928 } 1929 1930 KASSERT(so->so_rcv.sb_mb->m_nextpkt == nextrecord, 1931 ("soreceive_dgram: m_nextpkt != nextrecord")); 1932 1933 /* 1934 * Pull 'm' and its chain off the front of the packet queue. 1935 */ 1936 so->so_rcv.sb_mb = NULL; 1937 sockbuf_pushsync(&so->so_rcv, nextrecord); 1938 1939 /* 1940 * Walk 'm's chain and free that many bytes from the socket buffer. 
1941 */ 1942 for (m2 = m; m2 != NULL; m2 = m2->m_next) 1943 sbfree(&so->so_rcv, m2); 1944 1945 /* 1946 * Do a few last checks before we let go of the lock. 1947 */ 1948 SBLASTRECORDCHK(&so->so_rcv); 1949 SBLASTMBUFCHK(&so->so_rcv); 1950 SOCKBUF_UNLOCK(&so->so_rcv); 1951 1952 if (pr->pr_flags & PR_ADDR) { 1953 KASSERT(m->m_type == MT_SONAME, 1954 ("m->m_type == %d", m->m_type)); 1955 if (psa != NULL) 1956 *psa = sodupsockaddr(mtod(m, struct sockaddr *), 1957 M_NOWAIT); 1958 m = m_free(m); 1959 } 1960 if (m == NULL) { 1961 /* XXXRW: Can this happen? */ 1962 return (0); 1963 } 1964 1965 /* 1966 * Packet to copyout() is now in 'm' and it is disconnected from the 1967 * queue. 1968 * 1969 * Process one or more MT_CONTROL mbufs present before any data mbufs 1970 * in the first mbuf chain on the socket buffer. We call into the 1971 * protocol to perform externalization (or freeing if controlp == 1972 * NULL). 1973 */ 1974 if (m->m_type == MT_CONTROL) { 1975 struct mbuf *cm = NULL, *cmn; 1976 struct mbuf **cme = &cm; 1977 1978 do { 1979 m2 = m->m_next; 1980 m->m_next = NULL; 1981 *cme = m; 1982 cme = &(*cme)->m_next; 1983 m = m2; 1984 } while (m != NULL && m->m_type == MT_CONTROL); 1985 while (cm != NULL) { 1986 cmn = cm->m_next; 1987 cm->m_next = NULL; 1988 if (pr->pr_domain->dom_externalize != NULL) { 1989 error = (*pr->pr_domain->dom_externalize) 1990 (cm, controlp); 1991 } else if (controlp != NULL) 1992 *controlp = cm; 1993 else 1994 m_freem(cm); 1995 if (controlp != NULL) { 1996 while (*controlp != NULL) 1997 controlp = &(*controlp)->m_next; 1998 } 1999 cm = cmn; 2000 } 2001 } 2002 KASSERT(m->m_type == MT_DATA, ("soreceive_dgram: !data")); 2003 2004 while (m != NULL && uio->uio_resid > 0) { 2005 len = uio->uio_resid; 2006 if (len > m->m_len) 2007 len = m->m_len; 2008 error = uiomove(mtod(m, char *), (int)len, uio); 2009 if (error) { 2010 m_freem(m); 2011 return (error); 2012 } 2013 m = m_free(m); 2014 } 2015 if (m != NULL) 2016 flags |= MSG_TRUNC; 2017 m_freem(m); 
2018 if (flagsp != NULL) 2019 *flagsp |= flags; 2020 return (0); 2021} 2022 2023int 2024soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, 2025 struct mbuf **mp0, struct mbuf **controlp, int *flagsp) 2026{ 2027 2028 return (so->so_proto->pr_usrreqs->pru_soreceive(so, psa, uio, mp0, 2029 controlp, flagsp)); 2030} 2031 2032int 2033soshutdown(struct socket *so, int how) 2034{ 2035 struct protosw *pr = so->so_proto; 2036 2037 if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR)) 2038 return (EINVAL); 2039 if (pr->pr_usrreqs->pru_flush != NULL) { 2040 (*pr->pr_usrreqs->pru_flush)(so, how); 2041 } 2042 if (how != SHUT_WR) 2043 sorflush(so); 2044 if (how != SHUT_RD) 2045 return ((*pr->pr_usrreqs->pru_shutdown)(so)); 2046 return (0); 2047} 2048 2049void 2050sorflush(struct socket *so) 2051{ 2052 struct sockbuf *sb = &so->so_rcv; 2053 struct protosw *pr = so->so_proto; 2054 struct sockbuf asb; 2055 2056 /* 2057 * In order to avoid calling dom_dispose with the socket buffer mutex 2058 * held, and in order to generally avoid holding the lock for a long 2059 * time, we make a copy of the socket buffer and clear the original 2060 * (except locks, state). The new socket buffer copy won't have 2061 * initialized locks so we can only call routines that won't use or 2062 * assert those locks. 2063 * 2064 * Dislodge threads currently blocked in receive and wait to acquire 2065 * a lock against other simultaneous readers before clearing the 2066 * socket buffer. Don't let our acquire be interrupted by a signal 2067 * despite any existing socket disposition on interruptable waiting. 2068 */ 2069 socantrcvmore(so); 2070 (void) sblock(sb, SBL_WAIT | SBL_NOINTR); 2071 2072 /* 2073 * Invalidate/clear most of the sockbuf structure, but leave selinfo 2074 * and mutex data unchanged. 
2075 */ 2076 SOCKBUF_LOCK(sb); 2077 bzero(&asb, offsetof(struct sockbuf, sb_startzero)); 2078 bcopy(&sb->sb_startzero, &asb.sb_startzero, 2079 sizeof(*sb) - offsetof(struct sockbuf, sb_startzero)); 2080 bzero(&sb->sb_startzero, 2081 sizeof(*sb) - offsetof(struct sockbuf, sb_startzero)); 2082 SOCKBUF_UNLOCK(sb); 2083 sbunlock(sb); 2084 2085 /* 2086 * Dispose of special rights and flush the socket buffer. Don't call 2087 * any unsafe routines (that rely on locks being initialized) on asb. 2088 */ 2089 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL) 2090 (*pr->pr_domain->dom_dispose)(asb.sb_mb); 2091 sbrelease_internal(&asb, so); 2092} 2093 2094/* 2095 * Perhaps this routine, and sooptcopyout(), below, ought to come in an 2096 * additional variant to handle the case where the option value needs to be 2097 * some kind of integer, but not a specific size. In addition to their use 2098 * here, these functions are also called by the protocol-level pr_ctloutput() 2099 * routines. 2100 */ 2101int 2102sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen) 2103{ 2104 size_t valsize; 2105 2106 /* 2107 * If the user gives us more than we wanted, we ignore it, but if we 2108 * don't get the minimum length the caller wants, we return EINVAL. 2109 * On success, sopt->sopt_valsize is set to however much we actually 2110 * retrieved. 2111 */ 2112 if ((valsize = sopt->sopt_valsize) < minlen) 2113 return EINVAL; 2114 if (valsize > len) 2115 sopt->sopt_valsize = valsize = len; 2116 2117 if (sopt->sopt_td != NULL) 2118 return (copyin(sopt->sopt_val, buf, valsize)); 2119 2120 bcopy(sopt->sopt_val, buf, valsize); 2121 return (0); 2122} 2123 2124/* 2125 * Kernel version of setsockopt(2). 
2126 * 2127 * XXX: optlen is size_t, not socklen_t 2128 */ 2129int 2130so_setsockopt(struct socket *so, int level, int optname, void *optval, 2131 size_t optlen) 2132{ 2133 struct sockopt sopt; 2134 2135 sopt.sopt_level = level; 2136 sopt.sopt_name = optname; 2137 sopt.sopt_dir = SOPT_SET; 2138 sopt.sopt_val = optval; 2139 sopt.sopt_valsize = optlen; 2140 sopt.sopt_td = NULL; 2141 return (sosetopt(so, &sopt)); 2142} 2143 2144int 2145sosetopt(struct socket *so, struct sockopt *sopt) 2146{ 2147 int error, optval; 2148 struct linger l; 2149 struct timeval tv; 2150 u_long val; 2151#ifdef MAC 2152 struct mac extmac; 2153#endif 2154 2155 error = 0; 2156 if (sopt->sopt_level != SOL_SOCKET) { 2157 if (so->so_proto && so->so_proto->pr_ctloutput) 2158 return ((*so->so_proto->pr_ctloutput) 2159 (so, sopt)); 2160 error = ENOPROTOOPT; 2161 } else { 2162 switch (sopt->sopt_name) { 2163#ifdef INET 2164 case SO_ACCEPTFILTER: 2165 error = do_setopt_accept_filter(so, sopt); 2166 if (error) 2167 goto bad; 2168 break; 2169#endif 2170 case SO_LINGER: 2171 error = sooptcopyin(sopt, &l, sizeof l, sizeof l); 2172 if (error) 2173 goto bad; 2174 2175 SOCK_LOCK(so); 2176 so->so_linger = l.l_linger; 2177 if (l.l_onoff) 2178 so->so_options |= SO_LINGER; 2179 else 2180 so->so_options &= ~SO_LINGER; 2181 SOCK_UNLOCK(so); 2182 break; 2183 2184 case SO_DEBUG: 2185 case SO_KEEPALIVE: 2186 case SO_DONTROUTE: 2187 case SO_USELOOPBACK: 2188 case SO_BROADCAST: 2189 case SO_REUSEADDR: 2190 case SO_REUSEPORT: 2191 case SO_OOBINLINE: 2192 case SO_TIMESTAMP: 2193 case SO_BINTIME: 2194 case SO_NOSIGPIPE: 2195 case SO_NO_DDP: 2196 case SO_NO_OFFLOAD: 2197 error = sooptcopyin(sopt, &optval, sizeof optval, 2198 sizeof optval); 2199 if (error) 2200 goto bad; 2201 SOCK_LOCK(so); 2202 if (optval) 2203 so->so_options |= sopt->sopt_name; 2204 else 2205 so->so_options &= ~sopt->sopt_name; 2206 SOCK_UNLOCK(so); 2207 break; 2208 2209 case SO_SETFIB: 2210 error = sooptcopyin(sopt, &optval, sizeof optval, 2211 sizeof 
optval); 2212 if (optval < 1 || optval > rt_numfibs) { 2213 error = EINVAL; 2214 goto bad; 2215 } 2216 if ((so->so_proto->pr_domain->dom_family == PF_INET) || 2217 (so->so_proto->pr_domain->dom_family == PF_ROUTE)) { 2218 so->so_fibnum = optval; 2219 /* Note: ignore error */ 2220 if (so->so_proto && so->so_proto->pr_ctloutput) 2221 (*so->so_proto->pr_ctloutput)(so, sopt); 2222 } else { 2223 so->so_fibnum = 0; 2224 } 2225 break; 2226 case SO_SNDBUF: 2227 case SO_RCVBUF: 2228 case SO_SNDLOWAT: 2229 case SO_RCVLOWAT: 2230 error = sooptcopyin(sopt, &optval, sizeof optval, 2231 sizeof optval); 2232 if (error) 2233 goto bad; 2234 2235 /* 2236 * Values < 1 make no sense for any of these options, 2237 * so disallow them. 2238 */ 2239 if (optval < 1) { 2240 error = EINVAL; 2241 goto bad; 2242 } 2243 2244 switch (sopt->sopt_name) { 2245 case SO_SNDBUF: 2246 case SO_RCVBUF: 2247 if (sbreserve(sopt->sopt_name == SO_SNDBUF ? 2248 &so->so_snd : &so->so_rcv, (u_long)optval, 2249 so, curthread) == 0) { 2250 error = ENOBUFS; 2251 goto bad; 2252 } 2253 (sopt->sopt_name == SO_SNDBUF ? &so->so_snd : 2254 &so->so_rcv)->sb_flags &= ~SB_AUTOSIZE; 2255 break; 2256 2257 /* 2258 * Make sure the low-water is never greater than the 2259 * high-water. 2260 */ 2261 case SO_SNDLOWAT: 2262 SOCKBUF_LOCK(&so->so_snd); 2263 so->so_snd.sb_lowat = 2264 (optval > so->so_snd.sb_hiwat) ? 2265 so->so_snd.sb_hiwat : optval; 2266 SOCKBUF_UNLOCK(&so->so_snd); 2267 break; 2268 case SO_RCVLOWAT: 2269 SOCKBUF_LOCK(&so->so_rcv); 2270 so->so_rcv.sb_lowat = 2271 (optval > so->so_rcv.sb_hiwat) ? 
2272 so->so_rcv.sb_hiwat : optval; 2273 SOCKBUF_UNLOCK(&so->so_rcv); 2274 break; 2275 } 2276 break; 2277 2278 case SO_SNDTIMEO: 2279 case SO_RCVTIMEO: 2280#ifdef COMPAT_IA32 2281 if (SV_CURPROC_FLAG(SV_ILP32)) { 2282 struct timeval32 tv32; 2283 2284 error = sooptcopyin(sopt, &tv32, sizeof tv32, 2285 sizeof tv32); 2286 CP(tv32, tv, tv_sec); 2287 CP(tv32, tv, tv_usec); 2288 } else 2289#endif 2290 error = sooptcopyin(sopt, &tv, sizeof tv, 2291 sizeof tv); 2292 if (error) 2293 goto bad; 2294 2295 /* assert(hz > 0); */ 2296 if (tv.tv_sec < 0 || tv.tv_sec > INT_MAX / hz || 2297 tv.tv_usec < 0 || tv.tv_usec >= 1000000) { 2298 error = EDOM; 2299 goto bad; 2300 } 2301 /* assert(tick > 0); */ 2302 /* assert(ULONG_MAX - INT_MAX >= 1000000); */ 2303 val = (u_long)(tv.tv_sec * hz) + tv.tv_usec / tick; 2304 if (val > INT_MAX) { 2305 error = EDOM; 2306 goto bad; 2307 } 2308 if (val == 0 && tv.tv_usec != 0) 2309 val = 1; 2310 2311 switch (sopt->sopt_name) { 2312 case SO_SNDTIMEO: 2313 so->so_snd.sb_timeo = val; 2314 break; 2315 case SO_RCVTIMEO: 2316 so->so_rcv.sb_timeo = val; 2317 break; 2318 } 2319 break; 2320 2321 case SO_LABEL: 2322#ifdef MAC 2323 error = sooptcopyin(sopt, &extmac, sizeof extmac, 2324 sizeof extmac); 2325 if (error) 2326 goto bad; 2327 error = mac_setsockopt_label(sopt->sopt_td->td_ucred, 2328 so, &extmac); 2329#else 2330 error = EOPNOTSUPP; 2331#endif 2332 break; 2333 2334 default: 2335 error = ENOPROTOOPT; 2336 break; 2337 } 2338 if (error == 0 && so->so_proto != NULL && 2339 so->so_proto->pr_ctloutput != NULL) { 2340 (void) ((*so->so_proto->pr_ctloutput) 2341 (so, sopt)); 2342 } 2343 } 2344bad: 2345 return (error); 2346} 2347 2348/* 2349 * Helper routine for getsockopt. 2350 */ 2351int 2352sooptcopyout(struct sockopt *sopt, const void *buf, size_t len) 2353{ 2354 int error; 2355 size_t valsize; 2356 2357 error = 0; 2358 2359 /* 2360 * Documented get behavior is that we always return a value, possibly 2361 * truncated to fit in the user's buffer. 
Traditional behavior is 2362 * that we always tell the user precisely how much we copied, rather 2363 * than something useful like the total amount we had available for 2364 * her. Note that this interface is not idempotent; the entire 2365 * answer must generated ahead of time. 2366 */ 2367 valsize = min(len, sopt->sopt_valsize); 2368 sopt->sopt_valsize = valsize; 2369 if (sopt->sopt_val != NULL) { 2370 if (sopt->sopt_td != NULL) 2371 error = copyout(buf, sopt->sopt_val, valsize); 2372 else 2373 bcopy(buf, sopt->sopt_val, valsize); 2374 } 2375 return (error); 2376} 2377 2378int 2379sogetopt(struct socket *so, struct sockopt *sopt) 2380{ 2381 int error, optval; 2382 struct linger l; 2383 struct timeval tv; 2384#ifdef MAC 2385 struct mac extmac; 2386#endif 2387 2388 error = 0; 2389 if (sopt->sopt_level != SOL_SOCKET) { 2390 if (so->so_proto && so->so_proto->pr_ctloutput) { 2391 return ((*so->so_proto->pr_ctloutput) 2392 (so, sopt)); 2393 } else 2394 return (ENOPROTOOPT); 2395 } else { 2396 switch (sopt->sopt_name) { 2397#ifdef INET 2398 case SO_ACCEPTFILTER: 2399 error = do_getopt_accept_filter(so, sopt); 2400 break; 2401#endif 2402 case SO_LINGER: 2403 SOCK_LOCK(so); 2404 l.l_onoff = so->so_options & SO_LINGER; 2405 l.l_linger = so->so_linger; 2406 SOCK_UNLOCK(so); 2407 error = sooptcopyout(sopt, &l, sizeof l); 2408 break; 2409 2410 case SO_USELOOPBACK: 2411 case SO_DONTROUTE: 2412 case SO_DEBUG: 2413 case SO_KEEPALIVE: 2414 case SO_REUSEADDR: 2415 case SO_REUSEPORT: 2416 case SO_BROADCAST: 2417 case SO_OOBINLINE: 2418 case SO_ACCEPTCONN: 2419 case SO_TIMESTAMP: 2420 case SO_BINTIME: 2421 case SO_NOSIGPIPE: 2422 optval = so->so_options & sopt->sopt_name; 2423integer: 2424 error = sooptcopyout(sopt, &optval, sizeof optval); 2425 break; 2426 2427 case SO_TYPE: 2428 optval = so->so_type; 2429 goto integer; 2430 2431 case SO_ERROR: 2432 SOCK_LOCK(so); 2433 optval = so->so_error; 2434 so->so_error = 0; 2435 SOCK_UNLOCK(so); 2436 goto integer; 2437 2438 case SO_SNDBUF: 
2439 optval = so->so_snd.sb_hiwat; 2440 goto integer; 2441 2442 case SO_RCVBUF: 2443 optval = so->so_rcv.sb_hiwat; 2444 goto integer; 2445 2446 case SO_SNDLOWAT: 2447 optval = so->so_snd.sb_lowat; 2448 goto integer; 2449 2450 case SO_RCVLOWAT: 2451 optval = so->so_rcv.sb_lowat; 2452 goto integer; 2453 2454 case SO_SNDTIMEO: 2455 case SO_RCVTIMEO: 2456 optval = (sopt->sopt_name == SO_SNDTIMEO ? 2457 so->so_snd.sb_timeo : so->so_rcv.sb_timeo); 2458 2459 tv.tv_sec = optval / hz; 2460 tv.tv_usec = (optval % hz) * tick; 2461#ifdef COMPAT_IA32 2462 if (SV_CURPROC_FLAG(SV_ILP32)) { 2463 struct timeval32 tv32; 2464 2465 CP(tv, tv32, tv_sec); 2466 CP(tv, tv32, tv_usec); 2467 error = sooptcopyout(sopt, &tv32, sizeof tv32); 2468 } else 2469#endif 2470 error = sooptcopyout(sopt, &tv, sizeof tv); 2471 break; 2472 2473 case SO_LABEL: 2474#ifdef MAC 2475 error = sooptcopyin(sopt, &extmac, sizeof(extmac), 2476 sizeof(extmac)); 2477 if (error) 2478 return (error); 2479 error = mac_getsockopt_label(sopt->sopt_td->td_ucred, 2480 so, &extmac); 2481 if (error) 2482 return (error); 2483 error = sooptcopyout(sopt, &extmac, sizeof extmac); 2484#else 2485 error = EOPNOTSUPP; 2486#endif 2487 break; 2488 2489 case SO_PEERLABEL: 2490#ifdef MAC 2491 error = sooptcopyin(sopt, &extmac, sizeof(extmac), 2492 sizeof(extmac)); 2493 if (error) 2494 return (error); 2495 error = mac_getsockopt_peerlabel( 2496 sopt->sopt_td->td_ucred, so, &extmac); 2497 if (error) 2498 return (error); 2499 error = sooptcopyout(sopt, &extmac, sizeof extmac); 2500#else 2501 error = EOPNOTSUPP; 2502#endif 2503 break; 2504 2505 case SO_LISTENQLIMIT: 2506 optval = so->so_qlimit; 2507 goto integer; 2508 2509 case SO_LISTENQLEN: 2510 optval = so->so_qlen; 2511 goto integer; 2512 2513 case SO_LISTENINCQLEN: 2514 optval = so->so_incqlen; 2515 goto integer; 2516 2517 default: 2518 error = ENOPROTOOPT; 2519 break; 2520 } 2521 return (error); 2522 } 2523} 2524 2525/* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. 
*/
/*
 * Build an mbuf chain whose m_len fields add up to sopt->sopt_valsize and
 * hand its head back through *mp.  An mbuf gets a cluster whenever more than
 * MLEN bytes are still outstanding.  Allocations wait only when the request
 * has a thread attached (sopt_td != NULL).  Returns 0, or ENOBUFS after
 * freeing whatever part of the chain had been built.
 */
int
soopt_getm(struct sockopt *sopt, struct mbuf **mp)
{
	struct mbuf *m, **linkp;
	int how, remaining;

	how = sopt->sopt_td ? M_WAIT : M_DONTWAIT;
	remaining = sopt->sopt_valsize;

	/* linkp is where the next mbuf gets hooked in; *mp for the first. */
	linkp = mp;
	do {
		MGET(m, how, MT_DATA);
		if (m == NULL) {
			if (linkp != mp)
				m_freem(*mp);
			return (ENOBUFS);
		}
		if (remaining > MLEN) {
			MCLGET(m, how);
			if ((m->m_flags & M_EXT) == 0) {
				m_freem(m);
				if (linkp != mp)
					m_freem(*mp);
				return (ENOBUFS);
			}
			m->m_len = min(MCLBYTES, remaining);
		} else {
			m->m_len = min(MLEN, remaining);
		}
		remaining -= m->m_len;
		*linkp = m;
		linkp = &m->m_next;
	} while (remaining);
	return (0);
}

/* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines.
*/ 2575int 2576soopt_mcopyin(struct sockopt *sopt, struct mbuf *m) 2577{ 2578 struct mbuf *m0 = m; 2579 2580 if (sopt->sopt_val == NULL) 2581 return (0); 2582 while (m != NULL && sopt->sopt_valsize >= m->m_len) { 2583 if (sopt->sopt_td != NULL) { 2584 int error; 2585 2586 error = copyin(sopt->sopt_val, mtod(m, char *), 2587 m->m_len); 2588 if (error != 0) { 2589 m_freem(m0); 2590 return(error); 2591 } 2592 } else 2593 bcopy(sopt->sopt_val, mtod(m, char *), m->m_len); 2594 sopt->sopt_valsize -= m->m_len; 2595 sopt->sopt_val = (char *)sopt->sopt_val + m->m_len; 2596 m = m->m_next; 2597 } 2598 if (m != NULL) /* should be allocated enoughly at ip6_sooptmcopyin() */ 2599 panic("ip6_sooptmcopyin"); 2600 return (0); 2601} 2602 2603/* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */ 2604int 2605soopt_mcopyout(struct sockopt *sopt, struct mbuf *m) 2606{ 2607 struct mbuf *m0 = m; 2608 size_t valsize = 0; 2609 2610 if (sopt->sopt_val == NULL) 2611 return (0); 2612 while (m != NULL && sopt->sopt_valsize >= m->m_len) { 2613 if (sopt->sopt_td != NULL) { 2614 int error; 2615 2616 error = copyout(mtod(m, char *), sopt->sopt_val, 2617 m->m_len); 2618 if (error != 0) { 2619 m_freem(m0); 2620 return(error); 2621 } 2622 } else 2623 bcopy(mtod(m, char *), sopt->sopt_val, m->m_len); 2624 sopt->sopt_valsize -= m->m_len; 2625 sopt->sopt_val = (char *)sopt->sopt_val + m->m_len; 2626 valsize += m->m_len; 2627 m = m->m_next; 2628 } 2629 if (m != NULL) { 2630 /* enough soopt buffer should be given from user-land */ 2631 m_freem(m0); 2632 return(EINVAL); 2633 } 2634 sopt->sopt_valsize = valsize; 2635 return (0); 2636} 2637 2638/* 2639 * sohasoutofband(): protocol notifies socket layer of the arrival of new 2640 * out-of-band data, which will then notify socket consumers. 
2641 */ 2642void 2643sohasoutofband(struct socket *so) 2644{ 2645 2646 if (so->so_sigio != NULL) 2647 pgsigio(&so->so_sigio, SIGURG, 0); 2648 selwakeuppri(&so->so_rcv.sb_sel, PSOCK); 2649} 2650 2651int 2652sopoll(struct socket *so, int events, struct ucred *active_cred, 2653 struct thread *td) 2654{ 2655 2656 return (so->so_proto->pr_usrreqs->pru_sopoll(so, events, active_cred, 2657 td)); 2658} 2659 2660int 2661sopoll_generic(struct socket *so, int events, struct ucred *active_cred, 2662 struct thread *td) 2663{ 2664 int revents = 0; 2665 2666 SOCKBUF_LOCK(&so->so_snd); 2667 SOCKBUF_LOCK(&so->so_rcv); 2668 if (events & (POLLIN | POLLRDNORM)) 2669 if (soreadable(so)) 2670 revents |= events & (POLLIN | POLLRDNORM); 2671 2672 if (events & POLLINIGNEOF) 2673 if (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat || 2674 !TAILQ_EMPTY(&so->so_comp) || so->so_error) 2675 revents |= POLLINIGNEOF; 2676 2677 if (events & (POLLOUT | POLLWRNORM)) 2678 if (sowriteable(so)) 2679 revents |= events & (POLLOUT | POLLWRNORM); 2680 2681 if (events & (POLLPRI | POLLRDBAND)) 2682 if (so->so_oobmark || (so->so_rcv.sb_state & SBS_RCVATMARK)) 2683 revents |= events & (POLLPRI | POLLRDBAND); 2684 2685 if (revents == 0) { 2686 if (events & 2687 (POLLIN | POLLINIGNEOF | POLLPRI | POLLRDNORM | 2688 POLLRDBAND)) { 2689 selrecord(td, &so->so_rcv.sb_sel); 2690 so->so_rcv.sb_flags |= SB_SEL; 2691 } 2692 2693 if (events & (POLLOUT | POLLWRNORM)) { 2694 selrecord(td, &so->so_snd.sb_sel); 2695 so->so_snd.sb_flags |= SB_SEL; 2696 } 2697 } 2698 2699 SOCKBUF_UNLOCK(&so->so_rcv); 2700 SOCKBUF_UNLOCK(&so->so_snd); 2701 return (revents); 2702} 2703 2704int 2705soo_kqfilter(struct file *fp, struct knote *kn) 2706{ 2707 struct socket *so = kn->kn_fp->f_data; 2708 struct sockbuf *sb; 2709 2710 switch (kn->kn_filter) { 2711 case EVFILT_READ: 2712 if (so->so_options & SO_ACCEPTCONN) 2713 kn->kn_fop = &solisten_filtops; 2714 else 2715 kn->kn_fop = &soread_filtops; 2716 sb = &so->so_rcv; 2717 break; 2718 case EVFILT_WRITE: 
2719 kn->kn_fop = &sowrite_filtops; 2720 sb = &so->so_snd; 2721 break; 2722 default: 2723 return (EINVAL); 2724 } 2725 2726 SOCKBUF_LOCK(sb); 2727 knlist_add(&sb->sb_sel.si_note, kn, 1); 2728 sb->sb_flags |= SB_KNOTE; 2729 SOCKBUF_UNLOCK(sb); 2730 return (0); 2731} 2732 2733/* 2734 * Some routines that return EOPNOTSUPP for entry points that are not 2735 * supported by a protocol. Fill in as needed. 2736 */ 2737int 2738pru_accept_notsupp(struct socket *so, struct sockaddr **nam) 2739{ 2740 2741 return EOPNOTSUPP; 2742} 2743 2744int 2745pru_attach_notsupp(struct socket *so, int proto, struct thread *td) 2746{ 2747 2748 return EOPNOTSUPP; 2749} 2750 2751int 2752pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td) 2753{ 2754 2755 return EOPNOTSUPP; 2756} 2757 2758int 2759pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td) 2760{ 2761 2762 return EOPNOTSUPP; 2763} 2764 2765int 2766pru_connect2_notsupp(struct socket *so1, struct socket *so2) 2767{ 2768 2769 return EOPNOTSUPP; 2770} 2771 2772int 2773pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data, 2774 struct ifnet *ifp, struct thread *td) 2775{ 2776 2777 return EOPNOTSUPP; 2778} 2779 2780int 2781pru_disconnect_notsupp(struct socket *so) 2782{ 2783 2784 return EOPNOTSUPP; 2785} 2786 2787int 2788pru_listen_notsupp(struct socket *so, int backlog, struct thread *td) 2789{ 2790 2791 return EOPNOTSUPP; 2792} 2793 2794int 2795pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam) 2796{ 2797 2798 return EOPNOTSUPP; 2799} 2800 2801int 2802pru_rcvd_notsupp(struct socket *so, int flags) 2803{ 2804 2805 return EOPNOTSUPP; 2806} 2807 2808int 2809pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags) 2810{ 2811 2812 return EOPNOTSUPP; 2813} 2814 2815int 2816pru_send_notsupp(struct socket *so, int flags, struct mbuf *m, 2817 struct sockaddr *addr, struct mbuf *control, struct thread *td) 2818{ 2819 2820 return EOPNOTSUPP; 2821} 2822 2823/* 2824 * This 
isn't really a ``null'' operation, but it's the default one and 2825 * doesn't do anything destructive. 2826 */ 2827int 2828pru_sense_null(struct socket *so, struct stat *sb) 2829{ 2830 2831 sb->st_blksize = so->so_snd.sb_hiwat; 2832 return 0; 2833} 2834 2835int 2836pru_shutdown_notsupp(struct socket *so) 2837{ 2838 2839 return EOPNOTSUPP; 2840} 2841 2842int 2843pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam) 2844{ 2845 2846 return EOPNOTSUPP; 2847} 2848 2849int 2850pru_sosend_notsupp(struct socket *so, struct sockaddr *addr, struct uio *uio, 2851 struct mbuf *top, struct mbuf *control, int flags, struct thread *td) 2852{ 2853 2854 return EOPNOTSUPP; 2855} 2856 2857int 2858pru_soreceive_notsupp(struct socket *so, struct sockaddr **paddr, 2859 struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) 2860{ 2861 2862 return EOPNOTSUPP; 2863} 2864 2865int 2866pru_sopoll_notsupp(struct socket *so, int events, struct ucred *cred, 2867 struct thread *td) 2868{ 2869 2870 return EOPNOTSUPP; 2871} 2872 2873static void 2874filt_sordetach(struct knote *kn) 2875{ 2876 struct socket *so = kn->kn_fp->f_data; 2877 2878 SOCKBUF_LOCK(&so->so_rcv); 2879 knlist_remove(&so->so_rcv.sb_sel.si_note, kn, 1); 2880 if (knlist_empty(&so->so_rcv.sb_sel.si_note)) 2881 so->so_rcv.sb_flags &= ~SB_KNOTE; 2882 SOCKBUF_UNLOCK(&so->so_rcv); 2883} 2884 2885/*ARGSUSED*/ 2886static int 2887filt_soread(struct knote *kn, long hint) 2888{ 2889 struct socket *so; 2890 2891 so = kn->kn_fp->f_data; 2892 SOCKBUF_LOCK_ASSERT(&so->so_rcv); 2893 2894 kn->kn_data = so->so_rcv.sb_cc - so->so_rcv.sb_ctl; 2895 if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { 2896 kn->kn_flags |= EV_EOF; 2897 kn->kn_fflags = so->so_error; 2898 return (1); 2899 } else if (so->so_error) /* temporary udp error */ 2900 return (1); 2901 else if (kn->kn_sfflags & NOTE_LOWAT) 2902 return (kn->kn_data >= kn->kn_sdata); 2903 else 2904 return (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat); 2905} 2906 2907static void 
2908filt_sowdetach(struct knote *kn) 2909{ 2910 struct socket *so = kn->kn_fp->f_data; 2911 2912 SOCKBUF_LOCK(&so->so_snd); 2913 knlist_remove(&so->so_snd.sb_sel.si_note, kn, 1); 2914 if (knlist_empty(&so->so_snd.sb_sel.si_note)) 2915 so->so_snd.sb_flags &= ~SB_KNOTE; 2916 SOCKBUF_UNLOCK(&so->so_snd); 2917} 2918 2919/*ARGSUSED*/ 2920static int 2921filt_sowrite(struct knote *kn, long hint) 2922{ 2923 struct socket *so; 2924 2925 so = kn->kn_fp->f_data; 2926 SOCKBUF_LOCK_ASSERT(&so->so_snd); 2927 kn->kn_data = sbspace(&so->so_snd); 2928 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 2929 kn->kn_flags |= EV_EOF; 2930 kn->kn_fflags = so->so_error; 2931 return (1); 2932 } else if (so->so_error) /* temporary udp error */ 2933 return (1); 2934 else if (((so->so_state & SS_ISCONNECTED) == 0) && 2935 (so->so_proto->pr_flags & PR_CONNREQUIRED)) 2936 return (0); 2937 else if (kn->kn_sfflags & NOTE_LOWAT) 2938 return (kn->kn_data >= kn->kn_sdata); 2939 else 2940 return (kn->kn_data >= so->so_snd.sb_lowat); 2941} 2942 2943/*ARGSUSED*/ 2944static int 2945filt_solisten(struct knote *kn, long hint) 2946{ 2947 struct socket *so = kn->kn_fp->f_data; 2948 2949 kn->kn_data = so->so_qlen; 2950 return (! TAILQ_EMPTY(&so->so_comp)); 2951} 2952 2953int 2954socheckuid(struct socket *so, uid_t uid) 2955{ 2956 2957 if (so == NULL) 2958 return (EPERM); 2959 if (so->so_cred->cr_uid != uid) 2960 return (EPERM); 2961 return (0); 2962} 2963 2964static int 2965sysctl_somaxconn(SYSCTL_HANDLER_ARGS) 2966{ 2967 int error; 2968 int val; 2969 2970 val = somaxconn; 2971 error = sysctl_handle_int(oidp, &val, 0, req); 2972 if (error || !req->newptr ) 2973 return (error); 2974 2975 if (val < 1 || val > USHRT_MAX) 2976 return (EINVAL); 2977 2978 somaxconn = val; 2979 return (0); 2980} 2981 2982/* 2983 * These functions are used by protocols to notify the socket layer (and its 2984 * consumers) of state changes in the sockets driven by protocol-side events. 
*/

/*
 * Procedures to manipulate state flags of socket and do appropriate wakeups.
 *
 * Normal sequence from the active (originating) side is that
 * soisconnecting() is called during processing of connect() call, resulting
 * in an eventual call to soisconnected() if/when the connection is
 * established.  When the connection is torn down soisdisconnecting() is
 * called during processing of disconnect() call, and soisdisconnected() is
 * called when the connection to the peer is totally severed.  The semantics
 * of these routines are such that connectionless protocols can call
 * soisconnected() and soisdisconnected() only, bypassing the in-progress
 * calls when setting up a ``connection'' takes no time.
 *
 * From the passive side, a socket is created with two queues of sockets:
 * so_incomp for connections in progress and so_comp for connections already
 * made and awaiting user acceptance.  As a protocol is preparing incoming
 * connections, it creates a socket structure queued on so_incomp by calling
 * sonewconn().  When the connection is established, soisconnected() is
 * called, and transfers the socket structure to so_comp, making it available
 * to accept().
 *
 * If a socket is closed with sockets on either so_incomp or so_comp, these
 * sockets are dropped.
 *
 * If higher-level protocols are implemented in the kernel, the wakeups done
 * here will sometimes cause software-interrupt process scheduling.
3013 */ 3014void 3015soisconnecting(struct socket *so) 3016{ 3017 3018 SOCK_LOCK(so); 3019 so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); 3020 so->so_state |= SS_ISCONNECTING; 3021 SOCK_UNLOCK(so); 3022} 3023 3024void 3025soisconnected(struct socket *so) 3026{ 3027 struct socket *head; 3028 3029 ACCEPT_LOCK(); 3030 SOCK_LOCK(so); 3031 so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING); 3032 so->so_state |= SS_ISCONNECTED; 3033 head = so->so_head; 3034 if (head != NULL && (so->so_qstate & SQ_INCOMP)) { 3035 if ((so->so_options & SO_ACCEPTFILTER) == 0) { 3036 SOCK_UNLOCK(so); 3037 TAILQ_REMOVE(&head->so_incomp, so, so_list); 3038 head->so_incqlen--; 3039 so->so_qstate &= ~SQ_INCOMP; 3040 TAILQ_INSERT_TAIL(&head->so_comp, so, so_list); 3041 head->so_qlen++; 3042 so->so_qstate |= SQ_COMP; 3043 ACCEPT_UNLOCK(); 3044 sorwakeup(head); 3045 wakeup_one(&head->so_timeo); 3046 } else { 3047 ACCEPT_UNLOCK(); 3048 so->so_upcall = 3049 head->so_accf->so_accept_filter->accf_callback; 3050 so->so_upcallarg = head->so_accf->so_accept_filter_arg; 3051 so->so_rcv.sb_flags |= SB_UPCALL; 3052 so->so_options &= ~SO_ACCEPTFILTER; 3053 SOCK_UNLOCK(so); 3054 so->so_upcall(so, so->so_upcallarg, M_DONTWAIT); 3055 } 3056 return; 3057 } 3058 SOCK_UNLOCK(so); 3059 ACCEPT_UNLOCK(); 3060 wakeup(&so->so_timeo); 3061 sorwakeup(so); 3062 sowwakeup(so); 3063} 3064 3065void 3066soisdisconnecting(struct socket *so) 3067{ 3068 3069 /* 3070 * Note: This code assumes that SOCK_LOCK(so) and 3071 * SOCKBUF_LOCK(&so->so_rcv) are the same. 
3072 */ 3073 SOCKBUF_LOCK(&so->so_rcv); 3074 so->so_state &= ~SS_ISCONNECTING; 3075 so->so_state |= SS_ISDISCONNECTING; 3076 so->so_rcv.sb_state |= SBS_CANTRCVMORE; 3077 sorwakeup_locked(so); 3078 SOCKBUF_LOCK(&so->so_snd); 3079 so->so_snd.sb_state |= SBS_CANTSENDMORE; 3080 sowwakeup_locked(so); 3081 wakeup(&so->so_timeo); 3082} 3083 3084void 3085soisdisconnected(struct socket *so) 3086{ 3087 3088 /* 3089 * Note: This code assumes that SOCK_LOCK(so) and 3090 * SOCKBUF_LOCK(&so->so_rcv) are the same. 3091 */ 3092 SOCKBUF_LOCK(&so->so_rcv); 3093 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); 3094 so->so_state |= SS_ISDISCONNECTED; 3095 so->so_rcv.sb_state |= SBS_CANTRCVMORE; 3096 sorwakeup_locked(so); 3097 SOCKBUF_LOCK(&so->so_snd); 3098 so->so_snd.sb_state |= SBS_CANTSENDMORE; 3099 sbdrop_locked(&so->so_snd, so->so_snd.sb_cc); 3100 sowwakeup_locked(so); 3101 wakeup(&so->so_timeo); 3102} 3103 3104/* 3105 * Make a copy of a sockaddr in a malloced buffer of type M_SONAME. 3106 */ 3107struct sockaddr * 3108sodupsockaddr(const struct sockaddr *sa, int mflags) 3109{ 3110 struct sockaddr *sa2; 3111 3112 sa2 = malloc(sa->sa_len, M_SONAME, mflags); 3113 if (sa2) 3114 bcopy(sa, sa2, sa->sa_len); 3115 return sa2; 3116} 3117 3118/* 3119 * Create an external-format (``xsocket'') structure using the information in 3120 * the kernel-format socket structure pointed to by so. This is done to 3121 * reduce the spew of irrelevant information over this interface, to isolate 3122 * user code from changes in the kernel structure, and potentially to provide 3123 * information-hiding if we decide that some of this information should be 3124 * hidden from users. 
3125 */ 3126void 3127sotoxsocket(struct socket *so, struct xsocket *xso) 3128{ 3129 3130 xso->xso_len = sizeof *xso; 3131 xso->xso_so = so; 3132 xso->so_type = so->so_type; 3133 xso->so_options = so->so_options; 3134 xso->so_linger = so->so_linger; 3135 xso->so_state = so->so_state; 3136 xso->so_pcb = so->so_pcb; 3137 xso->xso_protocol = so->so_proto->pr_protocol; 3138 xso->xso_family = so->so_proto->pr_domain->dom_family; 3139 xso->so_qlen = so->so_qlen; 3140 xso->so_incqlen = so->so_incqlen; 3141 xso->so_qlimit = so->so_qlimit; 3142 xso->so_timeo = so->so_timeo; 3143 xso->so_error = so->so_error; 3144 xso->so_pgid = so->so_sigio ? so->so_sigio->sio_pgid : 0; 3145 xso->so_oobmark = so->so_oobmark; 3146 sbtoxsockbuf(&so->so_snd, &xso->so_snd); 3147 sbtoxsockbuf(&so->so_rcv, &xso->so_rcv); 3148 xso->so_uid = so->so_cred->cr_uid; 3149} 3150 3151 3152/* 3153 * Socket accessor functions to provide external consumers with 3154 * a safe interface to socket state 3155 * 3156 */ 3157 3158void 3159so_listeners_apply_all(struct socket *so, void (*func)(struct socket *, void *), void *arg) 3160{ 3161 3162 TAILQ_FOREACH(so, &so->so_comp, so_list) 3163 func(so, arg); 3164} 3165 3166struct sockbuf * 3167so_sockbuf_rcv(struct socket *so) 3168{ 3169 3170 return (&so->so_rcv); 3171} 3172 3173struct sockbuf * 3174so_sockbuf_snd(struct socket *so) 3175{ 3176 3177 return (&so->so_snd); 3178} 3179 3180int 3181so_state_get(const struct socket *so) 3182{ 3183 3184 return (so->so_state); 3185} 3186 3187void 3188so_state_set(struct socket *so, int val) 3189{ 3190 3191 so->so_state = val; 3192} 3193 3194int 3195so_options_get(const struct socket *so) 3196{ 3197 3198 return (so->so_options); 3199} 3200 3201void 3202so_options_set(struct socket *so, int val) 3203{ 3204 3205 so->so_options = val; 3206} 3207 3208int 3209so_error_get(const struct socket *so) 3210{ 3211 3212 return (so->so_error); 3213} 3214 3215void 3216so_error_set(struct socket *so, int val) 3217{ 3218 3219 so->so_error = val; 
3220} 3221 3222int 3223so_linger_get(const struct socket *so) 3224{ 3225 3226 return (so->so_linger); 3227} 3228 3229void 3230so_linger_set(struct socket *so, int val) 3231{ 3232 3233 so->so_linger = val; 3234} 3235 3236struct protosw * 3237so_protosw_get(const struct socket *so) 3238{ 3239 3240 return (so->so_proto); 3241} 3242 3243void 3244so_protosw_set(struct socket *so, struct protosw *val) 3245{ 3246 3247 so->so_proto = val; 3248} 3249 3250void 3251so_sorwakeup(struct socket *so) 3252{ 3253 3254 sorwakeup(so); 3255} 3256 3257void 3258so_sowwakeup(struct socket *so) 3259{ 3260 3261 sowwakeup(so); 3262} 3263 3264void 3265so_sorwakeup_locked(struct socket *so) 3266{ 3267 3268 sorwakeup_locked(so); 3269} 3270 3271void 3272so_sowwakeup_locked(struct socket *so) 3273{ 3274 3275 sowwakeup_locked(so); 3276} 3277 3278void 3279so_lock(struct socket *so) 3280{ 3281 SOCK_LOCK(so); 3282} 3283 3284void 3285so_unlock(struct socket *so) 3286{ 3287 SOCK_UNLOCK(so); 3288} 3289