/* uipc_socket.c revision 185892 */
1/*- 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993 3 * The Regents of the University of California. 4 * Copyright (c) 2004 The FreeBSD Foundation 5 * Copyright (c) 2004-2008 Robert N. M. Watson 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94 33 */ 34 35/* 36 * Comments on the socket life cycle: 37 * 38 * soalloc() sets of socket layer state for a socket, called only by 39 * socreate() and sonewconn(). Socket layer private. 
40 * 41 * sodealloc() tears down socket layer state for a socket, called only by 42 * sofree() and sonewconn(). Socket layer private. 43 * 44 * pru_attach() associates protocol layer state with an allocated socket; 45 * called only once, may fail, aborting socket allocation. This is called 46 * from socreate() and sonewconn(). Socket layer private. 47 * 48 * pru_detach() disassociates protocol layer state from an attached socket, 49 * and will be called exactly once for sockets in which pru_attach() has 50 * been successfully called. If pru_attach() returned an error, 51 * pru_detach() will not be called. Socket layer private. 52 * 53 * pru_abort() and pru_close() notify the protocol layer that the last 54 * consumer of a socket is starting to tear down the socket, and that the 55 * protocol should terminate the connection. Historically, pru_abort() also 56 * detached protocol state from the socket state, but this is no longer the 57 * case. 58 * 59 * socreate() creates a socket and attaches protocol state. This is a public 60 * interface that may be used by socket layer consumers to create new 61 * sockets. 62 * 63 * sonewconn() creates a socket and attaches protocol state. This is a 64 * public interface that may be used by protocols to create new sockets when 65 * a new connection is received and will be available for accept() on a 66 * listen socket. 67 * 68 * soclose() destroys a socket after possibly waiting for it to disconnect. 69 * This is a public interface that socket consumers should use to close and 70 * release a socket when done with it. 71 * 72 * soabort() destroys a socket without waiting for it to disconnect (used 73 * only for incoming connections that are already partially or fully 74 * connected). 
This is used internally by the socket layer when clearing 75 * listen socket queues (due to overflow or close on the listen socket), but 76 * is also a public interface protocols may use to abort connections in 77 * their incomplete listen queues should they no longer be required. Sockets 78 * placed in completed connection listen queues should not be aborted for 79 * reasons described in the comment above the soclose() implementation. This 80 * is not a general purpose close routine, and except in the specific 81 * circumstances described here, should not be used. 82 * 83 * sofree() will free a socket and its protocol state if all references on 84 * the socket have been released, and is the public interface to attempt to 85 * free a socket when a reference is removed. This is a socket layer private 86 * interface. 87 * 88 * NOTE: In addition to socreate() and soclose(), which provide a single 89 * socket reference to the consumer to be managed as required, there are two 90 * calls to explicitly manage socket references, soref(), and sorele(). 91 * Currently, these are generally required only when transitioning a socket 92 * from a listen queue to a file descriptor, in order to prevent garbage 93 * collection of the socket at an untimely moment. For a number of reasons, 94 * these interfaces are not preferred, and should be avoided. 
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/uipc_socket.c 185892 2008-12-10 22:10:37Z bz $");

#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_mac.h"
#include "opt_zero.h"
#include "opt_compat.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mac.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/domain.h>
#include <sys/file.h>			/* for struct knote */
#include <sys/kernel.h>
#include <sys/event.h>
#include <sys/eventhandler.h>
#include <sys/poll.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/resourcevar.h>
#include <net/route.h>
#include <sys/signalvar.h>
#include <sys/stat.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/uio.h>
#include <sys/jail.h>

#include <security/mac/mac_framework.h>

#include <vm/uma.h>

#ifdef COMPAT_IA32
#include <sys/mount.h>
#include <sys/sysent.h>
#include <compat/freebsd32/freebsd32.h>
#endif

/* Out-of-band receive helper shared by the soreceive() paths. */
static int	soreceive_rcvoob(struct socket *so, struct uio *uio,
		    int flags);

/* kqueue filter callbacks for socket read, write, and listen events. */
static void	filt_sordetach(struct knote *kn);
static int	filt_soread(struct knote *kn, long hint);
static void	filt_sowdetach(struct knote *kn);
static int	filt_sowrite(struct knote *kn, long hint);
static int	filt_solisten(struct knote *kn, long hint);

static struct filterops solisten_filtops =
	{ 1, NULL, filt_sordetach, filt_solisten };
static struct filterops soread_filtops =
	{ 1, NULL, filt_sordetach, filt_soread };
static struct filterops sowrite_filtops =
	{ 1, NULL, filt_sowdetach, filt_sowrite };

uma_zone_t socket_zone;		/* UMA zone backing all struct socket allocations */
so_gen_t so_gencnt;		/* generation count for sockets */

int maxsockets;			/* global limit on the number of sockets */

MALLOC_DEFINE(M_SONAME, "soname", "socket name");
MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");

static int somaxconn = SOMAXCONN;
static int sysctl_somaxconn(SYSCTL_HANDLER_ARGS);
/* XXX: we don't have SYSCTL_USHORT */
SYSCTL_PROC(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLTYPE_UINT | CTLFLAG_RW,
    0, sizeof(int), sysctl_somaxconn, "I", "Maximum pending socket connection "
    "queue size");
static int numopensockets;
SYSCTL_INT(_kern_ipc, OID_AUTO, numopensockets, CTLFLAG_RD,
    &numopensockets, 0, "Number of open sockets");
#ifdef ZERO_COPY_SOCKETS
/* These aren't static because they're used in other files. */
int so_zero_copy_send = 1;
int so_zero_copy_receive = 1;
SYSCTL_NODE(_kern_ipc, OID_AUTO, zero_copy, CTLFLAG_RD, 0,
    "Zero copy controls");
SYSCTL_INT(_kern_ipc_zero_copy, OID_AUTO, receive, CTLFLAG_RW,
    &so_zero_copy_receive, 0, "Enable zero copy receive");
SYSCTL_INT(_kern_ipc_zero_copy, OID_AUTO, send, CTLFLAG_RW,
    &so_zero_copy_send, 0, "Enable zero copy send");
#endif /* ZERO_COPY_SOCKETS */

/*
 * accept_mtx locks down per-socket fields relating to accept queues.  See
 * socketvar.h for an annotation of the protected fields of struct socket.
 */
struct mtx accept_mtx;
MTX_SYSINIT(accept_mtx, &accept_mtx, "accept", MTX_DEF);

/*
 * so_global_mtx protects so_gencnt, numopensockets, and the per-socket
 * so_gencnt field.
 */
static struct mtx so_global_mtx;
MTX_SYSINIT(so_global_mtx, &so_global_mtx, "so_glabel", MTX_DEF);

/*
 * General IPC sysctl name space, used by sockets and a variety of other IPC
 * types.
 */
SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC");

/*
 * Sysctl to get and set the maximum global sockets limit.  Notify protocols
 * of the change so that they can update their dependent limits as required.
212 */ 213static int 214sysctl_maxsockets(SYSCTL_HANDLER_ARGS) 215{ 216 int error, newmaxsockets; 217 218 newmaxsockets = maxsockets; 219 error = sysctl_handle_int(oidp, &newmaxsockets, 0, req); 220 if (error == 0 && req->newptr) { 221 if (newmaxsockets > maxsockets) { 222 maxsockets = newmaxsockets; 223 if (maxsockets > ((maxfiles / 4) * 3)) { 224 maxfiles = (maxsockets * 5) / 4; 225 maxfilesperproc = (maxfiles * 9) / 10; 226 } 227 EVENTHANDLER_INVOKE(maxsockets_change); 228 } else 229 error = EINVAL; 230 } 231 return (error); 232} 233 234SYSCTL_PROC(_kern_ipc, OID_AUTO, maxsockets, CTLTYPE_INT|CTLFLAG_RW, 235 &maxsockets, 0, sysctl_maxsockets, "IU", 236 "Maximum number of sockets avaliable"); 237 238/* 239 * Initialise maxsockets. 240 */ 241static void 242init_maxsockets(void *ignored) 243{ 244 245 TUNABLE_INT_FETCH("kern.ipc.maxsockets", &maxsockets); 246 maxsockets = imax(maxsockets, imax(maxfiles, nmbclusters)); 247} 248SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_maxsockets, NULL); 249 250/* 251 * Socket operation routines. These routines are called by the routines in 252 * sys_socket.c or from a system process, and implement the semantics of 253 * socket operations by switching out to the protocol specific routines. 254 */ 255 256/* 257 * Get a socket structure from our zone, and initialize it. Note that it 258 * would probably be better to allocate socket and PCB at the same time, but 259 * I'm not convinced that all the protocols can be easily modified to do 260 * this. 261 * 262 * soalloc() returns a socket with a ref count of 0. 
 */
static struct socket *
soalloc(void)
{
	struct socket *so;

	/* M_NOWAIT: callers may hold locks; M_ZERO gives a clean socket. */
	so = uma_zalloc(socket_zone, M_NOWAIT | M_ZERO);
	if (so == NULL)
		return (NULL);
#ifdef MAC
	if (mac_socket_init(so, M_NOWAIT) != 0) {
		uma_zfree(socket_zone, so);
		return (NULL);
	}
#endif
	SOCKBUF_LOCK_INIT(&so->so_snd, "so_snd");
	SOCKBUF_LOCK_INIT(&so->so_rcv, "so_rcv");
	sx_init(&so->so_snd.sb_sx, "so_snd_sx");
	sx_init(&so->so_rcv.sb_sx, "so_rcv_sx");
	TAILQ_INIT(&so->so_aiojobq);
	/* so_global_mtx protects so_gencnt and numopensockets. */
	mtx_lock(&so_global_mtx);
	so->so_gencnt = ++so_gencnt;
	++numopensockets;
	mtx_unlock(&so_global_mtx);
	return (so);
}

/*
 * Free the storage associated with a socket at the socket layer, tear down
 * locks, labels, etc.  All protocol state is assumed already to have been
 * torn down (and possibly never set up) by the caller.
 */
static void
sodealloc(struct socket *so)
{

	KASSERT(so->so_count == 0, ("sodealloc(): so_count %d", so->so_count));
	KASSERT(so->so_pcb == NULL, ("sodealloc(): so_pcb != NULL"));

	mtx_lock(&so_global_mtx);
	so->so_gencnt = ++so_gencnt;
	--numopensockets;	/* Could be below, but faster here. */
	mtx_unlock(&so_global_mtx);
	/* Return any socket buffer accounting charged to the owner. */
	if (so->so_rcv.sb_hiwat)
		(void)chgsbsize(so->so_cred->cr_uidinfo,
		    &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY);
	if (so->so_snd.sb_hiwat)
		(void)chgsbsize(so->so_cred->cr_uidinfo,
		    &so->so_snd.sb_hiwat, 0, RLIM_INFINITY);
#ifdef INET
	/* remove accept filter if one is present. */
	if (so->so_accf != NULL)
		do_setopt_accept_filter(so, NULL);
#endif
#ifdef MAC
	mac_socket_destroy(so);
#endif
	crfree(so->so_cred);
	sx_destroy(&so->so_snd.sb_sx);
	sx_destroy(&so->so_rcv.sb_sx);
	SOCKBUF_LOCK_DESTROY(&so->so_snd);
	SOCKBUF_LOCK_DESTROY(&so->so_rcv);
	uma_zfree(socket_zone, so);
}

/*
 * socreate returns a socket with a ref count of 1.
 * The socket should be closed with soclose().
 */
int
socreate(int dom, struct socket **aso, int type, int proto,
    struct ucred *cred, struct thread *td)
{
	struct protosw *prp;
	struct socket *so;
	int error;

	/* Look up the protocol switch by explicit protocol, else by type. */
	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);

	if (prp == NULL || prp->pr_usrreqs->pru_attach == NULL ||
	    prp->pr_usrreqs->pru_attach == pru_attach_notsupp)
		return (EPROTONOSUPPORT);

	/* Jailed processes may be restricted to a subset of domains. */
	if (jailed(cred) && jail_socket_unixiproute_only &&
	    prp->pr_domain->dom_family != PF_LOCAL &&
	    prp->pr_domain->dom_family != PF_INET &&
#ifdef INET6
	    prp->pr_domain->dom_family != PF_INET6 &&
#endif
	    prp->pr_domain->dom_family != PF_ROUTE) {
		return (EPROTONOSUPPORT);
	}

	if (prp->pr_type != type)
		return (EPROTOTYPE);
	so = soalloc();
	if (so == NULL)
		return (ENOBUFS);

	TAILQ_INIT(&so->so_incomp);
	TAILQ_INIT(&so->so_comp);
	so->so_type = type;
	so->so_cred = crhold(cred);
	/* Inherit the creating process's FIB for routable families. */
	if ((prp->pr_domain->dom_family == PF_INET) ||
	    (prp->pr_domain->dom_family == PF_ROUTE))
		so->so_fibnum = td->td_proc->p_fibnum;
	else
		so->so_fibnum = 0;
	so->so_proto = prp;
#ifdef MAC
	mac_socket_create(cred, so);
#endif
	knlist_init(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv),
	    NULL, NULL, NULL);
	knlist_init(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd),
	    NULL, NULL, NULL);
	so->so_count = 1;
	/*
	 * Auto-sizing of socket buffers is managed by the protocols and
	 * the appropriate flags must be set in the pru_attach function.
	 */
	error = (*prp->pr_usrreqs->pru_attach)(so, proto, td);
	if (error) {
		/* Attach failed: undo the single reference and free. */
		KASSERT(so->so_count == 1, ("socreate: so_count %d",
		    so->so_count));
		so->so_count = 0;
		sodealloc(so);
		return (error);
	}
	*aso = so;
	return (0);
}

#ifdef REGRESSION
static int regression_sonewconn_earlytest = 1;
SYSCTL_INT(_regression, OID_AUTO, sonewconn_earlytest, CTLFLAG_RW,
    &regression_sonewconn_earlytest, 0, "Perform early sonewconn limit test");
#endif

/*
 * When an attempt at a new connection is noted on a socket which accepts
 * connections, sonewconn is called.  If the connection is possible (subject
 * to space constraints, etc.) then we allocate a new structure, properly
 * linked into the data structure of the original socket, and return this.
 * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED.
 *
 * Note: the ref count on the socket is 0 on return.
 */
struct socket *
sonewconn(struct socket *head, int connstatus)
{
	struct socket *so;
	int over;

	/* Reject early if the listen queue is already well over its limit. */
	ACCEPT_LOCK();
	over = (head->so_qlen > 3 * head->so_qlimit / 2);
	ACCEPT_UNLOCK();
#ifdef REGRESSION
	if (regression_sonewconn_earlytest && over)
#else
	if (over)
#endif
		return (NULL);
	so = soalloc();
	if (so == NULL)
		return (NULL);
	/* With an accept filter, the socket is not reported ready yet. */
	if ((head->so_options & SO_ACCEPTFILTER) != 0)
		connstatus = 0;
	so->so_head = head;
	so->so_type = head->so_type;
	so->so_options = head->so_options &~ SO_ACCEPTCONN;
	so->so_linger = head->so_linger;
	so->so_state = head->so_state | SS_NOFDREF;
	so->so_proto = head->so_proto;
	so->so_cred = crhold(head->so_cred);
#ifdef MAC
	SOCK_LOCK(head);
	mac_socket_newconn(head, so);
	SOCK_UNLOCK(head);
#endif
	knlist_init(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv),
	    NULL, NULL, NULL);
	knlist_init(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd),
	    NULL, NULL, NULL);
	if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat) ||
	    (*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) {
		sodealloc(so);
		return (NULL);
	}
	/* Inherit buffer low-water marks, timeouts, and auto-size flags. */
	so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
	so->so_snd.sb_lowat = head->so_snd.sb_lowat;
	so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
	so->so_snd.sb_timeo = head->so_snd.sb_timeo;
	so->so_rcv.sb_flags |= head->so_rcv.sb_flags & SB_AUTOSIZE;
	so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE;
	so->so_state |= connstatus;
	ACCEPT_LOCK();
	if (connstatus) {
		/* Already connected: place on the completed queue. */
		TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
		so->so_qstate |= SQ_COMP;
		head->so_qlen++;
	} else {
		/*
		 * Keep removing sockets from the head until there's room for
		 * us to insert on the tail.  In pre-locking revisions, this
		 * was a simple if(), but as we could be racing with other
		 * threads and soabort() requires dropping locks, we must
		 * loop waiting for the condition to be true.
		 */
		while (head->so_incqlen > head->so_qlimit) {
			struct socket *sp;
			sp = TAILQ_FIRST(&head->so_incomp);
			TAILQ_REMOVE(&head->so_incomp, sp, so_list);
			head->so_incqlen--;
			sp->so_qstate &= ~SQ_INCOMP;
			sp->so_head = NULL;
			ACCEPT_UNLOCK();
			soabort(sp);
			ACCEPT_LOCK();
		}
		TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list);
		so->so_qstate |= SQ_INCOMP;
		head->so_incqlen++;
	}
	ACCEPT_UNLOCK();
	if (connstatus) {
		/* Wake readers/accepters blocked on the listen socket. */
		sorwakeup(head);
		wakeup_one(&head->so_timeo);
	}
	return (so);
}

int
sobind(struct socket *so, struct sockaddr *nam, struct thread *td)
{

	return ((*so->so_proto->pr_usrreqs->pru_bind)(so, nam, td));
}

/*
 * solisten() transitions a socket from a non-listening state to a listening
 * state, but can also be used to update the listen queue depth on an
 * existing listen socket.
The protocol will call back into the sockets 509 * layer using solisten_proto_check() and solisten_proto() to check and set 510 * socket-layer listen state. Call backs are used so that the protocol can 511 * acquire both protocol and socket layer locks in whatever order is required 512 * by the protocol. 513 * 514 * Protocol implementors are advised to hold the socket lock across the 515 * socket-layer test and set to avoid races at the socket layer. 516 */ 517int 518solisten(struct socket *so, int backlog, struct thread *td) 519{ 520 521 return ((*so->so_proto->pr_usrreqs->pru_listen)(so, backlog, td)); 522} 523 524int 525solisten_proto_check(struct socket *so) 526{ 527 528 SOCK_LOCK_ASSERT(so); 529 530 if (so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING | 531 SS_ISDISCONNECTING)) 532 return (EINVAL); 533 return (0); 534} 535 536void 537solisten_proto(struct socket *so, int backlog) 538{ 539 540 SOCK_LOCK_ASSERT(so); 541 542 if (backlog < 0 || backlog > somaxconn) 543 backlog = somaxconn; 544 so->so_qlimit = backlog; 545 so->so_options |= SO_ACCEPTCONN; 546} 547 548/* 549 * Attempt to free a socket. This should really be sotryfree(). 550 * 551 * sofree() will succeed if: 552 * 553 * - There are no outstanding file descriptor references or related consumers 554 * (so_count == 0). 555 * 556 * - The socket has been closed by user space, if ever open (SS_NOFDREF). 557 * 558 * - The protocol does not have an outstanding strong reference on the socket 559 * (SS_PROTOREF). 560 * 561 * - The socket is not in a completed connection queue, so a process has been 562 * notified that it is present. If it is removed, the user process may 563 * block in accept() despite select() saying the socket was ready. 564 * 565 * Otherwise, it will quietly abort so that a future call to sofree(), when 566 * conditions are right, can succeed. 
567 */ 568void 569sofree(struct socket *so) 570{ 571 struct protosw *pr = so->so_proto; 572 struct socket *head; 573 574 ACCEPT_LOCK_ASSERT(); 575 SOCK_LOCK_ASSERT(so); 576 577 if ((so->so_state & SS_NOFDREF) == 0 || so->so_count != 0 || 578 (so->so_state & SS_PROTOREF) || (so->so_qstate & SQ_COMP)) { 579 SOCK_UNLOCK(so); 580 ACCEPT_UNLOCK(); 581 return; 582 } 583 584 head = so->so_head; 585 if (head != NULL) { 586 KASSERT((so->so_qstate & SQ_COMP) != 0 || 587 (so->so_qstate & SQ_INCOMP) != 0, 588 ("sofree: so_head != NULL, but neither SQ_COMP nor " 589 "SQ_INCOMP")); 590 KASSERT((so->so_qstate & SQ_COMP) == 0 || 591 (so->so_qstate & SQ_INCOMP) == 0, 592 ("sofree: so->so_qstate is SQ_COMP and also SQ_INCOMP")); 593 TAILQ_REMOVE(&head->so_incomp, so, so_list); 594 head->so_incqlen--; 595 so->so_qstate &= ~SQ_INCOMP; 596 so->so_head = NULL; 597 } 598 KASSERT((so->so_qstate & SQ_COMP) == 0 && 599 (so->so_qstate & SQ_INCOMP) == 0, 600 ("sofree: so_head == NULL, but still SQ_COMP(%d) or SQ_INCOMP(%d)", 601 so->so_qstate & SQ_COMP, so->so_qstate & SQ_INCOMP)); 602 if (so->so_options & SO_ACCEPTCONN) { 603 KASSERT((TAILQ_EMPTY(&so->so_comp)), ("sofree: so_comp populated")); 604 KASSERT((TAILQ_EMPTY(&so->so_incomp)), ("sofree: so_comp populated")); 605 } 606 SOCK_UNLOCK(so); 607 ACCEPT_UNLOCK(); 608 609 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL) 610 (*pr->pr_domain->dom_dispose)(so->so_rcv.sb_mb); 611 if (pr->pr_usrreqs->pru_detach != NULL) 612 (*pr->pr_usrreqs->pru_detach)(so); 613 614 /* 615 * From this point on, we assume that no other references to this 616 * socket exist anywhere else in the stack. Therefore, no locks need 617 * to be acquired or held. 618 * 619 * We used to do a lot of socket buffer and socket locking here, as 620 * well as invoke sorflush() and perform wakeups. The direct call to 621 * dom_dispose() and sbrelease_internal() are an inlining of what was 622 * necessary from sorflush(). 
623 * 624 * Notice that the socket buffer and kqueue state are torn down 625 * before calling pru_detach. This means that protocols shold not 626 * assume they can perform socket wakeups, etc, in their detach code. 627 */ 628 sbdestroy(&so->so_snd, so); 629 sbdestroy(&so->so_rcv, so); 630 knlist_destroy(&so->so_rcv.sb_sel.si_note); 631 knlist_destroy(&so->so_snd.sb_sel.si_note); 632 sodealloc(so); 633} 634 635/* 636 * Close a socket on last file table reference removal. Initiate disconnect 637 * if connected. Free socket when disconnect complete. 638 * 639 * This function will sorele() the socket. Note that soclose() may be called 640 * prior to the ref count reaching zero. The actual socket structure will 641 * not be freed until the ref count reaches zero. 642 */ 643int 644soclose(struct socket *so) 645{ 646 int error = 0; 647 648 KASSERT(!(so->so_state & SS_NOFDREF), ("soclose: SS_NOFDREF on enter")); 649 650 funsetown(&so->so_sigio); 651 if (so->so_state & SS_ISCONNECTED) { 652 if ((so->so_state & SS_ISDISCONNECTING) == 0) { 653 error = sodisconnect(so); 654 if (error) 655 goto drop; 656 } 657 if (so->so_options & SO_LINGER) { 658 if ((so->so_state & SS_ISDISCONNECTING) && 659 (so->so_state & SS_NBIO)) 660 goto drop; 661 while (so->so_state & SS_ISCONNECTED) { 662 error = tsleep(&so->so_timeo, 663 PSOCK | PCATCH, "soclos", so->so_linger * hz); 664 if (error) 665 break; 666 } 667 } 668 } 669 670drop: 671 if (so->so_proto->pr_usrreqs->pru_close != NULL) 672 (*so->so_proto->pr_usrreqs->pru_close)(so); 673 if (so->so_options & SO_ACCEPTCONN) { 674 struct socket *sp; 675 ACCEPT_LOCK(); 676 while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) { 677 TAILQ_REMOVE(&so->so_incomp, sp, so_list); 678 so->so_incqlen--; 679 sp->so_qstate &= ~SQ_INCOMP; 680 sp->so_head = NULL; 681 ACCEPT_UNLOCK(); 682 soabort(sp); 683 ACCEPT_LOCK(); 684 } 685 while ((sp = TAILQ_FIRST(&so->so_comp)) != NULL) { 686 TAILQ_REMOVE(&so->so_comp, sp, so_list); 687 so->so_qlen--; 688 sp->so_qstate &= 
~SQ_COMP; 689 sp->so_head = NULL; 690 ACCEPT_UNLOCK(); 691 soabort(sp); 692 ACCEPT_LOCK(); 693 } 694 ACCEPT_UNLOCK(); 695 } 696 ACCEPT_LOCK(); 697 SOCK_LOCK(so); 698 KASSERT((so->so_state & SS_NOFDREF) == 0, ("soclose: NOFDREF")); 699 so->so_state |= SS_NOFDREF; 700 sorele(so); 701 return (error); 702} 703 704/* 705 * soabort() is used to abruptly tear down a connection, such as when a 706 * resource limit is reached (listen queue depth exceeded), or if a listen 707 * socket is closed while there are sockets waiting to be accepted. 708 * 709 * This interface is tricky, because it is called on an unreferenced socket, 710 * and must be called only by a thread that has actually removed the socket 711 * from the listen queue it was on, or races with other threads are risked. 712 * 713 * This interface will call into the protocol code, so must not be called 714 * with any socket locks held. Protocols do call it while holding their own 715 * recursible protocol mutexes, but this is something that should be subject 716 * to review in the future. 717 */ 718void 719soabort(struct socket *so) 720{ 721 722 /* 723 * In as much as is possible, assert that no references to this 724 * socket are held. This is not quite the same as asserting that the 725 * current thread is responsible for arranging for no references, but 726 * is as close as we can get for now. 
727 */ 728 KASSERT(so->so_count == 0, ("soabort: so_count")); 729 KASSERT((so->so_state & SS_PROTOREF) == 0, ("soabort: SS_PROTOREF")); 730 KASSERT(so->so_state & SS_NOFDREF, ("soabort: !SS_NOFDREF")); 731 KASSERT((so->so_state & SQ_COMP) == 0, ("soabort: SQ_COMP")); 732 KASSERT((so->so_state & SQ_INCOMP) == 0, ("soabort: SQ_INCOMP")); 733 734 if (so->so_proto->pr_usrreqs->pru_abort != NULL) 735 (*so->so_proto->pr_usrreqs->pru_abort)(so); 736 ACCEPT_LOCK(); 737 SOCK_LOCK(so); 738 sofree(so); 739} 740 741int 742soaccept(struct socket *so, struct sockaddr **nam) 743{ 744 int error; 745 746 SOCK_LOCK(so); 747 KASSERT((so->so_state & SS_NOFDREF) != 0, ("soaccept: !NOFDREF")); 748 so->so_state &= ~SS_NOFDREF; 749 SOCK_UNLOCK(so); 750 error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam); 751 return (error); 752} 753 754int 755soconnect(struct socket *so, struct sockaddr *nam, struct thread *td) 756{ 757 int error; 758 759 if (so->so_options & SO_ACCEPTCONN) 760 return (EOPNOTSUPP); 761 /* 762 * If protocol is connection-based, can only connect once. 763 * Otherwise, if connected, try to disconnect first. This allows 764 * user to disconnect by connecting to, e.g., a null address. 765 */ 766 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && 767 ((so->so_proto->pr_flags & PR_CONNREQUIRED) || 768 (error = sodisconnect(so)))) { 769 error = EISCONN; 770 } else { 771 /* 772 * Prevent accumulated error from previous connection from 773 * biting us. 
774 */ 775 so->so_error = 0; 776 error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, td); 777 } 778 779 return (error); 780} 781 782int 783soconnect2(struct socket *so1, struct socket *so2) 784{ 785 786 return ((*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2)); 787} 788 789int 790sodisconnect(struct socket *so) 791{ 792 int error; 793 794 if ((so->so_state & SS_ISCONNECTED) == 0) 795 return (ENOTCONN); 796 if (so->so_state & SS_ISDISCONNECTING) 797 return (EALREADY); 798 error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so); 799 return (error); 800} 801 802#ifdef ZERO_COPY_SOCKETS 803struct so_zerocopy_stats{ 804 int size_ok; 805 int align_ok; 806 int found_ifp; 807}; 808struct so_zerocopy_stats so_zerocp_stats = {0,0,0}; 809#include <netinet/in.h> 810#include <net/route.h> 811#include <netinet/in_pcb.h> 812#include <vm/vm.h> 813#include <vm/vm_page.h> 814#include <vm/vm_object.h> 815 816/* 817 * sosend_copyin() is only used if zero copy sockets are enabled. Otherwise 818 * sosend_dgram() and sosend_generic() use m_uiotombuf(). 819 * 820 * sosend_copyin() accepts a uio and prepares an mbuf chain holding part or 821 * all of the data referenced by the uio. If desired, it uses zero-copy. 822 * *space will be updated to reflect data copied in. 823 * 824 * NB: If atomic I/O is requested, the caller must already have checked that 825 * space can hold resid bytes. 826 * 827 * NB: In the event of an error, the caller may need to free the partial 828 * chain pointed to by *mpp. The contents of both *uio and *space may be 829 * modified even in the case of an error. 
 */
static int
sosend_copyin(struct uio *uio, struct mbuf **retmp, int atomic, long *space,
    int flags)
{
	struct mbuf *m, **mp, *top;
	long len, resid;
	int error;
#ifdef ZERO_COPY_SOCKETS
	int cow_send;
#endif

	*retmp = top = NULL;
	mp = &top;	/* mp always points at the next link to fill. */
	len = 0;
	resid = uio->uio_resid;
	error = 0;
	do {
#ifdef ZERO_COPY_SOCKETS
		cow_send = 0;
#endif /* ZERO_COPY_SOCKETS */
		if (resid >= MINCLSIZE) {
			/* Large remainder: use a cluster (or zero-copy page). */
#ifdef ZERO_COPY_SOCKETS
			if (top == NULL) {
				/* First mbuf of the chain gets the pkthdr. */
				m = m_gethdr(M_WAITOK, MT_DATA);
				m->m_pkthdr.len = 0;
				m->m_pkthdr.rcvif = NULL;
			} else
				m = m_get(M_WAITOK, MT_DATA);
			if (so_zero_copy_send &&
			    resid>=PAGE_SIZE &&
			    *space>=PAGE_SIZE &&
			    uio->uio_iov->iov_len>=PAGE_SIZE) {
				so_zerocp_stats.size_ok++;
				so_zerocp_stats.align_ok++;
				/* Copy-on-write map the user page; 0 = fell back. */
				cow_send = socow_setup(m, uio);
				len = cow_send;
			}
			if (!cow_send) {
				m_clget(m, M_WAITOK);
				len = min(min(MCLBYTES, resid), *space);
			}
#else /* ZERO_COPY_SOCKETS */
			if (top == NULL) {
				m = m_getcl(M_WAIT, MT_DATA, M_PKTHDR);
				m->m_pkthdr.len = 0;
				m->m_pkthdr.rcvif = NULL;
			} else
				m = m_getcl(M_WAIT, MT_DATA, 0);
			len = min(min(MCLBYTES, resid), *space);
#endif /* ZERO_COPY_SOCKETS */
		} else {
			/* Small remainder: plain mbufs are enough. */
			if (top == NULL) {
				m = m_gethdr(M_WAIT, MT_DATA);
				m->m_pkthdr.len = 0;
				m->m_pkthdr.rcvif = NULL;

				len = min(min(MHLEN, resid), *space);
				/*
				 * For datagram protocols, leave room
				 * for protocol headers in first mbuf.
				 */
				if (atomic && m && len < MHLEN)
					MH_ALIGN(m, len);
			} else {
				m = m_get(M_WAIT, MT_DATA);
				len = min(min(MLEN, resid), *space);
			}
		}
		if (m == NULL) {
			error = ENOBUFS;
			goto out;
		}

		*space -= len;
#ifdef ZERO_COPY_SOCKETS
		/* Zero-copy already mapped the data; nothing to move. */
		if (cow_send)
			error = 0;
		else
#endif /* ZERO_COPY_SOCKETS */
		error = uiomove(mtod(m, void *), (int)len, uio);
		resid = uio->uio_resid;
		m->m_len = len;
		*mp = m;	/* link the new mbuf onto the chain */
		top->m_pkthdr.len += len;
		if (error)
			goto out;
		mp = &m->m_next;
		if (resid <= 0) {
			if (flags & MSG_EOR)
				top->m_flags |= M_EOR;
			break;
		}
	} while (*space > 0 && atomic);
out:
	/* Return the chain even on error so the caller can free it. */
	*retmp = top;
	return (error);
}
#endif /*ZERO_COPY_SOCKETS*/

#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT)

/*
 * Optimized send path for datagram sockets: requires SOCK_DGRAM with
 * PR_ATOMIC (both asserted below) and performs a single pass with no
 * sblock() serialization against other senders.
 */
int
sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio,
    struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
{
	long space, resid;
	int clen = 0, error, dontroute;
#ifdef ZERO_COPY_SOCKETS
	int atomic = sosendallatonce(so) || top;
#endif

	KASSERT(so->so_type == SOCK_DGRAM, ("sodgram_send: !SOCK_DGRAM"));
	KASSERT(so->so_proto->pr_flags & PR_ATOMIC,
	    ("sodgram_send: !PR_ATOMIC"));

	if (uio != NULL)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/*
	 * In theory resid should be unsigned.  However, space must be
	 * signed, as it might be less than 0 if we over-committed, and we
	 * must use a signed comparison of space and resid.  On the other
	 * hand, a negative resid causes us to loop sending 0-length
	 * segments to the protocol.
	 *
	 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
	 * type sockets since that's an error.
	 */
	if (resid < 0) {
		error = EINVAL;
		goto out;
	}

	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0;
	if (td != NULL)
		td->td_ru.ru_msgsnd++;
	if (control != NULL)
		clen = control->m_len;

	SOCKBUF_LOCK(&so->so_snd);
	if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
		SOCKBUF_UNLOCK(&so->so_snd);
		error = EPIPE;
		goto out;
	}
	if (so->so_error) {
		error = so->so_error;
		so->so_error = 0;
		SOCKBUF_UNLOCK(&so->so_snd);
		goto out;
	}
	if ((so->so_state & SS_ISCONNECTED) == 0) {
		/*
		 * `sendto' and `sendmsg' is allowed on a connection-based
		 * socket if it supports implied connect.  Return ENOTCONN if
		 * not connected and no address is supplied.
		 */
		if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
		    (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
			if ((so->so_state & SS_ISCONFIRMING) == 0 &&
			    !(resid == 0 && clen != 0)) {
				SOCKBUF_UNLOCK(&so->so_snd);
				error = ENOTCONN;
				goto out;
			}
		} else if (addr == NULL) {
			if (so->so_proto->pr_flags & PR_CONNREQUIRED)
				error = ENOTCONN;
			else
				error = EDESTADDRREQ;
			SOCKBUF_UNLOCK(&so->so_snd);
			goto out;
		}
	}

	/*
	 * Do we need MSG_OOB support in SOCK_DGRAM?  Signs here may be a
	 * problem and need fixing.
	 */
	space = sbspace(&so->so_snd);
	if (flags & MSG_OOB)
		space += 1024;
	space -= clen;
	SOCKBUF_UNLOCK(&so->so_snd);
	if (resid > space) {
		error = EMSGSIZE;
		goto out;
	}
	if (uio == NULL) {
		resid = 0;
		if (flags & MSG_EOR)
			top->m_flags |= M_EOR;
	} else {
#ifdef ZERO_COPY_SOCKETS
		error = sosend_copyin(uio, &top, atomic, &space, flags);
		if (error)
			goto out;
#else
		/*
		 * Copy the data from userland into a mbuf chain.
		 * If no data is to be copied in, a single empty mbuf
		 * is returned.
		 */
		top = m_uiotombuf(uio, M_WAITOK, space, max_hdr,
		    (M_PKTHDR | ((flags & MSG_EOR) ? M_EOR : 0)));
		if (top == NULL) {
			error = EFAULT;	/* only possible error */
			goto out;
		}
		space -= resid - uio->uio_resid;
#endif
		resid = uio->uio_resid;
	}
	KASSERT(resid == 0, ("sosend_dgram: resid != 0"));
	/*
	 * XXXRW: Frobbing SO_DONTROUTE here is even worse without sblock
	 * than with.
	 */
	if (dontroute) {
		SOCK_LOCK(so);
		so->so_options |= SO_DONTROUTE;
		SOCK_UNLOCK(so);
	}
	/*
	 * XXX all the SBS_CANTSENDMORE checks previously done could be out
	 * of date.  We could have received a reset packet in an interrupt or
	 * maybe we slept while doing page faults in uiomove() etc.  We could
	 * probably recheck again inside the locking protection here, but
	 * there are probably other places that this also happens.  We must
	 * rethink this.
	 */
	error = (*so->so_proto->pr_usrreqs->pru_send)(so,
	    (flags & MSG_OOB) ? PRUS_OOB :
	/*
	 * If the user set MSG_EOF, the protocol understands this flag and
	 * nothing left to send then use PRU_SEND_EOF instead of PRU_SEND.
	 */
	    ((flags & MSG_EOF) &&
	     (so->so_proto->pr_flags & PR_IMPLOPCL) &&
	     (resid <= 0)) ?
		PRUS_EOF :
		/* If there is more to send set PRUS_MORETOCOME */
		(resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
	    top, addr, control, td);
	if (dontroute) {
		SOCK_LOCK(so);
		so->so_options &= ~SO_DONTROUTE;
		SOCK_UNLOCK(so);
	}
	/* Ownership of top and control passed to pru_send; don't free. */
	clen = 0;
	control = NULL;
	top = NULL;
out:
	if (top != NULL)
		m_freem(top);
	if (control != NULL)
		m_freem(control);
	return (error);
}

/*
 * Send on a socket.  If send must go all at once and message is larger than
 * send buffering, then hard error.  Lock against other senders.
 * If must go
 * all at once and not enough room now, then inform user that this would
 * block and do nothing.  Otherwise, if nonblocking, send as much as
 * possible.  The data to be sent is described by "uio" if nonzero, otherwise
 * by the mbuf chain "top" (which must be null if uio is not).  Data provided
 * in mbuf chain must be small enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers must check for short
 * counts if EINTR/ERESTART are returned.  Data and control buffers are freed
 * on return.
 */
int
sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio,
    struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
{
	long space, resid;
	int clen = 0, error, dontroute;
	/* 'atomic': record-oriented protocol, or caller-supplied chain. */
	int atomic = sosendallatonce(so) || top;

	if (uio != NULL)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/*
	 * In theory resid should be unsigned.  However, space must be
	 * signed, as it might be less than 0 if we over-committed, and we
	 * must use a signed comparison of space and resid.  On the other
	 * hand, a negative resid causes us to loop sending 0-length
	 * segments to the protocol.
	 *
	 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
	 * type sockets since that's an error.
	 */
	if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
		error = EINVAL;
		goto out;
	}

	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	if (td != NULL)
		td->td_ru.ru_msgsnd++;
	if (control != NULL)
		clen = control->m_len;

	/* Serialize against other senders for the whole (multi-pass) send. */
	error = sblock(&so->so_snd, SBLOCKWAIT(flags));
	if (error)
		goto out;

restart:
	do {
		SOCKBUF_LOCK(&so->so_snd);
		if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
			SOCKBUF_UNLOCK(&so->so_snd);
			error = EPIPE;
			goto release;
		}
		if (so->so_error) {
			/* Report and clear a pending asynchronous error. */
			error = so->so_error;
			so->so_error = 0;
			SOCKBUF_UNLOCK(&so->so_snd);
			goto release;
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			/*
			 * `sendto' and `sendmsg' is allowed on a connection-
			 * based socket if it supports implied connect.
			 * Return ENOTCONN if not connected and no address is
			 * supplied.
			 */
			if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
			    (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0)) {
					SOCKBUF_UNLOCK(&so->so_snd);
					error = ENOTCONN;
					goto release;
				}
			} else if (addr == NULL) {
				SOCKBUF_UNLOCK(&so->so_snd);
				if (so->so_proto->pr_flags & PR_CONNREQUIRED)
					error = ENOTCONN;
				else
					error = EDESTADDRREQ;
				goto release;
			}
		}
		space = sbspace(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		/* An atomic request larger than the buffer can never succeed. */
		if ((atomic && resid > so->so_snd.sb_hiwat) ||
		    clen > so->so_snd.sb_hiwat) {
			SOCKBUF_UNLOCK(&so->so_snd);
			error = EMSGSIZE;
			goto release;
		}
		if (space < resid + clen &&
		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
			if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO)) {
				SOCKBUF_UNLOCK(&so->so_snd);
				error = EWOULDBLOCK;
				goto release;
			}
			/* Sleep until the protocol frees buffer space. */
			error = sbwait(&so->so_snd);
			SOCKBUF_UNLOCK(&so->so_snd);
			if (error)
				goto release;
			goto restart;
		}
		SOCKBUF_UNLOCK(&so->so_snd);
		space -= clen;
		do {
			if (uio == NULL) {
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else {
#ifdef ZERO_COPY_SOCKETS
				error = sosend_copyin(uio, &top, atomic,
				    &space, flags);
				if (error != 0)
					goto release;
#else
				/*
				 * Copy the data from userland into a mbuf
				 * chain.  If no data is to be copied in,
				 * a single empty mbuf is returned.
				 */
				top = m_uiotombuf(uio, M_WAITOK, space,
				    (atomic ? max_hdr : 0),
				    (atomic ? M_PKTHDR : 0) |
				    ((flags & MSG_EOR) ? M_EOR : 0));
				if (top == NULL) {
					error = EFAULT;	/* only possible error */
					goto release;
				}
				space -= resid - uio->uio_resid;
#endif
				resid = uio->uio_resid;
			}
			if (dontroute) {
				SOCK_LOCK(so);
				so->so_options |= SO_DONTROUTE;
				SOCK_UNLOCK(so);
			}
			/*
			 * XXX all the SBS_CANTSENDMORE checks previously
			 * done could be out of date.  We could have received
			 * a reset packet in an interrupt or maybe we slept
			 * while doing page faults in uiomove() etc.  We
			 * could probably recheck again inside the locking
			 * protection here, but there are probably other
			 * places that this also happens.  We must rethink
			 * this.
			 */
			error = (*so->so_proto->pr_usrreqs->pru_send)(so,
			    (flags & MSG_OOB) ? PRUS_OOB :
			/*
			 * If the user set MSG_EOF, the protocol understands
			 * this flag and nothing left to send then use
			 * PRU_SEND_EOF instead of PRU_SEND.
			 */
			    ((flags & MSG_EOF) &&
			     (so->so_proto->pr_flags & PR_IMPLOPCL) &&
			     (resid <= 0)) ?
				PRUS_EOF :
			/* If there is more to send set PRUS_MORETOCOME. */
			    (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
			    top, addr, control, td);
			if (dontroute) {
				SOCK_LOCK(so);
				so->so_options &= ~SO_DONTROUTE;
				SOCK_UNLOCK(so);
			}
			/* Protocol now owns top/control; don't free them below. */
			clen = 0;
			control = NULL;
			top = NULL;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	sbunlock(&so->so_snd);
out:
	if (top != NULL)
		m_freem(top);
	if (control != NULL)
		m_freem(control);
	return (error);
}

/*
 * Dispatch a send request to the protocol's pru_sosend method (typically
 * sosend_generic() or sosend_dgram() above).
 */
int
sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
    struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
{

	return (so->so_proto->pr_usrreqs->pru_sosend(so, addr, uio, top,
	    control, flags, td));
}

/*
 * The part of soreceive() that implements reading non-inline out-of-band
 * data from a socket.  For more complete comments, see soreceive(), from
 * which this code originated.
 *
 * Note that soreceive_rcvoob(), unlike the remainder of soreceive(), is
 * unable to return an mbuf chain to the caller.
 */
static int
soreceive_rcvoob(struct socket *so, struct uio *uio, int flags)
{
	struct protosw *pr = so->so_proto;
	struct mbuf *m;
	int error;

	KASSERT(flags & MSG_OOB, ("soreceive_rcvoob: (flags & MSG_OOB) == 0"));

	/* Ask the protocol to fill 'm' with the out-of-band byte(s). */
	m = m_get(M_WAIT, MT_DATA);
	error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
	if (error)
		goto bad;
	/*
	 * Copy out and free mbufs until the caller's buffer is full, the
	 * chain is exhausted, or an error occurs.
	 */
	do {
#ifdef ZERO_COPY_SOCKETS
		if (so_zero_copy_receive) {
			int disposable;

			if ((m->m_flags & M_EXT)
			 && (m->m_ext.ext_type == EXT_DISPOSABLE))
				disposable = 1;
			else
				disposable = 0;

			error = uiomoveco(mtod(m, void *),
					  min(uio->uio_resid, m->m_len),
					  uio, disposable);
		} else
#endif /* ZERO_COPY_SOCKETS */
		error = uiomove(mtod(m, void *),
		    (int) min(uio->uio_resid, m->m_len), uio);
		m = m_free(m);
	} while (uio->uio_resid && error == 0 && m);
bad:
	if (m != NULL)
		m_freem(m);
	return (error);
}

/*
 * Following replacement or removal of the first mbuf on the first mbuf chain
 * of a socket buffer, push necessary state changes back into the socket
 * buffer so that other consumers see the values consistently.  'nextrecord'
 * is the callers locally stored value of the original value of
 * sb->sb_mb->m_nextpkt which must be restored when the lead mbuf changes.
 * NOTE: 'nextrecord' may be NULL.
 */
static __inline void
sockbuf_pushsync(struct sockbuf *sb, struct mbuf *nextrecord)
{

	SOCKBUF_LOCK_ASSERT(sb);
	/*
	 * First, update for the new value of nextrecord.  If necessary, make
	 * it the first record.
	 */
	if (sb->sb_mb != NULL)
		sb->sb_mb->m_nextpkt = nextrecord;
	else
		sb->sb_mb = nextrecord;

	/*
	 * Now update any dependent socket buffer fields to reflect the new
	 * state.  This is an expanded inline of SB_EMPTY_FIXUP(), with the
	 * addition of a second clause that takes care of the case where
	 * sb_mb has been updated, but remains the last record.
	 */
	if (sb->sb_mb == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (sb->sb_mb->m_nextpkt == NULL)
		sb->sb_lastrecord = sb->sb_mb;
}


/*
 * Implement receive operations on a socket.  We depend on the way that
 * records are added to the sockbuf by sbappend.  In particular, each record
 * (mbufs linked through m_next) must begin with an address if the protocol
 * so specifies, followed by an optional mbuf or mbufs containing ancillary
 * data, and then zero or more mbufs of data.  In order to allow parallelism
 * between network receive and copying to user space, as well as avoid
 * sleeping with a mutex held, we release the socket buffer mutex during the
 * user space copy.  Although the sockbuf is locked, new data may still be
 * appended, and thus we must maintain consistency of the sockbuf during that
 * time.
 *
 * The caller may receive the data as a single mbuf chain by supplying an
 * mbuf **mp0 for use in returning the chain.  The uio is then used only for
 * the count in uio_resid.
 */
int
soreceive_generic(struct socket *so, struct sockaddr **psa, struct uio *uio,
    struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
	struct mbuf *m, **mp;
	int flags, len, error, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type = 0;
	/* Used at the bottom to detect a pass that transferred nothing. */
	int orig_resid = uio->uio_resid;

	mp = mp0;
	if (psa != NULL)
		*psa = NULL;
	if (controlp != NULL)
		*controlp = NULL;
	if (flagsp != NULL)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	/* Non-inline out-of-band data is handled entirely by the helper. */
	if (flags & MSG_OOB)
		return (soreceive_rcvoob(so, uio, flags));
	if (mp != NULL)
		*mp = NULL;
	if ((pr->pr_flags & PR_WANTRCVD) && (so->so_state & SS_ISCONFIRMING)
	    && uio->uio_resid)
		(*pr->pr_usrreqs->pru_rcvd)(so, 0);

	/* Serialize against other readers pulling off the sockbuf front. */
	error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
	if (error)
		return (error);

restart:
	SOCKBUF_LOCK(&so->so_rcv);
	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more (subject
	 * to any timeout) if:
	 *   1. the current count is less than the low water mark, or
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat).
	 *   3. MSG_DONTWAIT is not set
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning a
	 * short count if a timeout or signal occurs after we start.
	 */
	if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) {
		KASSERT(m != NULL || !so->so_rcv.sb_cc,
		    ("receive: m == %p so->so_rcv.sb_cc == %u",
		    m, so->so_rcv.sb_cc));
		if (so->so_error) {
			if (m != NULL)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			SOCKBUF_UNLOCK(&so->so_rcv);
			goto release;
		}
		SOCKBUF_LOCK_ASSERT(&so->so_rcv);
		if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
			if (m == NULL) {
				/* EOF: nothing buffered and no more coming. */
				SOCKBUF_UNLOCK(&so->so_rcv);
				goto release;
			} else
				goto dontblock;
		}
		/* A record boundary or OOB data means we can deliver now. */
		for (; m != NULL; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			SOCKBUF_UNLOCK(&so->so_rcv);
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0) {
			SOCKBUF_UNLOCK(&so->so_rcv);
			goto release;
		}
		if ((so->so_state & SS_NBIO) ||
		    (flags & (MSG_DONTWAIT|MSG_NBIO))) {
			SOCKBUF_UNLOCK(&so->so_rcv);
			error = EWOULDBLOCK;
			goto release;
		}
		SBLASTRECORDCHK(&so->so_rcv);
		SBLASTMBUFCHK(&so->so_rcv);
		/* Sleep until data arrives, then re-evaluate from the top. */
		error = sbwait(&so->so_rcv);
		SOCKBUF_UNLOCK(&so->so_rcv);
		if (error)
			goto release;
		goto restart;
	}
dontblock:
	/*
	 * From this point onward, we maintain 'nextrecord' as a cache of the
	 * pointer to the next record in the socket buffer.  We must keep the
	 * various socket buffer pointers and local stack versions of the
	 * pointers in sync, pushing out modifications before dropping the
	 * socket buffer mutex, and re-reading them when picking it up.
	 *
	 * Otherwise, we will race with the network stack appending new data
	 * or records onto the socket buffer by using inconsistent/stale
	 * versions of the field, possibly resulting in socket buffer
	 * corruption.
	 *
	 * By holding the high-level sblock(), we prevent simultaneous
	 * readers from pulling off the front of the socket buffer.
	 */
	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
	if (uio->uio_td)
		uio->uio_td->td_ru.ru_msgrcv++;
	KASSERT(m == so->so_rcv.sb_mb, ("soreceive: m != so->so_rcv.sb_mb"));
	SBLASTRECORDCHK(&so->so_rcv);
	SBLASTMBUFCHK(&so->so_rcv);
	nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
		/* Record-oriented protocols lead each record with the peer
		 * address in an MT_SONAME mbuf. */
		KASSERT(m->m_type == MT_SONAME,
		    ("m->m_type == %d", m->m_type));
		orig_resid = 0;
		if (psa != NULL)
			*psa = sodupsockaddr(mtod(m, struct sockaddr *),
			    M_NOWAIT);
		if (flags & MSG_PEEK) {
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			so->so_rcv.sb_mb = m_free(m);
			m = so->so_rcv.sb_mb;
			sockbuf_pushsync(&so->so_rcv, nextrecord);
		}
	}

	/*
	 * Process one or more MT_CONTROL mbufs present before any data mbufs
	 * in the first mbuf chain on the socket buffer.  If MSG_PEEK, we
	 * just copy the data; if !MSG_PEEK, we call into the protocol to
	 * perform externalization (or freeing if controlp == NULL).
	 */
	if (m != NULL && m->m_type == MT_CONTROL) {
		struct mbuf *cm = NULL, *cmn;
		struct mbuf **cme = &cm;

		do {
			if (flags & MSG_PEEK) {
				if (controlp != NULL) {
					*controlp = m_copy(m, 0, m->m_len);
					controlp = &(*controlp)->m_next;
				}
				m = m->m_next;
			} else {
				sbfree(&so->so_rcv, m);
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = NULL;
				*cme = m;
				cme = &(*cme)->m_next;
				m = so->so_rcv.sb_mb;
			}
		} while (m != NULL && m->m_type == MT_CONTROL);
		if ((flags & MSG_PEEK) == 0)
			sockbuf_pushsync(&so->so_rcv, nextrecord);
		while (cm != NULL) {
			cmn = cm->m_next;
			cm->m_next = NULL;
			if (pr->pr_domain->dom_externalize != NULL) {
				/* Drop the sockbuf lock: externalize (e.g.
				 * unix-domain fd passing) may sleep. */
				SOCKBUF_UNLOCK(&so->so_rcv);
				error = (*pr->pr_domain->dom_externalize)
				    (cm, controlp);
				SOCKBUF_LOCK(&so->so_rcv);
			} else if (controlp != NULL)
				*controlp = cm;
			else
				m_freem(cm);
			if (controlp != NULL) {
				orig_resid = 0;
				while (*controlp != NULL)
					controlp = &(*controlp)->m_next;
			}
			cm = cmn;
		}
		if (m != NULL)
			nextrecord = so->so_rcv.sb_mb->m_nextpkt;
		else
			nextrecord = so->so_rcv.sb_mb;
		orig_resid = 0;
	}
	if (m != NULL) {
		if ((flags & MSG_PEEK) == 0) {
			KASSERT(m->m_nextpkt == nextrecord,
			    ("soreceive: post-control, nextrecord !sync"));
			if (nextrecord == NULL) {
				KASSERT(so->so_rcv.sb_mb == m,
				    ("soreceive: post-control, sb_mb!=m"));
				KASSERT(so->so_rcv.sb_lastrecord == m,
				    ("soreceive: post-control, lastrecord!=m"));
			}
		}
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	} else {
		if ((flags & MSG_PEEK) == 0) {
			KASSERT(so->so_rcv.sb_mb == nextrecord,
			    ("soreceive: sb_mb != nextrecord"));
			if (so->so_rcv.sb_mb == NULL) {
				KASSERT(so->so_rcv.sb_lastrecord == NULL,
				    ("soreceive: sb_lastercord != NULL"));
			}
		}
	}

	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
	SBLASTRECORDCHK(&so->so_rcv);
	SBLASTMBUFCHK(&so->so_rcv);

	/*
	 * Now continue to read any data mbufs off of the head of the socket
	 * buffer until the read request is satisfied.  Note that 'type' is
	 * used to store the type of any mbuf reads that have happened so far
	 * such that soreceive() can stop reading if the type changes, which
	 * causes soreceive() to return only one of regular data and inline
	 * out-of-band data in a single socket receive operation.
	 */
	moff = 0;
	offset = 0;
	while (m != NULL && uio->uio_resid > 0 && error == 0) {
		/*
		 * If the type of mbuf has changed since the last mbuf
		 * examined ('type'), end the receive operation.
		 */
		SOCKBUF_LOCK_ASSERT(&so->so_rcv);
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
		else
			KASSERT(m->m_type == MT_DATA,
			    ("m->m_type == %d", m->m_type));
		so->so_rcv.sb_state &= ~SBS_RCVATMARK;
		len = uio->uio_resid;
		/* Never read past the urgent-data (OOB) mark in one chunk. */
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.  Otherwise copy
		 * them out via the uio, then free.  Sockbuf must be
		 * consistent here (points to current mbuf, it points to next
		 * record) when we drop priority; we must note any additions
		 * to the sockbuf when we block interrupts again.
		 */
		if (mp == NULL) {
			SOCKBUF_LOCK_ASSERT(&so->so_rcv);
			SBLASTRECORDCHK(&so->so_rcv);
			SBLASTMBUFCHK(&so->so_rcv);
			/* Drop the lock: uiomove() may fault and sleep. */
			SOCKBUF_UNLOCK(&so->so_rcv);
#ifdef ZERO_COPY_SOCKETS
			if (so_zero_copy_receive) {
				int disposable;

				if ((m->m_flags & M_EXT)
				 && (m->m_ext.ext_type == EXT_DISPOSABLE))
					disposable = 1;
				else
					disposable = 0;

				error = uiomoveco(mtod(m, char *) + moff,
						  (int)len, uio,
						  disposable);
			} else
#endif /* ZERO_COPY_SOCKETS */
			error = uiomove(mtod(m, char *) + moff, (int)len, uio);
			SOCKBUF_LOCK(&so->so_rcv);
			if (error) {
				/*
				 * The MT_SONAME mbuf has already been removed
				 * from the record, so it is necessary to
				 * remove the data mbufs, if any, to preserve
				 * the invariant in the case of PR_ADDR that
				 * requires MT_SONAME mbufs at the head of
				 * each record.
				 */
				if (m && pr->pr_flags & PR_ATOMIC &&
				    ((flags & MSG_PEEK) == 0))
					(void)sbdroprecord_locked(&so->so_rcv);
				SOCKBUF_UNLOCK(&so->so_rcv);
				goto release;
			}
		} else
			uio->uio_resid -= len;
		SOCKBUF_LOCK_ASSERT(&so->so_rcv);
		if (len == m->m_len - moff) {
			/* Consumed this whole mbuf; advance to the next. */
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp != NULL) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = NULL;
				} else {
					so->so_rcv.sb_mb = m_free(m);
					m = so->so_rcv.sb_mb;
				}
				sockbuf_pushsync(&so->so_rcv, nextrecord);
				SBLASTRECORDCHK(&so->so_rcv);
				SBLASTMBUFCHK(&so->so_rcv);
			}
		} else {
			/* Partial mbuf consumed. */
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp != NULL) {
					int copy_flag;

					if (flags & MSG_DONTWAIT)
						copy_flag = M_DONTWAIT;
					else
						copy_flag = M_WAIT;
					/* m_copym(M_WAIT) may sleep. */
					if (copy_flag == M_WAIT)
						SOCKBUF_UNLOCK(&so->so_rcv);
					*mp = m_copym(m, 0, len, copy_flag);
					if (copy_flag == M_WAIT)
						SOCKBUF_LOCK(&so->so_rcv);
					if (*mp == NULL) {
						/*
						 * m_copym() couldn't
						 * allocate an mbuf.  Adjust
						 * uio_resid back (it was
						 * adjusted down by len
						 * bytes, which we didn't end
						 * up "copying" over).
						 */
						uio->uio_resid += len;
						break;
					}
				}
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		SOCKBUF_LOCK_ASSERT(&so->so_rcv);
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_rcv.sb_state |= SBS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket), we
		 * must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return with a
		 * short count but without error.  Keep sockbuf locked
		 * against other readers.
		 */
		while (flags & MSG_WAITALL && m == NULL && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && nextrecord == NULL) {
			SOCKBUF_LOCK_ASSERT(&so->so_rcv);
			if (so->so_error || so->so_rcv.sb_state & SBS_CANTRCVMORE)
				break;
			/*
			 * Notify the protocol that some data has been
			 * drained before blocking.
			 */
			if (pr->pr_flags & PR_WANTRCVD) {
				SOCKBUF_UNLOCK(&so->so_rcv);
				(*pr->pr_usrreqs->pru_rcvd)(so, flags);
				SOCKBUF_LOCK(&so->so_rcv);
			}
			SBLASTRECORDCHK(&so->so_rcv);
			SBLASTMBUFCHK(&so->so_rcv);
			error = sbwait(&so->so_rcv);
			if (error) {
				SOCKBUF_UNLOCK(&so->so_rcv);
				goto release;
			}
			m = so->so_rcv.sb_mb;
			if (m != NULL)
				nextrecord = m->m_nextpkt;
		}
	}

	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
	if (m != NULL && pr->pr_flags & PR_ATOMIC) {
		/* Undelivered tail of an atomic record is dropped. */
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord_locked(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == NULL) {
			/*
			 * First part is an inline SB_EMPTY_FIXUP().  Second
			 * part makes sure sb_lastrecord is up-to-date if
			 * there is still data in the socket buffer.
			 */
			so->so_rcv.sb_mb = nextrecord;
			if (so->so_rcv.sb_mb == NULL) {
				so->so_rcv.sb_mbtail = NULL;
				so->so_rcv.sb_lastrecord = NULL;
			} else if (nextrecord->m_nextpkt == NULL)
				so->so_rcv.sb_lastrecord = nextrecord;
		}
		SBLASTRECORDCHK(&so->so_rcv);
		SBLASTMBUFCHK(&so->so_rcv);
		/*
		 * If soreceive() is being done from the socket callback,
		 * then don't need to generate ACK to peer to update window,
		 * since ACK will be generated on return to TCP.
		 */
		if (!(flags & MSG_SOCALLBCK) &&
		    (pr->pr_flags & PR_WANTRCVD)) {
			SOCKBUF_UNLOCK(&so->so_rcv);
			(*pr->pr_usrreqs->pru_rcvd)(so, flags);
			SOCKBUF_LOCK(&so->so_rcv);
		}
	}
	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
	/* Nothing transferred and no terminal condition: try again. */
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0) {
		SOCKBUF_UNLOCK(&so->so_rcv);
		goto restart;
	}
	SOCKBUF_UNLOCK(&so->so_rcv);

	if (flagsp != NULL)
		*flagsp |= flags;
release:
	sbunlock(&so->so_rcv);
	return (error);
}

/*
 * Optimized version of soreceive() for simple datagram cases from userspace.
 * Unlike in the stream case, we're able to drop a datagram if copyout()
 * fails, and because we handle datagrams atomically, we don't need to use a
 * sleep lock to prevent I/O interlacing.
 */
int
soreceive_dgram(struct socket *so, struct sockaddr **psa, struct uio *uio,
    struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
	struct mbuf *m, *m2;
	int flags, len, error, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;

	if (psa != NULL)
		*psa = NULL;
	if (controlp != NULL)
		*controlp = NULL;
	if (flagsp != NULL)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;

	/*
	 * For any complicated cases, fall back to the full
	 * soreceive_generic().
	 */
	if (mp0 != NULL || (flags & MSG_PEEK) || (flags & MSG_OOB))
		return (soreceive_generic(so, psa, uio, mp0, controlp,
		    flagsp));

	/*
	 * Enforce restrictions on use.
	 */
	KASSERT((pr->pr_flags & PR_WANTRCVD) == 0,
	    ("soreceive_dgram: wantrcvd"));
	KASSERT(pr->pr_flags & PR_ATOMIC, ("soreceive_dgram: !atomic"));
	KASSERT((so->so_rcv.sb_state & SBS_RCVATMARK) == 0,
	    ("soreceive_dgram: SBS_RCVATMARK"));
	KASSERT((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0,
	    ("soreceive_dgram: P_CONNREQUIRED"));

	/*
	 * Loop blocking while waiting for a datagram.
	 */
	SOCKBUF_LOCK(&so->so_rcv);
	while ((m = so->so_rcv.sb_mb) == NULL) {
		KASSERT(so->so_rcv.sb_cc == 0,
		    ("soreceive_dgram: sb_mb NULL but sb_cc %u",
		    so->so_rcv.sb_cc));
		if (so->so_error) {
			/* Report and clear a pending asynchronous error. */
			error = so->so_error;
			so->so_error = 0;
			SOCKBUF_UNLOCK(&so->so_rcv);
			return (error);
		}
		if (so->so_rcv.sb_state & SBS_CANTRCVMORE ||
		    uio->uio_resid == 0) {
			SOCKBUF_UNLOCK(&so->so_rcv);
			return (0);
		}
		if ((so->so_state & SS_NBIO) ||
		    (flags & (MSG_DONTWAIT|MSG_NBIO))) {
			SOCKBUF_UNLOCK(&so->so_rcv);
			return (EWOULDBLOCK);
		}
		SBLASTRECORDCHK(&so->so_rcv);
		SBLASTMBUFCHK(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		if (error) {
			SOCKBUF_UNLOCK(&so->so_rcv);
			return (error);
		}
	}
	SOCKBUF_LOCK_ASSERT(&so->so_rcv);

	if (uio->uio_td)
		uio->uio_td->td_ru.ru_msgrcv++;
	SBLASTRECORDCHK(&so->so_rcv);
	SBLASTMBUFCHK(&so->so_rcv);
	nextrecord = m->m_nextpkt;
	if (nextrecord == NULL) {
		KASSERT(so->so_rcv.sb_lastrecord == m,
		    ("soreceive_dgram: lastrecord != m"));
	}

	KASSERT(so->so_rcv.sb_mb->m_nextpkt == nextrecord,
	    ("soreceive_dgram: m_nextpkt != nextrecord"));

	/*
	 * Pull 'm' and its chain off the front of the packet queue.
	 */
	so->so_rcv.sb_mb = NULL;
	sockbuf_pushsync(&so->so_rcv, nextrecord);

	/*
	 * Walk 'm's chain and free that many bytes from the socket buffer.
	 */
	for (m2 = m; m2 != NULL; m2 = m2->m_next)
		sbfree(&so->so_rcv, m2);

	/*
	 * Do a few last checks before we let go of the lock.
	 */
	SBLASTRECORDCHK(&so->so_rcv);
	SBLASTMBUFCHK(&so->so_rcv);
	SOCKBUF_UNLOCK(&so->so_rcv);

	if (pr->pr_flags & PR_ADDR) {
		/* First mbuf of the record carries the peer address. */
		KASSERT(m->m_type == MT_SONAME,
		    ("m->m_type == %d", m->m_type));
		if (psa != NULL)
			*psa = sodupsockaddr(mtod(m, struct sockaddr *),
			    M_NOWAIT);
		m = m_free(m);
	}
	if (m == NULL) {
		/* XXXRW: Can this happen? */
		return (0);
	}

	/*
	 * Packet to copyout() is now in 'm' and it is disconnected from the
	 * queue.
	 *
	 * Process one or more MT_CONTROL mbufs present before any data mbufs
	 * in the first mbuf chain on the socket buffer.  We call into the
	 * protocol to perform externalization (or freeing if controlp ==
	 * NULL).
	 */
	if (m->m_type == MT_CONTROL) {
		struct mbuf *cm = NULL, *cmn;
		struct mbuf **cme = &cm;

		do {
			m2 = m->m_next;
			m->m_next = NULL;
			*cme = m;
			cme = &(*cme)->m_next;
			m = m2;
		} while (m != NULL && m->m_type == MT_CONTROL);
		while (cm != NULL) {
			cmn = cm->m_next;
			cm->m_next = NULL;
			if (pr->pr_domain->dom_externalize != NULL) {
				error = (*pr->pr_domain->dom_externalize)
				    (cm, controlp);
			} else if (controlp != NULL)
				*controlp = cm;
			else
				m_freem(cm);
			if (controlp != NULL) {
				while (*controlp != NULL)
					controlp = &(*controlp)->m_next;
			}
			cm = cmn;
		}
	}
	KASSERT(m->m_type == MT_DATA, ("soreceive_dgram: !data"));

	offset = 0;
	while (m != NULL && uio->uio_resid > 0) {
		len = uio->uio_resid;
		if (len > m->m_len)
			len = m->m_len;
		error = uiomove(mtod(m, char *), (int)len, uio);
		if (error) {
			/* Datagram semantics permit dropping it on error. */
			m_freem(m);
			return (error);
		}
		m = m_free(m);
	}
	/* Any data left over didn't fit in the caller's buffer. */
	if (m != NULL)
		flags |= MSG_TRUNC;
	m_freem(m);
	if (flagsp != NULL)
		*flagsp |= flags;
	return (0);
}

/*
 * Dispatch a receive request to the protocol's pru_soreceive method
 * (typically soreceive_generic() or soreceive_dgram() above).
 */
int
soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
    struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{

	return (so->so_proto->pr_usrreqs->pru_soreceive(so, psa, uio, mp0,
	    controlp, flagsp));
}

/*
 * Shut down one or both directions of a socket.  'how' must be SHUT_RD,
 * SHUT_WR or SHUT_RDWR.  An optional protocol pru_flush hook runs first;
 * the read side is flushed locally via sorflush(), while shutting down the
 * write side is delegated to the protocol's pru_shutdown.
 */
int
soshutdown(struct socket *so, int how)
{
	struct protosw *pr = so->so_proto;

	if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR))
		return (EINVAL);
	if (pr->pr_usrreqs->pru_flush != NULL) {
		(*pr->pr_usrreqs->pru_flush)(so, how);
	}
	if (how != SHUT_WR)
		sorflush(so);
	if (how != SHUT_RD)
		return ((*pr->pr_usrreqs->pru_shutdown)(so));
	return (0);
}

void
sorflush(struct socket *so)
{
	struct sockbuf *sb = &so->so_rcv;
	struct protosw *pr = so->so_proto;
	struct sockbuf asb;

	/*
	 * In order to avoid calling dom_dispose with the socket buffer mutex
	 * held, and in order to generally avoid holding the lock for a long
	 * time, we make a copy of the socket buffer and clear the original
	 * (except locks, state).  The new socket buffer copy won't have
	 * initialized locks so we can only call routines that won't use or
	 * assert those locks.
	 *
	 * Dislodge threads currently blocked in receive and wait to acquire
	 * a lock against other simultaneous readers before clearing the
	 * socket buffer.  Don't let our acquire be interrupted by a signal
	 * despite any existing socket disposition on interruptable waiting.
	 */
	socantrcvmore(so);
	(void) sblock(sb, SBL_WAIT | SBL_NOINTR);

	/*
	 * Invalidate/clear most of the sockbuf structure, but leave selinfo
	 * and mutex data unchanged.
	 */
	SOCKBUF_LOCK(sb);
	bzero(&asb, offsetof(struct sockbuf, sb_startzero));
	bcopy(&sb->sb_startzero, &asb.sb_startzero,
	    sizeof(*sb) - offsetof(struct sockbuf, sb_startzero));
	bzero(&sb->sb_startzero,
	    sizeof(*sb) - offsetof(struct sockbuf, sb_startzero));
	SOCKBUF_UNLOCK(sb);
	sbunlock(sb);

	/*
	 * Dispose of special rights and flush the socket buffer.  Don't call
	 * any unsafe routines (that rely on locks being initialized) on asb.
	 */
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease_internal(&asb, so);
}

/*
 * Perhaps this routine, and sooptcopyout(), below, ought to come in an
 * additional variant to handle the case where the option value needs to be
 * some kind of integer, but not a specific size.  In addition to their use
 * here, these functions are also called by the protocol-level pr_ctloutput()
 * routines.
 */
int
sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen)
{
	size_t	valsize;

	/*
	 * If the user gives us more than we wanted, we ignore it, but if we
	 * don't get the minimum length the caller wants, we return EINVAL.
	 * On success, sopt->sopt_valsize is set to however much we actually
	 * retrieved.
	 */
	if ((valsize = sopt->sopt_valsize) < minlen)
		return EINVAL;
	if (valsize > len)
		sopt->sopt_valsize = valsize = len;

	/* A non-NULL sopt_td means the value lives in user space. */
	if (sopt->sopt_td != NULL)
		return (copyin(sopt->sopt_val, buf, valsize));

	bcopy(sopt->sopt_val, buf, valsize);
	return (0);
}

/*
 * Kernel version of setsockopt(2).
2133 * 2134 * XXX: optlen is size_t, not socklen_t 2135 */ 2136int 2137so_setsockopt(struct socket *so, int level, int optname, void *optval, 2138 size_t optlen) 2139{ 2140 struct sockopt sopt; 2141 2142 sopt.sopt_level = level; 2143 sopt.sopt_name = optname; 2144 sopt.sopt_dir = SOPT_SET; 2145 sopt.sopt_val = optval; 2146 sopt.sopt_valsize = optlen; 2147 sopt.sopt_td = NULL; 2148 return (sosetopt(so, &sopt)); 2149} 2150 2151int 2152sosetopt(struct socket *so, struct sockopt *sopt) 2153{ 2154 int error, optval; 2155 struct linger l; 2156 struct timeval tv; 2157 u_long val; 2158#ifdef MAC 2159 struct mac extmac; 2160#endif 2161 2162 error = 0; 2163 if (sopt->sopt_level != SOL_SOCKET) { 2164 if (so->so_proto && so->so_proto->pr_ctloutput) 2165 return ((*so->so_proto->pr_ctloutput) 2166 (so, sopt)); 2167 error = ENOPROTOOPT; 2168 } else { 2169 switch (sopt->sopt_name) { 2170#ifdef INET 2171 case SO_ACCEPTFILTER: 2172 error = do_setopt_accept_filter(so, sopt); 2173 if (error) 2174 goto bad; 2175 break; 2176#endif 2177 case SO_LINGER: 2178 error = sooptcopyin(sopt, &l, sizeof l, sizeof l); 2179 if (error) 2180 goto bad; 2181 2182 SOCK_LOCK(so); 2183 so->so_linger = l.l_linger; 2184 if (l.l_onoff) 2185 so->so_options |= SO_LINGER; 2186 else 2187 so->so_options &= ~SO_LINGER; 2188 SOCK_UNLOCK(so); 2189 break; 2190 2191 case SO_DEBUG: 2192 case SO_KEEPALIVE: 2193 case SO_DONTROUTE: 2194 case SO_USELOOPBACK: 2195 case SO_BROADCAST: 2196 case SO_REUSEADDR: 2197 case SO_REUSEPORT: 2198 case SO_OOBINLINE: 2199 case SO_TIMESTAMP: 2200 case SO_BINTIME: 2201 case SO_NOSIGPIPE: 2202 case SO_NO_DDP: 2203 case SO_NO_OFFLOAD: 2204 error = sooptcopyin(sopt, &optval, sizeof optval, 2205 sizeof optval); 2206 if (error) 2207 goto bad; 2208 SOCK_LOCK(so); 2209 if (optval) 2210 so->so_options |= sopt->sopt_name; 2211 else 2212 so->so_options &= ~sopt->sopt_name; 2213 SOCK_UNLOCK(so); 2214 break; 2215 2216 case SO_SETFIB: 2217 error = sooptcopyin(sopt, &optval, sizeof optval, 2218 sizeof 
optval); 2219 if (optval < 1 || optval > rt_numfibs) { 2220 error = EINVAL; 2221 goto bad; 2222 } 2223 if ((so->so_proto->pr_domain->dom_family == PF_INET) || 2224 (so->so_proto->pr_domain->dom_family == PF_ROUTE)) { 2225 so->so_fibnum = optval; 2226 /* Note: ignore error */ 2227 if (so->so_proto && so->so_proto->pr_ctloutput) 2228 (*so->so_proto->pr_ctloutput)(so, sopt); 2229 } else { 2230 so->so_fibnum = 0; 2231 } 2232 break; 2233 case SO_SNDBUF: 2234 case SO_RCVBUF: 2235 case SO_SNDLOWAT: 2236 case SO_RCVLOWAT: 2237 error = sooptcopyin(sopt, &optval, sizeof optval, 2238 sizeof optval); 2239 if (error) 2240 goto bad; 2241 2242 /* 2243 * Values < 1 make no sense for any of these options, 2244 * so disallow them. 2245 */ 2246 if (optval < 1) { 2247 error = EINVAL; 2248 goto bad; 2249 } 2250 2251 switch (sopt->sopt_name) { 2252 case SO_SNDBUF: 2253 case SO_RCVBUF: 2254 if (sbreserve(sopt->sopt_name == SO_SNDBUF ? 2255 &so->so_snd : &so->so_rcv, (u_long)optval, 2256 so, curthread) == 0) { 2257 error = ENOBUFS; 2258 goto bad; 2259 } 2260 (sopt->sopt_name == SO_SNDBUF ? &so->so_snd : 2261 &so->so_rcv)->sb_flags &= ~SB_AUTOSIZE; 2262 break; 2263 2264 /* 2265 * Make sure the low-water is never greater than the 2266 * high-water. 2267 */ 2268 case SO_SNDLOWAT: 2269 SOCKBUF_LOCK(&so->so_snd); 2270 so->so_snd.sb_lowat = 2271 (optval > so->so_snd.sb_hiwat) ? 2272 so->so_snd.sb_hiwat : optval; 2273 SOCKBUF_UNLOCK(&so->so_snd); 2274 break; 2275 case SO_RCVLOWAT: 2276 SOCKBUF_LOCK(&so->so_rcv); 2277 so->so_rcv.sb_lowat = 2278 (optval > so->so_rcv.sb_hiwat) ? 
2279 so->so_rcv.sb_hiwat : optval; 2280 SOCKBUF_UNLOCK(&so->so_rcv); 2281 break; 2282 } 2283 break; 2284 2285 case SO_SNDTIMEO: 2286 case SO_RCVTIMEO: 2287#ifdef COMPAT_IA32 2288 if (SV_CURPROC_FLAG(SV_ILP32)) { 2289 struct timeval32 tv32; 2290 2291 error = sooptcopyin(sopt, &tv32, sizeof tv32, 2292 sizeof tv32); 2293 CP(tv32, tv, tv_sec); 2294 CP(tv32, tv, tv_usec); 2295 } else 2296#endif 2297 error = sooptcopyin(sopt, &tv, sizeof tv, 2298 sizeof tv); 2299 if (error) 2300 goto bad; 2301 2302 /* assert(hz > 0); */ 2303 if (tv.tv_sec < 0 || tv.tv_sec > INT_MAX / hz || 2304 tv.tv_usec < 0 || tv.tv_usec >= 1000000) { 2305 error = EDOM; 2306 goto bad; 2307 } 2308 /* assert(tick > 0); */ 2309 /* assert(ULONG_MAX - INT_MAX >= 1000000); */ 2310 val = (u_long)(tv.tv_sec * hz) + tv.tv_usec / tick; 2311 if (val > INT_MAX) { 2312 error = EDOM; 2313 goto bad; 2314 } 2315 if (val == 0 && tv.tv_usec != 0) 2316 val = 1; 2317 2318 switch (sopt->sopt_name) { 2319 case SO_SNDTIMEO: 2320 so->so_snd.sb_timeo = val; 2321 break; 2322 case SO_RCVTIMEO: 2323 so->so_rcv.sb_timeo = val; 2324 break; 2325 } 2326 break; 2327 2328 case SO_LABEL: 2329#ifdef MAC 2330 error = sooptcopyin(sopt, &extmac, sizeof extmac, 2331 sizeof extmac); 2332 if (error) 2333 goto bad; 2334 error = mac_setsockopt_label(sopt->sopt_td->td_ucred, 2335 so, &extmac); 2336#else 2337 error = EOPNOTSUPP; 2338#endif 2339 break; 2340 2341 default: 2342 error = ENOPROTOOPT; 2343 break; 2344 } 2345 if (error == 0 && so->so_proto != NULL && 2346 so->so_proto->pr_ctloutput != NULL) { 2347 (void) ((*so->so_proto->pr_ctloutput) 2348 (so, sopt)); 2349 } 2350 } 2351bad: 2352 return (error); 2353} 2354 2355/* 2356 * Helper routine for getsockopt. 2357 */ 2358int 2359sooptcopyout(struct sockopt *sopt, const void *buf, size_t len) 2360{ 2361 int error; 2362 size_t valsize; 2363 2364 error = 0; 2365 2366 /* 2367 * Documented get behavior is that we always return a value, possibly 2368 * truncated to fit in the user's buffer. 
Traditional behavior is 2369 * that we always tell the user precisely how much we copied, rather 2370 * than something useful like the total amount we had available for 2371 * her. Note that this interface is not idempotent; the entire 2372 * answer must generated ahead of time. 2373 */ 2374 valsize = min(len, sopt->sopt_valsize); 2375 sopt->sopt_valsize = valsize; 2376 if (sopt->sopt_val != NULL) { 2377 if (sopt->sopt_td != NULL) 2378 error = copyout(buf, sopt->sopt_val, valsize); 2379 else 2380 bcopy(buf, sopt->sopt_val, valsize); 2381 } 2382 return (error); 2383} 2384 2385int 2386sogetopt(struct socket *so, struct sockopt *sopt) 2387{ 2388 int error, optval; 2389 struct linger l; 2390 struct timeval tv; 2391#ifdef MAC 2392 struct mac extmac; 2393#endif 2394 2395 error = 0; 2396 if (sopt->sopt_level != SOL_SOCKET) { 2397 if (so->so_proto && so->so_proto->pr_ctloutput) { 2398 return ((*so->so_proto->pr_ctloutput) 2399 (so, sopt)); 2400 } else 2401 return (ENOPROTOOPT); 2402 } else { 2403 switch (sopt->sopt_name) { 2404#ifdef INET 2405 case SO_ACCEPTFILTER: 2406 error = do_getopt_accept_filter(so, sopt); 2407 break; 2408#endif 2409 case SO_LINGER: 2410 SOCK_LOCK(so); 2411 l.l_onoff = so->so_options & SO_LINGER; 2412 l.l_linger = so->so_linger; 2413 SOCK_UNLOCK(so); 2414 error = sooptcopyout(sopt, &l, sizeof l); 2415 break; 2416 2417 case SO_USELOOPBACK: 2418 case SO_DONTROUTE: 2419 case SO_DEBUG: 2420 case SO_KEEPALIVE: 2421 case SO_REUSEADDR: 2422 case SO_REUSEPORT: 2423 case SO_BROADCAST: 2424 case SO_OOBINLINE: 2425 case SO_ACCEPTCONN: 2426 case SO_TIMESTAMP: 2427 case SO_BINTIME: 2428 case SO_NOSIGPIPE: 2429 optval = so->so_options & sopt->sopt_name; 2430integer: 2431 error = sooptcopyout(sopt, &optval, sizeof optval); 2432 break; 2433 2434 case SO_TYPE: 2435 optval = so->so_type; 2436 goto integer; 2437 2438 case SO_ERROR: 2439 SOCK_LOCK(so); 2440 optval = so->so_error; 2441 so->so_error = 0; 2442 SOCK_UNLOCK(so); 2443 goto integer; 2444 2445 case SO_SNDBUF: 
2446 optval = so->so_snd.sb_hiwat; 2447 goto integer; 2448 2449 case SO_RCVBUF: 2450 optval = so->so_rcv.sb_hiwat; 2451 goto integer; 2452 2453 case SO_SNDLOWAT: 2454 optval = so->so_snd.sb_lowat; 2455 goto integer; 2456 2457 case SO_RCVLOWAT: 2458 optval = so->so_rcv.sb_lowat; 2459 goto integer; 2460 2461 case SO_SNDTIMEO: 2462 case SO_RCVTIMEO: 2463 optval = (sopt->sopt_name == SO_SNDTIMEO ? 2464 so->so_snd.sb_timeo : so->so_rcv.sb_timeo); 2465 2466 tv.tv_sec = optval / hz; 2467 tv.tv_usec = (optval % hz) * tick; 2468#ifdef COMPAT_IA32 2469 if (SV_CURPROC_FLAG(SV_ILP32)) { 2470 struct timeval32 tv32; 2471 2472 CP(tv, tv32, tv_sec); 2473 CP(tv, tv32, tv_usec); 2474 error = sooptcopyout(sopt, &tv32, sizeof tv32); 2475 } else 2476#endif 2477 error = sooptcopyout(sopt, &tv, sizeof tv); 2478 break; 2479 2480 case SO_LABEL: 2481#ifdef MAC 2482 error = sooptcopyin(sopt, &extmac, sizeof(extmac), 2483 sizeof(extmac)); 2484 if (error) 2485 return (error); 2486 error = mac_getsockopt_label(sopt->sopt_td->td_ucred, 2487 so, &extmac); 2488 if (error) 2489 return (error); 2490 error = sooptcopyout(sopt, &extmac, sizeof extmac); 2491#else 2492 error = EOPNOTSUPP; 2493#endif 2494 break; 2495 2496 case SO_PEERLABEL: 2497#ifdef MAC 2498 error = sooptcopyin(sopt, &extmac, sizeof(extmac), 2499 sizeof(extmac)); 2500 if (error) 2501 return (error); 2502 error = mac_getsockopt_peerlabel( 2503 sopt->sopt_td->td_ucred, so, &extmac); 2504 if (error) 2505 return (error); 2506 error = sooptcopyout(sopt, &extmac, sizeof extmac); 2507#else 2508 error = EOPNOTSUPP; 2509#endif 2510 break; 2511 2512 case SO_LISTENQLIMIT: 2513 optval = so->so_qlimit; 2514 goto integer; 2515 2516 case SO_LISTENQLEN: 2517 optval = so->so_qlen; 2518 goto integer; 2519 2520 case SO_LISTENINCQLEN: 2521 optval = so->so_incqlen; 2522 goto integer; 2523 2524 default: 2525 error = ENOPROTOOPT; 2526 break; 2527 } 2528 return (error); 2529 } 2530} 2531 2532/* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. 
*/ 2533int 2534soopt_getm(struct sockopt *sopt, struct mbuf **mp) 2535{ 2536 struct mbuf *m, *m_prev; 2537 int sopt_size = sopt->sopt_valsize; 2538 2539 MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_DATA); 2540 if (m == NULL) 2541 return ENOBUFS; 2542 if (sopt_size > MLEN) { 2543 MCLGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT); 2544 if ((m->m_flags & M_EXT) == 0) { 2545 m_free(m); 2546 return ENOBUFS; 2547 } 2548 m->m_len = min(MCLBYTES, sopt_size); 2549 } else { 2550 m->m_len = min(MLEN, sopt_size); 2551 } 2552 sopt_size -= m->m_len; 2553 *mp = m; 2554 m_prev = m; 2555 2556 while (sopt_size) { 2557 MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_DATA); 2558 if (m == NULL) { 2559 m_freem(*mp); 2560 return ENOBUFS; 2561 } 2562 if (sopt_size > MLEN) { 2563 MCLGET(m, sopt->sopt_td != NULL ? M_WAIT : 2564 M_DONTWAIT); 2565 if ((m->m_flags & M_EXT) == 0) { 2566 m_freem(m); 2567 m_freem(*mp); 2568 return ENOBUFS; 2569 } 2570 m->m_len = min(MCLBYTES, sopt_size); 2571 } else { 2572 m->m_len = min(MLEN, sopt_size); 2573 } 2574 sopt_size -= m->m_len; 2575 m_prev->m_next = m; 2576 m_prev = m; 2577 } 2578 return (0); 2579} 2580 2581/* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. 
*/ 2582int 2583soopt_mcopyin(struct sockopt *sopt, struct mbuf *m) 2584{ 2585 struct mbuf *m0 = m; 2586 2587 if (sopt->sopt_val == NULL) 2588 return (0); 2589 while (m != NULL && sopt->sopt_valsize >= m->m_len) { 2590 if (sopt->sopt_td != NULL) { 2591 int error; 2592 2593 error = copyin(sopt->sopt_val, mtod(m, char *), 2594 m->m_len); 2595 if (error != 0) { 2596 m_freem(m0); 2597 return(error); 2598 } 2599 } else 2600 bcopy(sopt->sopt_val, mtod(m, char *), m->m_len); 2601 sopt->sopt_valsize -= m->m_len; 2602 sopt->sopt_val = (char *)sopt->sopt_val + m->m_len; 2603 m = m->m_next; 2604 } 2605 if (m != NULL) /* should be allocated enoughly at ip6_sooptmcopyin() */ 2606 panic("ip6_sooptmcopyin"); 2607 return (0); 2608} 2609 2610/* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */ 2611int 2612soopt_mcopyout(struct sockopt *sopt, struct mbuf *m) 2613{ 2614 struct mbuf *m0 = m; 2615 size_t valsize = 0; 2616 2617 if (sopt->sopt_val == NULL) 2618 return (0); 2619 while (m != NULL && sopt->sopt_valsize >= m->m_len) { 2620 if (sopt->sopt_td != NULL) { 2621 int error; 2622 2623 error = copyout(mtod(m, char *), sopt->sopt_val, 2624 m->m_len); 2625 if (error != 0) { 2626 m_freem(m0); 2627 return(error); 2628 } 2629 } else 2630 bcopy(mtod(m, char *), sopt->sopt_val, m->m_len); 2631 sopt->sopt_valsize -= m->m_len; 2632 sopt->sopt_val = (char *)sopt->sopt_val + m->m_len; 2633 valsize += m->m_len; 2634 m = m->m_next; 2635 } 2636 if (m != NULL) { 2637 /* enough soopt buffer should be given from user-land */ 2638 m_freem(m0); 2639 return(EINVAL); 2640 } 2641 sopt->sopt_valsize = valsize; 2642 return (0); 2643} 2644 2645/* 2646 * sohasoutofband(): protocol notifies socket layer of the arrival of new 2647 * out-of-band data, which will then notify socket consumers. 
 */
void
sohasoutofband(struct socket *so)
{

	if (so->so_sigio != NULL)
		pgsigio(&so->so_sigio, SIGURG, 0);
	selwakeuppri(&so->so_rcv.sb_sel, PSOCK);
}

/*
 * General poll entry point: dispatch to the protocol's pru_sopoll method.
 */
int
sopoll(struct socket *so, int events, struct ucred *active_cred,
    struct thread *td)
{

	return (so->so_proto->pr_usrreqs->pru_sopoll(so, events, active_cred,
	    td));
}

/*
 * Default poll implementation: evaluate readability/writability against the
 * socket buffers, registering with selrecord() when no event is ready.
 * Both socket buffer locks are held across the evaluation; send is locked
 * before receive.
 */
int
sopoll_generic(struct socket *so, int events, struct ucred *active_cred,
    struct thread *td)
{
	int revents = 0;

	SOCKBUF_LOCK(&so->so_snd);
	SOCKBUF_LOCK(&so->so_rcv);
	if (events & (POLLIN | POLLRDNORM))
		if (soreadable(so))
			revents |= events & (POLLIN | POLLRDNORM);

	/* POLLINIGNEOF: like POLLIN but without reporting EOF as readable. */
	if (events & POLLINIGNEOF)
		if (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat ||
		    !TAILQ_EMPTY(&so->so_comp) || so->so_error)
			revents |= POLLINIGNEOF;

	if (events & (POLLOUT | POLLWRNORM))
		if (sowriteable(so))
			revents |= events & (POLLOUT | POLLWRNORM);

	if (events & (POLLPRI | POLLRDBAND))
		if (so->so_oobmark || (so->so_rcv.sb_state & SBS_RCVATMARK))
			revents |= events & (POLLPRI | POLLRDBAND);

	if (revents == 0) {
		/* Nothing ready: record this thread for later wakeup. */
		if (events &
		    (POLLIN | POLLINIGNEOF | POLLPRI | POLLRDNORM |
		    POLLRDBAND)) {
			selrecord(td, &so->so_rcv.sb_sel);
			so->so_rcv.sb_flags |= SB_SEL;
		}

		if (events & (POLLOUT | POLLWRNORM)) {
			selrecord(td, &so->so_snd.sb_sel);
			so->so_snd.sb_flags |= SB_SEL;
		}
	}

	SOCKBUF_UNLOCK(&so->so_rcv);
	SOCKBUF_UNLOCK(&so->so_snd);
	return (revents);
}

/*
 * Attach a kqueue filter to a socket: listening sockets get the listen
 * filter for EVFILT_READ; otherwise read/write filters track the
 * corresponding socket buffer.
 */
int
soo_kqfilter(struct file *fp, struct knote *kn)
{
	struct socket *so = kn->kn_fp->f_data;
	struct sockbuf *sb;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		if (so->so_options & SO_ACCEPTCONN)
			kn->kn_fop = &solisten_filtops;
		else
			kn->kn_fop = &soread_filtops;
		sb = &so->so_rcv;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &sowrite_filtops;
		sb = &so->so_snd;
		break;
	default:
		return (EINVAL);
	}

	SOCKBUF_LOCK(sb);
	knlist_add(&sb->sb_sel.si_note, kn, 1);
	sb->sb_flags |= SB_KNOTE;
	SOCKBUF_UNLOCK(sb);
	return (0);
}

/*
 * Some routines that return EOPNOTSUPP for entry points that are not
 * supported by a protocol.  Fill in as needed.
 */
int
pru_accept_notsupp(struct socket *so, struct sockaddr **nam)
{

	return EOPNOTSUPP;
}

int
pru_attach_notsupp(struct socket *so, int proto, struct thread *td)
{

	return EOPNOTSUPP;
}

int
pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td)
{

	return EOPNOTSUPP;
}

int
pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td)
{

	return EOPNOTSUPP;
}

int
pru_connect2_notsupp(struct socket *so1, struct socket *so2)
{

	return EOPNOTSUPP;
}

int
pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data,
    struct ifnet *ifp, struct thread *td)
{

	return EOPNOTSUPP;
}

int
pru_disconnect_notsupp(struct socket *so)
{

	return EOPNOTSUPP;
}

int
pru_listen_notsupp(struct socket *so, int backlog, struct thread *td)
{

	return EOPNOTSUPP;
}

int
pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam)
{

	return EOPNOTSUPP;
}

int
pru_rcvd_notsupp(struct socket *so, int flags)
{

	return EOPNOTSUPP;
}

int
pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags)
{

	return EOPNOTSUPP;
}

int
pru_send_notsupp(struct socket *so, int flags, struct mbuf *m,
    struct sockaddr *addr, struct mbuf *control, struct thread *td)
{

	return EOPNOTSUPP;
}

/*
 * This
 * isn't really a ``null'' operation, but it's the default one and
 * doesn't do anything destructive.
 */
int
pru_sense_null(struct socket *so, struct stat *sb)
{

	sb->st_blksize = so->so_snd.sb_hiwat;
	return 0;
}

int
pru_shutdown_notsupp(struct socket *so)
{

	return EOPNOTSUPP;
}

int
pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam)
{

	return EOPNOTSUPP;
}

int
pru_sosend_notsupp(struct socket *so, struct sockaddr *addr, struct uio *uio,
    struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
{

	return EOPNOTSUPP;
}

int
pru_soreceive_notsupp(struct socket *so, struct sockaddr **paddr,
    struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{

	return EOPNOTSUPP;
}

int
pru_sopoll_notsupp(struct socket *so, int events, struct ucred *cred,
    struct thread *td)
{

	return EOPNOTSUPP;
}

/*
 * Detach a read kqueue filter from the receive socket buffer's knote list.
 */
static void
filt_sordetach(struct knote *kn)
{
	struct socket *so = kn->kn_fp->f_data;

	SOCKBUF_LOCK(&so->so_rcv);
	knlist_remove(&so->so_rcv.sb_sel.si_note, kn, 1);
	if (knlist_empty(&so->so_rcv.sb_sel.si_note))
		so->so_rcv.sb_flags &= ~SB_KNOTE;
	SOCKBUF_UNLOCK(&so->so_rcv);
}

/*
 * kqueue read filter: report available receive data, signalling EOF when
 * the peer can send no more.  Called with the receive buffer lock held.
 */
/*ARGSUSED*/
static int
filt_soread(struct knote *kn, long hint)
{
	struct socket *so;

	so = kn->kn_fp->f_data;
	SOCKBUF_LOCK_ASSERT(&so->so_rcv);

	kn->kn_data = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;
	if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	} else if (so->so_error)	/* temporary udp error */
		return (1);
	else if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	else
		return (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat);
}

/*
 * Detach a write kqueue filter from the send socket buffer's knote list.
 */
static void
filt_sowdetach(struct knote *kn)
{
	struct socket *so = kn->kn_fp->f_data;

	SOCKBUF_LOCK(&so->so_snd);
	knlist_remove(&so->so_snd.sb_sel.si_note, kn, 1);
	if (knlist_empty(&so->so_snd.sb_sel.si_note))
		so->so_snd.sb_flags &= ~SB_KNOTE;
	SOCKBUF_UNLOCK(&so->so_snd);
}

/*
 * kqueue write filter: report available send buffer space.  Called with the
 * send buffer lock held.
 */
/*ARGSUSED*/
static int
filt_sowrite(struct knote *kn, long hint)
{
	struct socket *so;

	so = kn->kn_fp->f_data;
	SOCKBUF_LOCK_ASSERT(&so->so_snd);
	kn->kn_data = sbspace(&so->so_snd);
	if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	} else if (so->so_error)	/* temporary udp error */
		return (1);
	else if (((so->so_state & SS_ISCONNECTED) == 0) &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED))
		return (0);
	else if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	else
		return (kn->kn_data >= so->so_snd.sb_lowat);
}

/*
 * kqueue listen filter: report the number of completed connections waiting
 * to be accepted.
 */
/*ARGSUSED*/
static int
filt_solisten(struct knote *kn, long hint)
{
	struct socket *so = kn->kn_fp->f_data;

	kn->kn_data = so->so_qlen;
	return (! TAILQ_EMPTY(&so->so_comp));
}

/*
 * Check that the socket's credential matches the given uid; EPERM otherwise.
 */
int
socheckuid(struct socket *so, uid_t uid)
{

	if (so == NULL)
		return (EPERM);
	if (so->so_cred->cr_uid != uid)
		return (EPERM);
	return (0);
}

/*
 * Sysctl handler for kern.ipc.somaxconn; constrains the value to [1,
 * USHRT_MAX] since so_qlimit is an unsigned short.
 */
static int
sysctl_somaxconn(SYSCTL_HANDLER_ARGS)
{
	int error;
	int val;

	val = somaxconn;
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error || !req->newptr )
		return (error);

	if (val < 1 || val > USHRT_MAX)
		return (EINVAL);

	somaxconn = val;
	return (0);
}

/*
 * These functions are used by protocols to notify the socket layer (and its
 * consumers) of state changes in the sockets driven by protocol-side events.
2992 */ 2993 2994/* 2995 * Procedures to manipulate state flags of socket and do appropriate wakeups. 2996 * 2997 * Normal sequence from the active (originating) side is that 2998 * soisconnecting() is called during processing of connect() call, resulting 2999 * in an eventual call to soisconnected() if/when the connection is 3000 * established. When the connection is torn down soisdisconnecting() is 3001 * called during processing of disconnect() call, and soisdisconnected() is 3002 * called when the connection to the peer is totally severed. The semantics 3003 * of these routines are such that connectionless protocols can call 3004 * soisconnected() and soisdisconnected() only, bypassing the in-progress 3005 * calls when setting up a ``connection'' takes no time. 3006 * 3007 * From the passive side, a socket is created with two queues of sockets: 3008 * so_incomp for connections in progress and so_comp for connections already 3009 * made and awaiting user acceptance. As a protocol is preparing incoming 3010 * connections, it creates a socket structure queued on so_incomp by calling 3011 * sonewconn(). When the connection is established, soisconnected() is 3012 * called, and transfers the socket structure to so_comp, making it available 3013 * to accept(). 3014 * 3015 * If a socket is closed with sockets on either so_incomp or so_comp, these 3016 * sockets are dropped. 3017 * 3018 * If higher-level protocols are implemented in the kernel, the wakeups done 3019 * here will sometimes cause software-interrupt process scheduling. 
 */
void
soisconnecting(struct socket *so)
{

	SOCK_LOCK(so);
	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTING;
	SOCK_UNLOCK(so);
}

/*
 * Mark a socket connected.  For a socket on a listen queue, either move it
 * from so_incomp to so_comp and wake accept()ers, or - if an accept filter
 * is installed - arm the filter's upcall instead of completing the move.
 */
void
soisconnected(struct socket *so)
{
	struct socket *head;

	ACCEPT_LOCK();
	SOCK_LOCK(so);
	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
	so->so_state |= SS_ISCONNECTED;
	head = so->so_head;
	if (head != NULL && (so->so_qstate & SQ_INCOMP)) {
		if ((so->so_options & SO_ACCEPTFILTER) == 0) {
			/* Promote from the incomplete to complete queue. */
			SOCK_UNLOCK(so);
			TAILQ_REMOVE(&head->so_incomp, so, so_list);
			head->so_incqlen--;
			so->so_qstate &= ~SQ_INCOMP;
			TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
			head->so_qlen++;
			so->so_qstate |= SQ_COMP;
			ACCEPT_UNLOCK();
			sorwakeup(head);
			wakeup_one(&head->so_timeo);
		} else {
			/*
			 * Accept filter present: hold the socket on the
			 * incomplete queue and let the filter's upcall
			 * decide when it becomes acceptable.
			 */
			ACCEPT_UNLOCK();
			so->so_upcall =
			    head->so_accf->so_accept_filter->accf_callback;
			so->so_upcallarg = head->so_accf->so_accept_filter_arg;
			so->so_rcv.sb_flags |= SB_UPCALL;
			so->so_options &= ~SO_ACCEPTFILTER;
			SOCK_UNLOCK(so);
			so->so_upcall(so, so->so_upcallarg, M_DONTWAIT);
		}
		return;
	}
	SOCK_UNLOCK(so);
	ACCEPT_UNLOCK();
	wakeup(&so->so_timeo);
	sorwakeup(so);
	sowwakeup(so);
}

/*
 * Mark a socket as entering disconnection, waking both directions.
 */
void
soisdisconnecting(struct socket *so)
{

	/*
	 * Note: This code assumes that SOCK_LOCK(so) and
	 * SOCKBUF_LOCK(&so->so_rcv) are the same.
	 */
	SOCKBUF_LOCK(&so->so_rcv);
	so->so_state &= ~SS_ISCONNECTING;
	so->so_state |= SS_ISDISCONNECTING;
	so->so_rcv.sb_state |= SBS_CANTRCVMORE;
	sorwakeup_locked(so);
	SOCKBUF_LOCK(&so->so_snd);
	so->so_snd.sb_state |= SBS_CANTSENDMORE;
	sowwakeup_locked(so);
	wakeup(&so->so_timeo);
}

/*
 * Mark a socket fully disconnected, discarding unsent data and waking both
 * directions.
 */
void
soisdisconnected(struct socket *so)
{

	/*
	 * Note: This code assumes that SOCK_LOCK(so) and
	 * SOCKBUF_LOCK(&so->so_rcv) are the same.
	 */
	SOCKBUF_LOCK(&so->so_rcv);
	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISDISCONNECTED;
	so->so_rcv.sb_state |= SBS_CANTRCVMORE;
	sorwakeup_locked(so);
	SOCKBUF_LOCK(&so->so_snd);
	so->so_snd.sb_state |= SBS_CANTSENDMORE;
	sbdrop_locked(&so->so_snd, so->so_snd.sb_cc);
	sowwakeup_locked(so);
	wakeup(&so->so_timeo);
}

/*
 * Make a copy of a sockaddr in a malloced buffer of type M_SONAME.
 * Returns NULL if the allocation fails under M_NOWAIT.
 */
struct sockaddr *
sodupsockaddr(const struct sockaddr *sa, int mflags)
{
	struct sockaddr *sa2;

	sa2 = malloc(sa->sa_len, M_SONAME, mflags);
	if (sa2)
		bcopy(sa, sa2, sa->sa_len);
	return sa2;
}

/*
 * Create an external-format (``xsocket'') structure using the information in
 * the kernel-format socket structure pointed to by so.  This is done to
 * reduce the spew of irrelevant information over this interface, to isolate
 * user code from changes in the kernel structure, and potentially to provide
 * information-hiding if we decide that some of this information should be
 * hidden from users.
 */
void
sotoxsocket(struct socket *so, struct xsocket *xso)
{

	xso->xso_len = sizeof *xso;
	xso->xso_so = so;
	xso->so_type = so->so_type;
	xso->so_options = so->so_options;
	xso->so_linger = so->so_linger;
	xso->so_state = so->so_state;
	xso->so_pcb = so->so_pcb;
	xso->xso_protocol = so->so_proto->pr_protocol;
	xso->xso_family = so->so_proto->pr_domain->dom_family;
	xso->so_qlen = so->so_qlen;
	xso->so_incqlen = so->so_incqlen;
	xso->so_qlimit = so->so_qlimit;
	xso->so_timeo = so->so_timeo;
	xso->so_error = so->so_error;
	xso->so_pgid = so->so_sigio ? so->so_sigio->sio_pgid : 0;
	xso->so_oobmark = so->so_oobmark;
	sbtoxsockbuf(&so->so_snd, &xso->so_snd);
	sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
	xso->so_uid = so->so_cred->cr_uid;
}


/*
 * Socket accessor functions to provide external consumers with
 * a safe interface to socket state
 *
 */

/*
 * Apply 'func' to every completed connection on a listen socket's accept
 * queue.  Note: the parameter is reused as the iteration variable.
 */
void
so_listeners_apply_all(struct socket *so, void (*func)(struct socket *, void *), void *arg)
{

	TAILQ_FOREACH(so, &so->so_comp, so_list)
		func(so, arg);
}

struct sockbuf *
so_sockbuf_rcv(struct socket *so)
{

	return (&so->so_rcv);
}

struct sockbuf *
so_sockbuf_snd(struct socket *so)
{

	return (&so->so_snd);
}

int
so_state_get(const struct socket *so)
{

	return (so->so_state);
}

void
so_state_set(struct socket *so, int val)
{

	so->so_state = val;
}

int
so_options_get(const struct socket *so)
{

	return (so->so_options);
}

void
so_options_set(struct socket *so, int val)
{

	so->so_options = val;
}

int
so_error_get(const struct socket *so)
{

	return (so->so_error);
}

void
so_error_set(struct socket *so, int val)
{

	so->so_error = val;
}

int
so_linger_get(const struct socket *so)
{

	return (so->so_linger);
}

void
so_linger_set(struct socket *so, int val)
{

	so->so_linger = val;
}

struct protosw *
so_protosw_get(const struct socket *so)
{

	return (so->so_proto);
}

void
so_protosw_set(struct socket *so, struct protosw *val)
{

	so->so_proto = val;
}

void
so_sorwakeup(struct socket *so)
{

	sorwakeup(so);
}

void
so_sowwakeup(struct socket *so)
{

	sowwakeup(so);
}

void
so_sorwakeup_locked(struct socket *so)
{

	sorwakeup_locked(so);
}

void
so_sowwakeup_locked(struct socket *so)
{

	sowwakeup_locked(so);
}

void
so_lock(struct socket *so)
{
	SOCK_LOCK(so);
}

void
so_unlock(struct socket *so)
{
	SOCK_UNLOCK(so);
}