/* uipc_socket2.c revision 1.96 */
1/* $NetBSD: uipc_socket2.c,v 1.96 2008/06/18 09:06:27 yamt Exp $ */ 2 3/*- 4 * Copyright (c) 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29/* 30 * Copyright (c) 1982, 1986, 1988, 1990, 1993 31 * The Regents of the University of California. All rights reserved. 32 * 33 * Redistribution and use in source and binary forms, with or without 34 * modification, are permitted provided that the following conditions 35 * are met: 36 * 1. Redistributions of source code must retain the above copyright 37 * notice, this list of conditions and the following disclaimer. 38 * 2. 
Redistributions in binary form must reproduce the above copyright 39 * notice, this list of conditions and the following disclaimer in the 40 * documentation and/or other materials provided with the distribution. 41 * 3. Neither the name of the University nor the names of its contributors 42 * may be used to endorse or promote products derived from this software 43 * without specific prior written permission. 44 * 45 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 48 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 55 * SUCH DAMAGE. 56 * 57 * @(#)uipc_socket2.c 8.2 (Berkeley) 2/14/95 58 */ 59 60#include <sys/cdefs.h> 61__KERNEL_RCSID(0, "$NetBSD: uipc_socket2.c,v 1.96 2008/06/18 09:06:27 yamt Exp $"); 62 63#include "opt_mbuftrace.h" 64#include "opt_sb_max.h" 65 66#include <sys/param.h> 67#include <sys/systm.h> 68#include <sys/proc.h> 69#include <sys/file.h> 70#include <sys/buf.h> 71#include <sys/malloc.h> 72#include <sys/mbuf.h> 73#include <sys/protosw.h> 74#include <sys/domain.h> 75#include <sys/poll.h> 76#include <sys/socket.h> 77#include <sys/socketvar.h> 78#include <sys/signalvar.h> 79#include <sys/kauth.h> 80#include <sys/pool.h> 81 82/* 83 * Primitive routines for operating on sockets and socket buffers. 84 * 85 * Locking rules and assumptions: 86 * 87 * o socket::so_lock can change on the fly. 
The low level routines used 88 * to lock sockets are aware of this. When so_lock is acquired, the 89 * routine locking must check to see if so_lock still points to the 90 * lock that was acquired. If so_lock has changed in the meantime, the 91 * now irellevant lock that was acquired must be dropped and the lock 92 * operation retried. Although not proven here, this is completely safe 93 * on a multiprocessor system, even with relaxed memory ordering, given 94 * the next two rules: 95 * 96 * o In order to mutate so_lock, the lock pointed to by the current value 97 * of so_lock must be held: i.e., the socket must be held locked by the 98 * changing thread. The thread must issue membar_exit() to prevent 99 * memory accesses being reordered, and can set so_lock to the desired 100 * value. If the lock pointed to by the new value of so_lock is not 101 * held by the changing thread, the socket must then be considered 102 * unlocked. 103 * 104 * o If so_lock is mutated, and the previous lock referred to by so_lock 105 * could still be visible to other threads in the system (e.g. via file 106 * descriptor or protocol-internal reference), then the old lock must 107 * remain valid until the socket and/or protocol control block has been 108 * torn down. 109 * 110 * o If a socket has a non-NULL so_head value (i.e. is in the process of 111 * connecting), then locking the socket must also lock the socket pointed 112 * to by so_head: their lock pointers must match. 113 * 114 * o If a socket has connections in progress (so_q, so_q0 not empty) then 115 * locking the socket must also lock the sockets attached to both queues. 116 * Again, their lock pointers must match. 117 * 118 * o Beyond the initial lock assigment in socreate(), assigning locks to 119 * sockets is the responsibility of the individual protocols / protocol 120 * domains. 
121 */ 122 123static pool_cache_t socket_cache; 124 125u_long sb_max = SB_MAX; /* maximum socket buffer size */ 126static u_long sb_max_adj; /* adjusted sb_max */ 127 128/* 129 * Procedures to manipulate state flags of socket 130 * and do appropriate wakeups. Normal sequence from the 131 * active (originating) side is that soisconnecting() is 132 * called during processing of connect() call, 133 * resulting in an eventual call to soisconnected() if/when the 134 * connection is established. When the connection is torn down 135 * soisdisconnecting() is called during processing of disconnect() call, 136 * and soisdisconnected() is called when the connection to the peer 137 * is totally severed. The semantics of these routines are such that 138 * connectionless protocols can call soisconnected() and soisdisconnected() 139 * only, bypassing the in-progress calls when setting up a ``connection'' 140 * takes no time. 141 * 142 * From the passive side, a socket is created with 143 * two queues of sockets: so_q0 for connections in progress 144 * and so_q for connections already made and awaiting user acceptance. 145 * As a protocol is preparing incoming connections, it creates a socket 146 * structure queued on so_q0 by calling sonewconn(). When the connection 147 * is established, soisconnected() is called, and transfers the 148 * socket structure to so_q, making it available to accept(). 149 * 150 * If a socket is closed with sockets on either 151 * so_q0 or so_q, these sockets are dropped. 152 * 153 * If higher level protocols are implemented in 154 * the kernel, the wakeups done here will sometimes 155 * cause software-interrupt process scheduling. 
156 */ 157 158void 159soisconnecting(struct socket *so) 160{ 161 162 KASSERT(solocked(so)); 163 164 so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); 165 so->so_state |= SS_ISCONNECTING; 166} 167 168void 169soisconnected(struct socket *so) 170{ 171 struct socket *head; 172 173 head = so->so_head; 174 175 KASSERT(solocked(so)); 176 KASSERT(head == NULL || solocked2(so, head)); 177 178 so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING); 179 so->so_state |= SS_ISCONNECTED; 180 if (head && soqremque(so, 0)) { 181 soqinsque(head, so, 1); 182 sorwakeup(head); 183 cv_broadcast(&head->so_cv); 184 } else { 185 cv_broadcast(&so->so_cv); 186 sorwakeup(so); 187 sowwakeup(so); 188 } 189} 190 191void 192soisdisconnecting(struct socket *so) 193{ 194 195 KASSERT(solocked(so)); 196 197 so->so_state &= ~SS_ISCONNECTING; 198 so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE); 199 cv_broadcast(&so->so_cv); 200 sowwakeup(so); 201 sorwakeup(so); 202} 203 204void 205soisdisconnected(struct socket *so) 206{ 207 208 KASSERT(solocked(so)); 209 210 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); 211 so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED); 212 cv_broadcast(&so->so_cv); 213 sowwakeup(so); 214 sorwakeup(so); 215} 216 217void 218soinit2(void) 219{ 220 221 socket_cache = pool_cache_init(sizeof(struct socket), 0, 0, 0, 222 "socket", NULL, IPL_SOFTNET, NULL, NULL, NULL); 223} 224 225/* 226 * When an attempt at a new connection is noted on a socket 227 * which accepts connections, sonewconn is called. If the 228 * connection is possible (subject to space constraints, etc.) 229 * then we allocate a new structure, propoerly linked into the 230 * data structure of the original socket, and return this. 231 * Connstatus may be 0, SS_ISCONFIRMING, or SS_ISCONNECTED. 
232 */ 233struct socket * 234sonewconn(struct socket *head, int connstatus) 235{ 236 struct socket *so; 237 int soqueue, error; 238 239 KASSERT(solocked(head)); 240 241 soqueue = connstatus ? 1 : 0; 242 if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2) 243 return ((struct socket *)0); 244 so = soget(false); 245 if (so == NULL) 246 return (NULL); 247 mutex_obj_hold(head->so_lock); 248 so->so_lock = head->so_lock; 249 so->so_type = head->so_type; 250 so->so_options = head->so_options &~ SO_ACCEPTCONN; 251 so->so_linger = head->so_linger; 252 so->so_state = head->so_state | SS_NOFDREF; 253 so->so_nbio = head->so_nbio; 254 so->so_proto = head->so_proto; 255 so->so_timeo = head->so_timeo; 256 so->so_pgid = head->so_pgid; 257 so->so_send = head->so_send; 258 so->so_receive = head->so_receive; 259 so->so_uidinfo = head->so_uidinfo; 260 so->so_egid = head->so_egid; 261 so->so_cpid = head->so_cpid; 262#ifdef MBUFTRACE 263 so->so_mowner = head->so_mowner; 264 so->so_rcv.sb_mowner = head->so_rcv.sb_mowner; 265 so->so_snd.sb_mowner = head->so_snd.sb_mowner; 266#endif 267 (void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat); 268 so->so_snd.sb_lowat = head->so_snd.sb_lowat; 269 so->so_rcv.sb_lowat = head->so_rcv.sb_lowat; 270 so->so_rcv.sb_timeo = head->so_rcv.sb_timeo; 271 so->so_snd.sb_timeo = head->so_snd.sb_timeo; 272 so->so_rcv.sb_flags |= head->so_rcv.sb_flags & SB_AUTOSIZE; 273 so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE; 274 soqinsque(head, so, soqueue); 275 error = (*so->so_proto->pr_usrreq)(so, PRU_ATTACH, NULL, NULL, 276 NULL, NULL); 277 KASSERT(solocked(so)); 278 if (error != 0) { 279 (void) soqremque(so, soqueue); 280 soput(so); 281 return (NULL); 282 } 283 if (connstatus) { 284 sorwakeup(head); 285 cv_broadcast(&head->so_cv); 286 so->so_state |= connstatus; 287 } 288 return (so); 289} 290 291struct socket * 292soget(bool waitok) 293{ 294 struct socket *so; 295 296 so = pool_cache_get(socket_cache, (waitok ? 
PR_WAITOK : PR_NOWAIT)); 297 if (__predict_false(so == NULL)) 298 return (NULL); 299 memset(so, 0, sizeof(*so)); 300 TAILQ_INIT(&so->so_q0); 301 TAILQ_INIT(&so->so_q); 302 cv_init(&so->so_cv, "socket"); 303 cv_init(&so->so_rcv.sb_cv, "netio"); 304 cv_init(&so->so_snd.sb_cv, "netio"); 305 selinit(&so->so_rcv.sb_sel); 306 selinit(&so->so_snd.sb_sel); 307 so->so_rcv.sb_so = so; 308 so->so_snd.sb_so = so; 309 return so; 310} 311 312void 313soput(struct socket *so) 314{ 315 316 KASSERT(!cv_has_waiters(&so->so_cv)); 317 KASSERT(!cv_has_waiters(&so->so_rcv.sb_cv)); 318 KASSERT(!cv_has_waiters(&so->so_snd.sb_cv)); 319 seldestroy(&so->so_rcv.sb_sel); 320 seldestroy(&so->so_snd.sb_sel); 321 mutex_obj_free(so->so_lock); 322 cv_destroy(&so->so_cv); 323 cv_destroy(&so->so_rcv.sb_cv); 324 cv_destroy(&so->so_snd.sb_cv); 325 pool_cache_put(socket_cache, so); 326} 327 328void 329soqinsque(struct socket *head, struct socket *so, int q) 330{ 331 332 KASSERT(solocked2(head, so)); 333 334#ifdef DIAGNOSTIC 335 if (so->so_onq != NULL) 336 panic("soqinsque"); 337#endif 338 339 so->so_head = head; 340 if (q == 0) { 341 head->so_q0len++; 342 so->so_onq = &head->so_q0; 343 } else { 344 head->so_qlen++; 345 so->so_onq = &head->so_q; 346 } 347 TAILQ_INSERT_TAIL(so->so_onq, so, so_qe); 348} 349 350int 351soqremque(struct socket *so, int q) 352{ 353 struct socket *head; 354 355 head = so->so_head; 356 357 KASSERT(solocked(so)); 358 if (q == 0) { 359 if (so->so_onq != &head->so_q0) 360 return (0); 361 head->so_q0len--; 362 } else { 363 if (so->so_onq != &head->so_q) 364 return (0); 365 head->so_qlen--; 366 } 367 KASSERT(solocked2(so, head)); 368 TAILQ_REMOVE(so->so_onq, so, so_qe); 369 so->so_onq = NULL; 370 so->so_head = NULL; 371 return (1); 372} 373 374/* 375 * Socantsendmore indicates that no more data will be sent on the 376 * socket; it would normally be applied to a socket when the user 377 * informs the system that no more data is to be sent, by the protocol 378 * code (in case 
PRU_SHUTDOWN). Socantrcvmore indicates that no more data 379 * will be received, and will normally be applied to the socket by a 380 * protocol when it detects that the peer will send no more data. 381 * Data queued for reading in the socket may yet be read. 382 */ 383 384void 385socantsendmore(struct socket *so) 386{ 387 388 KASSERT(solocked(so)); 389 390 so->so_state |= SS_CANTSENDMORE; 391 sowwakeup(so); 392} 393 394void 395socantrcvmore(struct socket *so) 396{ 397 398 KASSERT(solocked(so)); 399 400 so->so_state |= SS_CANTRCVMORE; 401 sorwakeup(so); 402} 403 404/* 405 * Wait for data to arrive at/drain from a socket buffer. 406 */ 407int 408sbwait(struct sockbuf *sb) 409{ 410 struct socket *so; 411 kmutex_t *lock; 412 int error; 413 414 so = sb->sb_so; 415 416 KASSERT(solocked(so)); 417 418 sb->sb_flags |= SB_NOTIFY; 419 lock = so->so_lock; 420 if ((sb->sb_flags & SB_NOINTR) != 0) 421 error = cv_timedwait(&sb->sb_cv, lock, sb->sb_timeo); 422 else 423 error = cv_timedwait_sig(&sb->sb_cv, lock, sb->sb_timeo); 424 if (__predict_false(lock != so->so_lock)) 425 solockretry(so, lock); 426 return error; 427} 428 429/* 430 * Wakeup processes waiting on a socket buffer. 431 * Do asynchronous notification via SIGIO 432 * if the socket buffer has the SB_ASYNC flag set. 433 */ 434void 435sowakeup(struct socket *so, struct sockbuf *sb, int code) 436{ 437 int band; 438 439 KASSERT(solocked(so)); 440 KASSERT(sb->sb_so == so); 441 442 if (code == POLL_IN) 443 band = POLLIN|POLLRDNORM; 444 else 445 band = POLLOUT|POLLWRNORM; 446 sb->sb_flags &= ~SB_NOTIFY; 447 selnotify(&sb->sb_sel, band, NOTE_SUBMIT); 448 cv_broadcast(&sb->sb_cv); 449 if (sb->sb_flags & SB_ASYNC) 450 fownsignal(so->so_pgid, SIGIO, code, band, so); 451 if (sb->sb_flags & SB_UPCALL) 452 (*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT); 453} 454 455/* 456 * Reset a socket's lock pointer. 
Wake all threads waiting on the 457 * socket's condition variables so that they can restart their waits 458 * using the new lock. The existing lock must be held. 459 */ 460void 461solockreset(struct socket *so, kmutex_t *lock) 462{ 463 464 KASSERT(solocked(so)); 465 466 so->so_lock = lock; 467 cv_broadcast(&so->so_snd.sb_cv); 468 cv_broadcast(&so->so_rcv.sb_cv); 469 cv_broadcast(&so->so_cv); 470} 471 472/* 473 * Socket buffer (struct sockbuf) utility routines. 474 * 475 * Each socket contains two socket buffers: one for sending data and 476 * one for receiving data. Each buffer contains a queue of mbufs, 477 * information about the number of mbufs and amount of data in the 478 * queue, and other fields allowing poll() statements and notification 479 * on data availability to be implemented. 480 * 481 * Data stored in a socket buffer is maintained as a list of records. 482 * Each record is a list of mbufs chained together with the m_next 483 * field. Records are chained together with the m_nextpkt field. The upper 484 * level routine soreceive() expects the following conventions to be 485 * observed when placing information in the receive buffer: 486 * 487 * 1. If the protocol requires each message be preceded by the sender's 488 * name, then a record containing that name must be present before 489 * any associated data (mbuf's must be of type MT_SONAME). 490 * 2. If the protocol supports the exchange of ``access rights'' (really 491 * just additional data associated with the message), and there are 492 * ``rights'' to be received, then a record containing this data 493 * should be present (mbuf's must be of type MT_CONTROL). 494 * 3. If a name or rights record exists, then it must be followed by 495 * a data record, perhaps of zero length. 496 * 497 * Before using a new socket structure it is first necessary to reserve 498 * buffer space to the socket, by calling sbreserve(). 
This should commit 499 * some of the available buffer space in the system buffer pool for the 500 * socket (currently, it does nothing but enforce limits). The space 501 * should be released by calling sbrelease() when the socket is destroyed. 502 */ 503 504int 505sb_max_set(u_long new_sbmax) 506{ 507 int s; 508 509 if (new_sbmax < (16 * 1024)) 510 return (EINVAL); 511 512 s = splsoftnet(); 513 sb_max = new_sbmax; 514 sb_max_adj = (u_quad_t)new_sbmax * MCLBYTES / (MSIZE + MCLBYTES); 515 splx(s); 516 517 return (0); 518} 519 520int 521soreserve(struct socket *so, u_long sndcc, u_long rcvcc) 522{ 523 524 KASSERT(so->so_lock == NULL || solocked(so)); 525 526 /* 527 * there's at least one application (a configure script of screen) 528 * which expects a fifo is writable even if it has "some" bytes 529 * in its buffer. 530 * so we want to make sure (hiwat - lowat) >= (some bytes). 531 * 532 * PIPE_BUF here is an arbitrary value chosen as (some bytes) above. 533 * we expect it's large enough for such applications. 534 */ 535 u_long lowat = MAX(sock_loan_thresh, MCLBYTES); 536 u_long hiwat = lowat + PIPE_BUF; 537 538 if (sndcc < hiwat) 539 sndcc = hiwat; 540 if (sbreserve(&so->so_snd, sndcc, so) == 0) 541 goto bad; 542 if (sbreserve(&so->so_rcv, rcvcc, so) == 0) 543 goto bad2; 544 if (so->so_rcv.sb_lowat == 0) 545 so->so_rcv.sb_lowat = 1; 546 if (so->so_snd.sb_lowat == 0) 547 so->so_snd.sb_lowat = lowat; 548 if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat) 549 so->so_snd.sb_lowat = so->so_snd.sb_hiwat; 550 return (0); 551 bad2: 552 sbrelease(&so->so_snd, so); 553 bad: 554 return (ENOBUFS); 555} 556 557/* 558 * Allot mbufs to a sockbuf. 559 * Attempt to scale mbmax so that mbcnt doesn't become limiting 560 * if buffering efficiency is near the normal case. 
561 */ 562int 563sbreserve(struct sockbuf *sb, u_long cc, struct socket *so) 564{ 565 struct lwp *l = curlwp; /* XXX */ 566 rlim_t maxcc; 567 struct uidinfo *uidinfo; 568 569 KASSERT(so->so_lock == NULL || solocked(so)); 570 KASSERT(sb->sb_so == so); 571 KASSERT(sb_max_adj != 0); 572 573 if (cc == 0 || cc > sb_max_adj) 574 return (0); 575 576 if (kauth_cred_geteuid(l->l_cred) == so->so_uidinfo->ui_uid) 577 maxcc = l->l_proc->p_rlimit[RLIMIT_SBSIZE].rlim_cur; 578 else 579 maxcc = RLIM_INFINITY; 580 581 uidinfo = so->so_uidinfo; 582 if (!chgsbsize(uidinfo, &sb->sb_hiwat, cc, maxcc)) 583 return 0; 584 sb->sb_mbmax = min(cc * 2, sb_max); 585 if (sb->sb_lowat > sb->sb_hiwat) 586 sb->sb_lowat = sb->sb_hiwat; 587 return (1); 588} 589 590/* 591 * Free mbufs held by a socket, and reserved mbuf space. We do not assert 592 * that the socket is held locked here: see sorflush(). 593 */ 594void 595sbrelease(struct sockbuf *sb, struct socket *so) 596{ 597 598 KASSERT(sb->sb_so == so); 599 600 sbflush(sb); 601 (void)chgsbsize(so->so_uidinfo, &sb->sb_hiwat, 0, RLIM_INFINITY); 602 sb->sb_mbmax = 0; 603} 604 605/* 606 * Routines to add and remove 607 * data from an mbuf queue. 608 * 609 * The routines sbappend() or sbappendrecord() are normally called to 610 * append new mbufs to a socket buffer, after checking that adequate 611 * space is available, comparing the function sbspace() with the amount 612 * of data to be added. sbappendrecord() differs from sbappend() in 613 * that data supplied is treated as the beginning of a new record. 614 * To place a sender's address, optional access rights, and data in a 615 * socket receive buffer, sbappendaddr() should be used. To place 616 * access rights and data in a socket receive buffer, sbappendrights() 617 * should be used. In either case, the new data begins a new record. 618 * Note that unlike sbappend() and sbappendrecord(), these routines check 619 * for the caller that there will be enough space to store the data. 
620 * Each fails if there is not enough space, or if it cannot find mbufs 621 * to store additional information in. 622 * 623 * Reliable protocols may use the socket send buffer to hold data 624 * awaiting acknowledgement. Data is normally copied from a socket 625 * send buffer in a protocol with m_copy for output to a peer, 626 * and then removing the data from the socket buffer with sbdrop() 627 * or sbdroprecord() when the data is acknowledged by the peer. 628 */ 629 630#ifdef SOCKBUF_DEBUG 631void 632sblastrecordchk(struct sockbuf *sb, const char *where) 633{ 634 struct mbuf *m = sb->sb_mb; 635 636 KASSERT(solocked(sb->sb_so)); 637 638 while (m && m->m_nextpkt) 639 m = m->m_nextpkt; 640 641 if (m != sb->sb_lastrecord) { 642 printf("sblastrecordchk: sb_mb %p sb_lastrecord %p last %p\n", 643 sb->sb_mb, sb->sb_lastrecord, m); 644 printf("packet chain:\n"); 645 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) 646 printf("\t%p\n", m); 647 panic("sblastrecordchk from %s", where); 648 } 649} 650 651void 652sblastmbufchk(struct sockbuf *sb, const char *where) 653{ 654 struct mbuf *m = sb->sb_mb; 655 struct mbuf *n; 656 657 KASSERT(solocked(sb->sb_so)); 658 659 while (m && m->m_nextpkt) 660 m = m->m_nextpkt; 661 662 while (m && m->m_next) 663 m = m->m_next; 664 665 if (m != sb->sb_mbtail) { 666 printf("sblastmbufchk: sb_mb %p sb_mbtail %p last %p\n", 667 sb->sb_mb, sb->sb_mbtail, m); 668 printf("packet tree:\n"); 669 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) { 670 printf("\t"); 671 for (n = m; n != NULL; n = n->m_next) 672 printf("%p ", n); 673 printf("\n"); 674 } 675 panic("sblastmbufchk from %s", where); 676 } 677} 678#endif /* SOCKBUF_DEBUG */ 679 680/* 681 * Link a chain of records onto a socket buffer 682 */ 683#define SBLINKRECORDCHAIN(sb, m0, mlast) \ 684do { \ 685 if ((sb)->sb_lastrecord != NULL) \ 686 (sb)->sb_lastrecord->m_nextpkt = (m0); \ 687 else \ 688 (sb)->sb_mb = (m0); \ 689 (sb)->sb_lastrecord = (mlast); \ 690} while (/*CONSTCOND*/0) 691 692 
693#define SBLINKRECORD(sb, m0) \ 694 SBLINKRECORDCHAIN(sb, m0, m0) 695 696/* 697 * Append mbuf chain m to the last record in the 698 * socket buffer sb. The additional space associated 699 * the mbuf chain is recorded in sb. Empty mbufs are 700 * discarded and mbufs are compacted where possible. 701 */ 702void 703sbappend(struct sockbuf *sb, struct mbuf *m) 704{ 705 struct mbuf *n; 706 707 KASSERT(solocked(sb->sb_so)); 708 709 if (m == 0) 710 return; 711 712#ifdef MBUFTRACE 713 m_claimm(m, sb->sb_mowner); 714#endif 715 716 SBLASTRECORDCHK(sb, "sbappend 1"); 717 718 if ((n = sb->sb_lastrecord) != NULL) { 719 /* 720 * XXX Would like to simply use sb_mbtail here, but 721 * XXX I need to verify that I won't miss an EOR that 722 * XXX way. 723 */ 724 do { 725 if (n->m_flags & M_EOR) { 726 sbappendrecord(sb, m); /* XXXXXX!!!! */ 727 return; 728 } 729 } while (n->m_next && (n = n->m_next)); 730 } else { 731 /* 732 * If this is the first record in the socket buffer, it's 733 * also the last record. 734 */ 735 sb->sb_lastrecord = m; 736 } 737 sbcompress(sb, m, n); 738 SBLASTRECORDCHK(sb, "sbappend 2"); 739} 740 741/* 742 * This version of sbappend() should only be used when the caller 743 * absolutely knows that there will never be more than one record 744 * in the socket buffer, that is, a stream protocol (such as TCP). 
745 */ 746void 747sbappendstream(struct sockbuf *sb, struct mbuf *m) 748{ 749 750 KASSERT(solocked(sb->sb_so)); 751 KDASSERT(m->m_nextpkt == NULL); 752 KASSERT(sb->sb_mb == sb->sb_lastrecord); 753 754 SBLASTMBUFCHK(sb, __func__); 755 756#ifdef MBUFTRACE 757 m_claimm(m, sb->sb_mowner); 758#endif 759 760 sbcompress(sb, m, sb->sb_mbtail); 761 762 sb->sb_lastrecord = sb->sb_mb; 763 SBLASTRECORDCHK(sb, __func__); 764} 765 766#ifdef SOCKBUF_DEBUG 767void 768sbcheck(struct sockbuf *sb) 769{ 770 struct mbuf *m, *m2; 771 u_long len, mbcnt; 772 773 KASSERT(solocked(sb->sb_so)); 774 775 len = 0; 776 mbcnt = 0; 777 for (m = sb->sb_mb; m; m = m->m_nextpkt) { 778 for (m2 = m; m2 != NULL; m2 = m2->m_next) { 779 len += m2->m_len; 780 mbcnt += MSIZE; 781 if (m2->m_flags & M_EXT) 782 mbcnt += m2->m_ext.ext_size; 783 if (m2->m_nextpkt != NULL) 784 panic("sbcheck nextpkt"); 785 } 786 } 787 if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) { 788 printf("cc %lu != %lu || mbcnt %lu != %lu\n", len, sb->sb_cc, 789 mbcnt, sb->sb_mbcnt); 790 panic("sbcheck"); 791 } 792} 793#endif 794 795/* 796 * As above, except the mbuf chain 797 * begins a new record. 798 */ 799void 800sbappendrecord(struct sockbuf *sb, struct mbuf *m0) 801{ 802 struct mbuf *m; 803 804 KASSERT(solocked(sb->sb_so)); 805 806 if (m0 == 0) 807 return; 808 809#ifdef MBUFTRACE 810 m_claimm(m0, sb->sb_mowner); 811#endif 812 /* 813 * Put the first mbuf on the queue. 814 * Note this permits zero length records. 815 */ 816 sballoc(sb, m0); 817 SBLASTRECORDCHK(sb, "sbappendrecord 1"); 818 SBLINKRECORD(sb, m0); 819 m = m0->m_next; 820 m0->m_next = 0; 821 if (m && (m0->m_flags & M_EOR)) { 822 m0->m_flags &= ~M_EOR; 823 m->m_flags |= M_EOR; 824 } 825 sbcompress(sb, m, m0); 826 SBLASTRECORDCHK(sb, "sbappendrecord 2"); 827} 828 829/* 830 * As above except that OOB data 831 * is inserted at the beginning of the sockbuf, 832 * but after any other OOB data. 
833 */ 834void 835sbinsertoob(struct sockbuf *sb, struct mbuf *m0) 836{ 837 struct mbuf *m, **mp; 838 839 KASSERT(solocked(sb->sb_so)); 840 841 if (m0 == 0) 842 return; 843 844 SBLASTRECORDCHK(sb, "sbinsertoob 1"); 845 846 for (mp = &sb->sb_mb; (m = *mp) != NULL; mp = &((*mp)->m_nextpkt)) { 847 again: 848 switch (m->m_type) { 849 850 case MT_OOBDATA: 851 continue; /* WANT next train */ 852 853 case MT_CONTROL: 854 if ((m = m->m_next) != NULL) 855 goto again; /* inspect THIS train further */ 856 } 857 break; 858 } 859 /* 860 * Put the first mbuf on the queue. 861 * Note this permits zero length records. 862 */ 863 sballoc(sb, m0); 864 m0->m_nextpkt = *mp; 865 if (*mp == NULL) { 866 /* m0 is actually the new tail */ 867 sb->sb_lastrecord = m0; 868 } 869 *mp = m0; 870 m = m0->m_next; 871 m0->m_next = 0; 872 if (m && (m0->m_flags & M_EOR)) { 873 m0->m_flags &= ~M_EOR; 874 m->m_flags |= M_EOR; 875 } 876 sbcompress(sb, m, m0); 877 SBLASTRECORDCHK(sb, "sbinsertoob 2"); 878} 879 880/* 881 * Append address and data, and optionally, control (ancillary) data 882 * to the receive queue of a socket. If present, 883 * m0 must include a packet header with total length. 884 * Returns 0 if no space in sockbuf or insufficient mbufs. 
885 */ 886int 887sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0, 888 struct mbuf *control) 889{ 890 struct mbuf *m, *n, *nlast; 891 int space, len; 892 893 KASSERT(solocked(sb->sb_so)); 894 895 space = asa->sa_len; 896 897 if (m0 != NULL) { 898 if ((m0->m_flags & M_PKTHDR) == 0) 899 panic("sbappendaddr"); 900 space += m0->m_pkthdr.len; 901#ifdef MBUFTRACE 902 m_claimm(m0, sb->sb_mowner); 903#endif 904 } 905 for (n = control; n; n = n->m_next) { 906 space += n->m_len; 907 MCLAIM(n, sb->sb_mowner); 908 if (n->m_next == 0) /* keep pointer to last control buf */ 909 break; 910 } 911 if (space > sbspace(sb)) 912 return (0); 913 MGET(m, M_DONTWAIT, MT_SONAME); 914 if (m == 0) 915 return (0); 916 MCLAIM(m, sb->sb_mowner); 917 /* 918 * XXX avoid 'comparison always true' warning which isn't easily 919 * avoided. 920 */ 921 len = asa->sa_len; 922 if (len > MLEN) { 923 MEXTMALLOC(m, asa->sa_len, M_NOWAIT); 924 if ((m->m_flags & M_EXT) == 0) { 925 m_free(m); 926 return (0); 927 } 928 } 929 m->m_len = asa->sa_len; 930 memcpy(mtod(m, void *), asa, asa->sa_len); 931 if (n) 932 n->m_next = m0; /* concatenate data to control */ 933 else 934 control = m0; 935 m->m_next = control; 936 937 SBLASTRECORDCHK(sb, "sbappendaddr 1"); 938 939 for (n = m; n->m_next != NULL; n = n->m_next) 940 sballoc(sb, n); 941 sballoc(sb, n); 942 nlast = n; 943 SBLINKRECORD(sb, m); 944 945 sb->sb_mbtail = nlast; 946 SBLASTMBUFCHK(sb, "sbappendaddr"); 947 SBLASTRECORDCHK(sb, "sbappendaddr 2"); 948 949 return (1); 950} 951 952/* 953 * Helper for sbappendchainaddr: prepend a struct sockaddr* to 954 * an mbuf chain. 
955 */ 956static inline struct mbuf * 957m_prepend_sockaddr(struct sockbuf *sb, struct mbuf *m0, 958 const struct sockaddr *asa) 959{ 960 struct mbuf *m; 961 const int salen = asa->sa_len; 962 963 KASSERT(solocked(sb->sb_so)); 964 965 /* only the first in each chain need be a pkthdr */ 966 MGETHDR(m, M_DONTWAIT, MT_SONAME); 967 if (m == 0) 968 return (0); 969 MCLAIM(m, sb->sb_mowner); 970#ifdef notyet 971 if (salen > MHLEN) { 972 MEXTMALLOC(m, salen, M_NOWAIT); 973 if ((m->m_flags & M_EXT) == 0) { 974 m_free(m); 975 return (0); 976 } 977 } 978#else 979 KASSERT(salen <= MHLEN); 980#endif 981 m->m_len = salen; 982 memcpy(mtod(m, void *), asa, salen); 983 m->m_next = m0; 984 m->m_pkthdr.len = salen + m0->m_pkthdr.len; 985 986 return m; 987} 988 989int 990sbappendaddrchain(struct sockbuf *sb, const struct sockaddr *asa, 991 struct mbuf *m0, int sbprio) 992{ 993 int space; 994 struct mbuf *m, *n, *n0, *nlast; 995 int error; 996 997 KASSERT(solocked(sb->sb_so)); 998 999 /* 1000 * XXX sbprio reserved for encoding priority of this* request: 1001 * SB_PRIO_NONE --> honour normal sb limits 1002 * SB_PRIO_ONESHOT_OVERFLOW --> if socket has any space, 1003 * take whole chain. Intended for large requests 1004 * that should be delivered atomically (all, or none). 1005 * SB_PRIO_OVERDRAFT -- allow a small (2*MLEN) overflow 1006 * over normal socket limits, for messages indicating 1007 * buffer overflow in earlier normal/lower-priority messages 1008 * SB_PRIO_BESTEFFORT --> ignore limits entirely. 1009 * Intended for kernel-generated messages only. 1010 * Up to generator to avoid total mbuf resource exhaustion. 1011 */ 1012 (void)sbprio; 1013 1014 if (m0 && (m0->m_flags & M_PKTHDR) == 0) 1015 panic("sbappendaddrchain"); 1016 1017 space = sbspace(sb); 1018 1019#ifdef notyet 1020 /* 1021 * Enforce SB_PRIO_* limits as described above. 
1022 */ 1023#endif 1024 1025 n0 = NULL; 1026 nlast = NULL; 1027 for (m = m0; m; m = m->m_nextpkt) { 1028 struct mbuf *np; 1029 1030#ifdef MBUFTRACE 1031 m_claimm(m, sb->sb_mowner); 1032#endif 1033 1034 /* Prepend sockaddr to this record (m) of input chain m0 */ 1035 n = m_prepend_sockaddr(sb, m, asa); 1036 if (n == NULL) { 1037 error = ENOBUFS; 1038 goto bad; 1039 } 1040 1041 /* Append record (asa+m) to end of new chain n0 */ 1042 if (n0 == NULL) { 1043 n0 = n; 1044 } else { 1045 nlast->m_nextpkt = n; 1046 } 1047 /* Keep track of last record on new chain */ 1048 nlast = n; 1049 1050 for (np = n; np; np = np->m_next) 1051 sballoc(sb, np); 1052 } 1053 1054 SBLASTRECORDCHK(sb, "sbappendaddrchain 1"); 1055 1056 /* Drop the entire chain of (asa+m) records onto the socket */ 1057 SBLINKRECORDCHAIN(sb, n0, nlast); 1058 1059 SBLASTRECORDCHK(sb, "sbappendaddrchain 2"); 1060 1061 for (m = nlast; m->m_next; m = m->m_next) 1062 ; 1063 sb->sb_mbtail = m; 1064 SBLASTMBUFCHK(sb, "sbappendaddrchain"); 1065 1066 return (1); 1067 1068bad: 1069 /* 1070 * On error, free the prepended addreseses. For consistency 1071 * with sbappendaddr(), leave it to our caller to free 1072 * the input record chain passed to us as m0. 
1073 */ 1074 while ((n = n0) != NULL) { 1075 struct mbuf *np; 1076 1077 /* Undo the sballoc() of this record */ 1078 for (np = n; np; np = np->m_next) 1079 sbfree(sb, np); 1080 1081 n0 = n->m_nextpkt; /* iterate at next prepended address */ 1082 MFREE(n, np); /* free prepended address (not data) */ 1083 } 1084 return 0; 1085} 1086 1087 1088int 1089sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control) 1090{ 1091 struct mbuf *m, *mlast, *n; 1092 int space; 1093 1094 KASSERT(solocked(sb->sb_so)); 1095 1096 space = 0; 1097 if (control == 0) 1098 panic("sbappendcontrol"); 1099 for (m = control; ; m = m->m_next) { 1100 space += m->m_len; 1101 MCLAIM(m, sb->sb_mowner); 1102 if (m->m_next == 0) 1103 break; 1104 } 1105 n = m; /* save pointer to last control buffer */ 1106 for (m = m0; m; m = m->m_next) { 1107 MCLAIM(m, sb->sb_mowner); 1108 space += m->m_len; 1109 } 1110 if (space > sbspace(sb)) 1111 return (0); 1112 n->m_next = m0; /* concatenate data to control */ 1113 1114 SBLASTRECORDCHK(sb, "sbappendcontrol 1"); 1115 1116 for (m = control; m->m_next != NULL; m = m->m_next) 1117 sballoc(sb, m); 1118 sballoc(sb, m); 1119 mlast = m; 1120 SBLINKRECORD(sb, control); 1121 1122 sb->sb_mbtail = mlast; 1123 SBLASTMBUFCHK(sb, "sbappendcontrol"); 1124 SBLASTRECORDCHK(sb, "sbappendcontrol 2"); 1125 1126 return (1); 1127} 1128 1129/* 1130 * Compress mbuf chain m into the socket 1131 * buffer sb following mbuf n. If n 1132 * is null, the buffer is presumed empty. 
 */
void
sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
{
	int eor;
	struct mbuf *o;

	KASSERT(solocked(sb->sb_so));

	/* eor accumulates M_EOR seen anywhere on the input chain. */
	eor = 0;
	while (m) {
		eor |= m->m_flags & M_EOR;
		/*
		 * Discard an empty mbuf, unless it carries a pending EOR
		 * and there is no following mbuf (next on chain, else n)
		 * of the same type that the marker can migrate to.
		 */
		if (m->m_len == 0 &&
		    (eor == 0 ||
		     (((o = m->m_next) || (o = n)) &&
		      o->m_type == m->m_type))) {
			/* Don't leave sb_lastrecord dangling on a freed mbuf. */
			if (sb->sb_lastrecord == m)
				sb->sb_lastrecord = m->m_next;
			m = m_free(m);
			continue;
		}
		/*
		 * Coalesce: copy a small mbuf into the trailing space of
		 * the previous buffer n instead of linking it in, provided
		 * n is not an end-of-record and the types match.
		 */
		if (n && (n->m_flags & M_EOR) == 0 &&
		    /* M_TRAILINGSPACE() checks buffer writeability */
		    m->m_len <= MCLBYTES / 4 && /* XXX Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			memcpy(mtod(n, char *) + n->m_len, mtod(m, void *),
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			m = m_free(m);
			continue;
		}
		/* Otherwise link m in after n (or as the first mbuf). */
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sb->sb_mbtail = m;
		sballoc(sb, m);
		n = m;
		/* EOR is carried in 'eor' and re-applied to the tail below. */
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	if (eor) {
		if (n)
			n->m_flags |= eor;
		else
			/* Whole chain was freed; nowhere to put the EOR mark. */
			printf("semi-panic: sbcompress\n");
	}
	SBLASTMBUFCHK(sb, __func__);
}

/*
 * Free all mbufs in a sockbuf.
 * Check that all resources are reclaimed.
 * The socket must be locked, and the sockbuf must not be sblock()ed.
 */
void
sbflush(struct sockbuf *sb)
{

	KASSERT(solocked(sb->sb_so));
	KASSERT((sb->sb_flags & SB_LOCK) == 0);

	/* Drop whole records until no mbuf storage remains charged. */
	while (sb->sb_mbcnt)
		sbdrop(sb, (int)sb->sb_cc);

	KASSERT(sb->sb_cc == 0);
	KASSERT(sb->sb_mb == NULL);
	KASSERT(sb->sb_mbtail == NULL);
	KASSERT(sb->sb_lastrecord == NULL);
}

/*
 * Drop data from (the front of) a sockbuf.
 * Frees len bytes worth of leading mbufs, crossing record boundaries
 * as needed; panics if len exceeds the data present.
 * The socket must be locked.
 */
void
sbdrop(struct sockbuf *sb, int len)
{
	struct mbuf *m, *mn, *next;

	KASSERT(solocked(sb->sb_so));

	/* next = head of the record after the one being consumed. */
	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	while (len > 0) {
		if (m == 0) {
			/* Current record exhausted; advance to the next. */
			if (next == 0)
				panic("sbdrop");
			m = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			/* Partial drop: trim the front of this mbuf. */
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			break;
		}
		len -= m->m_len;
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	/* Also shed any now-empty mbufs at the new front. */
	while (m && m->m_len == 0) {
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;
	/*
	 * First part is an inline SB_EMPTY_FIXUP().  Second part
	 * makes sure sb_lastrecord is up-to-date if we dropped
	 * part of the last record.
	 */
	m = sb->sb_mb;
	if (m == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (m->m_nextpkt == NULL)
		sb->sb_lastrecord = m;
}

/*
 * Drop a record off the front of a sockbuf
 * and move the next record to the front.
 * The socket must be locked.
 */
void
sbdroprecord(struct sockbuf *sb)
{
	struct mbuf *m, *mn;

	KASSERT(solocked(sb->sb_so));

	m = sb->sb_mb;
	if (m) {
		sb->sb_mb = m->m_nextpkt;
		/* Free every mbuf of the record, uncharging each. */
		do {
			sbfree(sb, m);
			MFREE(m, mn);
		} while ((m = mn) != NULL);
	}
	SB_EMPTY_FIXUP(sb);
}

/*
 * Create a "control" mbuf containing the specified data
 * with the specified type for presentation on a socket buffer.
1284 */ 1285struct mbuf * 1286sbcreatecontrol(void *p, int size, int type, int level) 1287{ 1288 struct cmsghdr *cp; 1289 struct mbuf *m; 1290 1291 if (CMSG_SPACE(size) > MCLBYTES) { 1292 printf("sbcreatecontrol: message too large %d\n", size); 1293 return NULL; 1294 } 1295 1296 if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL) 1297 return ((struct mbuf *) NULL); 1298 if (CMSG_SPACE(size) > MLEN) { 1299 MCLGET(m, M_DONTWAIT); 1300 if ((m->m_flags & M_EXT) == 0) { 1301 m_free(m); 1302 return NULL; 1303 } 1304 } 1305 cp = mtod(m, struct cmsghdr *); 1306 memcpy(CMSG_DATA(cp), p, size); 1307 m->m_len = CMSG_SPACE(size); 1308 cp->cmsg_len = CMSG_LEN(size); 1309 cp->cmsg_level = level; 1310 cp->cmsg_type = type; 1311 return (m); 1312} 1313 1314void 1315solockretry(struct socket *so, kmutex_t *lock) 1316{ 1317 1318 while (lock != so->so_lock) { 1319 mutex_exit(lock); 1320 lock = so->so_lock; 1321 mutex_enter(lock); 1322 } 1323} 1324 1325bool 1326solocked(struct socket *so) 1327{ 1328 1329 return mutex_owned(so->so_lock); 1330} 1331 1332bool 1333solocked2(struct socket *so1, struct socket *so2) 1334{ 1335 kmutex_t *lock; 1336 1337 lock = so1->so_lock; 1338 if (lock != so2->so_lock) 1339 return false; 1340 return mutex_owned(lock); 1341} 1342 1343/* 1344 * Assign a default lock to a new socket. For PRU_ATTACH, and done by 1345 * protocols that do not have special locking requirements. 1346 */ 1347void 1348sosetlock(struct socket *so) 1349{ 1350 kmutex_t *lock; 1351 1352 if (so->so_lock == NULL) { 1353 lock = softnet_lock; 1354 so->so_lock = lock; 1355 mutex_obj_hold(lock); 1356 mutex_enter(lock); 1357 } 1358 1359 /* In all cases, lock must be held on return from PRU_ATTACH. */ 1360 KASSERT(solocked(so)); 1361} 1362 1363/* 1364 * Set lock on sockbuf sb; sleep if lock is already held. 1365 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible. 1366 * Returns error without lock if sleep is interrupted. 
 */
int
sblock(struct sockbuf *sb, int wf)
{
	struct socket *so;
	kmutex_t *lock;
	int error;

	KASSERT(solocked(sb->sb_so));

	for (;;) {
		/* Fast path: lock free, take it and return. */
		if (__predict_true((sb->sb_flags & SB_LOCK) == 0)) {
			sb->sb_flags |= SB_LOCK;
			return 0;
		}
		/* Non-blocking caller: report contention immediately. */
		if (wf != M_WAITOK)
			return EWOULDBLOCK;
		so = sb->sb_so;
		lock = so->so_lock;
		if ((sb->sb_flags & SB_NOINTR) != 0) {
			/* Uninterruptible wait for the lock holder. */
			cv_wait(&so->so_cv, lock);
			error = 0;
		} else
			error = cv_wait_sig(&so->so_cv, lock);
		/* so_lock may have changed while we slept; chase it. */
		if (__predict_false(lock != so->so_lock))
			solockretry(so, lock);
		if (error != 0)
			/* Interrupted by a signal: return without SB_LOCK. */
			return error;
	}
}

/*
 * sbunlock: release the sockbuf lock taken by sblock() and wake
 * any waiters.  The socket must be locked and SB_LOCK held.
 */
void
sbunlock(struct sockbuf *sb)
{
	struct socket *so;

	so = sb->sb_so;

	KASSERT(solocked(so));
	KASSERT((sb->sb_flags & SB_LOCK) != 0);

	sb->sb_flags &= ~SB_LOCK;
	cv_broadcast(&so->so_cv);
}

/*
 * sowait: wait (interruptibly, with timeout timo) on the socket's
 * condition variable.  Returns the cv_timedwait_sig() error, e.g.
 * 0, EWOULDBLOCK on timeout, or EINTR/ERESTART on signal.
 * The socket must be locked on entry and is locked on return.
 */
int
sowait(struct socket *so, int timo)
{
	kmutex_t *lock;
	int error;

	KASSERT(solocked(so));

	lock = so->so_lock;
	error = cv_timedwait_sig(&so->so_cv, lock, timo);
	/* so_lock may have changed while we slept; chase it. */
	if (__predict_false(lock != so->so_lock))
		solockretry(so, lock);
	return error;
}