/*	$NetBSD: uipc_socket2.c,v 1.115 2013/10/08 19:58:25 christos Exp $	*/

/*-
 * Copyright (c) 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket2.c	8.2 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uipc_socket2.c,v 1.115 2013/10/08 19:58:25 christos Exp $");

#include "opt_mbuftrace.h"
#include "opt_sb_max.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/buf.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/domain.h>
#include <sys/poll.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/kauth.h>
#include <sys/pool.h>
#include <sys/uidinfo.h>

/*
 * Primitive routines for operating on sockets and socket buffers.
 *
 * Locking rules and assumptions:
 *
 * o socket::so_lock can change on the fly.  The low level routines used
 *   to lock sockets are aware of this.  When so_lock is acquired, the
 *   routine locking must check to see if so_lock still points to the
 *   lock that was acquired.  If so_lock has changed in the meantime, the
 *   now irrelevant lock that was acquired must be dropped and the lock
 *   operation retried.  Although not proven here, this is completely safe
 *   on a multiprocessor system, even with relaxed memory ordering, given
 *   the next two rules:
 *
 * o In order to mutate so_lock, the lock pointed to by the current value
 *   of so_lock must be held: i.e., the socket must be held locked by the
 *   changing thread.  The thread must issue membar_exit() to prevent
 *   memory accesses being reordered, and can set so_lock to the desired
 *   value.  If the lock pointed to by the new value of so_lock is not
 *   held by the changing thread, the socket must then be considered
 *   unlocked.
 *
 * o If so_lock is mutated, and the previous lock referred to by so_lock
 *   could still be visible to other threads in the system (e.g. via file
 *   descriptor or protocol-internal reference), then the old lock must
 *   remain valid until the socket and/or protocol control block has been
 *   torn down.
 *
 * o If a socket has a non-NULL so_head value (i.e. is in the process of
 *   connecting), then locking the socket must also lock the socket pointed
 *   to by so_head: their lock pointers must match.
 *
 * o If a socket has connections in progress (so_q, so_q0 not empty) then
 *   locking the socket must also lock the sockets attached to both queues.
 *   Again, their lock pointers must match.
 *
 * o Beyond the initial lock assignment in socreate(), assigning locks to
 *   sockets is the responsibility of the individual protocols / protocol
 *   domains.
 */

static pool_cache_t	socket_cache;

u_long	sb_max = SB_MAX;	/* maximum socket buffer size */
static u_long	sb_max_adj;	/* adjusted sb_max */
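
/*
 * Example (illustrative sketch, not compiled): acquiring a socket's
 * lock under the rules above.  Because so_lock may be changed by its
 * owner while we are blocked on the old mutex, the acquire must be
 * retried until the pointer is stable.  solockretry() at the end of
 * this file implements the same loop for the slow path.
 */
#if 0
static void
example_solock(struct socket *so)
{
	kmutex_t *lock;

	for (;;) {
		lock = so->so_lock;
		mutex_enter(lock);
		if (__predict_true(lock == so->so_lock))
			break;		/* pointer stable: socket is locked */
		mutex_exit(lock);	/* raced with a lock change; retry */
	}
}
#endif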

/*
 * Procedures to manipulate state flags of socket
 * and do appropriate wakeups.  Normal sequence from the
 * active (originating) side is that soisconnecting() is
 * called during processing of connect() call,
 * resulting in an eventual call to soisconnected() if/when the
 * connection is established.  When the connection is torn down
 * soisdisconnecting() is called during processing of disconnect() call,
 * and soisdisconnected() is called when the connection to the peer
 * is totally severed.  The semantics of these routines are such that
 * connectionless protocols can call soisconnected() and soisdisconnected()
 * only, bypassing the in-progress calls when setting up a ``connection''
 * takes no time.
 *
 * From the passive side, a socket is created with
 * two queues of sockets: so_q0 for connections in progress
 * and so_q for connections already made and awaiting user acceptance.
 * As a protocol is preparing incoming connections, it creates a socket
 * structure queued on so_q0 by calling sonewconn().  When the connection
 * is established, soisconnected() is called, and transfers the
 * socket structure to so_q, making it available to accept().
 *
 * If a socket is closed with sockets on either
 * so_q0 or so_q, these sockets are dropped.
 *
 * If higher level protocols are implemented in
 * the kernel, the wakeups done here will sometimes
 * cause software-interrupt process scheduling.
 */

void
soisconnecting(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTING;
}

void
soisconnected(struct socket *so)
{
	struct socket	*head;

	head = so->so_head;

	KASSERT(solocked(so));
	KASSERT(head == NULL || solocked2(so, head));

	so->so_state &= ~(SS_ISCONNECTING | SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTED;
	if (head && so->so_onq == &head->so_q0) {
		if ((so->so_options & SO_ACCEPTFILTER) == 0) {
			soqremque(so, 0);
			soqinsque(head, so, 1);
			sorwakeup(head);
			cv_broadcast(&head->so_cv);
		} else {
			so->so_upcall =
			    head->so_accf->so_accept_filter->accf_callback;
			so->so_upcallarg = head->so_accf->so_accept_filter_arg;
			so->so_rcv.sb_flags |= SB_UPCALL;
			so->so_options &= ~SO_ACCEPTFILTER;
			(*so->so_upcall)(so, so->so_upcallarg,
			    POLLIN|POLLRDNORM, M_DONTWAIT);
		}
	} else {
		cv_broadcast(&so->so_cv);
		sorwakeup(so);
		sowwakeup(so);
	}
}

void
soisdisconnecting(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state &= ~SS_ISCONNECTING;
	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
	cv_broadcast(&so->so_cv);
	sowwakeup(so);
	sorwakeup(so);
}

void
soisdisconnected(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
	cv_broadcast(&so->so_cv);
	sowwakeup(so);
	sorwakeup(so);
}

void
soinit2(void)
{

	socket_cache = pool_cache_init(sizeof(struct socket), 0, 0, 0,
	    "socket", NULL, IPL_SOFTNET, NULL, NULL, NULL);
}
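
/*
 * Example (illustrative sketch, not compiled): the active-side state
 * sequence described in the comment above, as a connection-oriented
 * protocol would drive it.  soisconnecting() runs from the connect()
 * path and soisconnected() when the handshake completes; a
 * connectionless protocol would call soisconnected() alone.
 */
#if 0
static void
example_active_open(struct socket *so)
{
	soisconnecting(so);	/* connect() in progress */
	/* ... protocol exchanges its handshake with the peer ... */
	soisconnected(so);	/* SS_ISCONNECTING -> SS_ISCONNECTED */
}
#endif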

/*
 * When an attempt at a new connection is noted on a socket
 * which accepts connections, sonewconn is called.  If the
 * connection is possible (subject to space constraints, etc.)
 * then we allocate a new structure, properly linked into the
 * data structure of the original socket, and return this.
 */
struct socket *
sonewconn(struct socket *head, bool conncomplete)
{
	struct socket	*so;
	int		soqueue, error;

	KASSERT(solocked(head));

	if ((head->so_options & SO_ACCEPTFILTER) != 0)
		conncomplete = false;
	soqueue = conncomplete ? 1 : 0;

	if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2)
		return NULL;
	so = soget(false);
	if (so == NULL)
		return NULL;
	mutex_obj_hold(head->so_lock);
	so->so_lock = head->so_lock;
	so->so_type = head->so_type;
	so->so_options = head->so_options &~ SO_ACCEPTCONN;
	so->so_linger = head->so_linger;
	so->so_state = head->so_state | SS_NOFDREF;
	so->so_proto = head->so_proto;
	so->so_timeo = head->so_timeo;
	so->so_pgid = head->so_pgid;
	so->so_send = head->so_send;
	so->so_receive = head->so_receive;
	so->so_uidinfo = head->so_uidinfo;
	so->so_cpid = head->so_cpid;
#ifdef MBUFTRACE
	so->so_mowner = head->so_mowner;
	so->so_rcv.sb_mowner = head->so_rcv.sb_mowner;
	so->so_snd.sb_mowner = head->so_snd.sb_mowner;
#endif
	if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat) != 0)
		goto out;
	so->so_snd.sb_lowat = head->so_snd.sb_lowat;
	so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
	so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
	so->so_snd.sb_timeo = head->so_snd.sb_timeo;
	so->so_rcv.sb_flags |= head->so_rcv.sb_flags & (SB_AUTOSIZE | SB_ASYNC);
	so->so_snd.sb_flags |= head->so_snd.sb_flags & (SB_AUTOSIZE | SB_ASYNC);
	soqinsque(head, so, soqueue);
	error = (*so->so_proto->pr_usrreq)(so, PRU_ATTACH, NULL, NULL,
	    NULL, NULL);
	KASSERT(solocked(so));
	if (error != 0) {
		(void) soqremque(so, soqueue);
out:
		/*
		 * Remove accept filter if one is present.
		 * XXX Is this really needed?
		 */
		if (so->so_accf != NULL)
			(void)accept_filt_clear(so);
		soput(so);
		return NULL;
	}
	if (conncomplete) {
		sorwakeup(head);
		cv_broadcast(&head->so_cv);
		so->so_state |= SS_ISCONNECTED;
	}
	return so;
}
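
/*
 * Example (illustrative sketch, not compiled): the passive-side flow.
 * A protocol notices an incoming connection on a listening socket and
 * calls sonewconn() with conncomplete false, queueing the new socket
 * on so_q0; when the handshake later finishes, soisconnected() moves
 * it to so_q, where accept() can pick it up.
 */
#if 0
static void
example_passive_open(struct socket *head)
{
	struct socket *so;

	so = sonewconn(head, false);	/* queued on head->so_q0 */
	if (so == NULL)
		return;			/* listen queue full or no memory */
	/* ... handshake completes ... */
	soisconnected(so);		/* moved to head->so_q */
}
#endif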

struct socket *
soget(bool waitok)
{
	struct socket	*so;

	so = pool_cache_get(socket_cache, (waitok ? PR_WAITOK : PR_NOWAIT));
	if (__predict_false(so == NULL))
		return (NULL);
	memset(so, 0, sizeof(*so));
	TAILQ_INIT(&so->so_q0);
	TAILQ_INIT(&so->so_q);
	cv_init(&so->so_cv, "socket");
	cv_init(&so->so_rcv.sb_cv, "netio");
	cv_init(&so->so_snd.sb_cv, "netio");
	selinit(&so->so_rcv.sb_sel);
	selinit(&so->so_snd.sb_sel);
	so->so_rcv.sb_so = so;
	so->so_snd.sb_so = so;
	return so;
}

void
soput(struct socket *so)
{

	KASSERT(!cv_has_waiters(&so->so_cv));
	KASSERT(!cv_has_waiters(&so->so_rcv.sb_cv));
	KASSERT(!cv_has_waiters(&so->so_snd.sb_cv));
	seldestroy(&so->so_rcv.sb_sel);
	seldestroy(&so->so_snd.sb_sel);
	mutex_obj_free(so->so_lock);
	cv_destroy(&so->so_cv);
	cv_destroy(&so->so_rcv.sb_cv);
	cv_destroy(&so->so_snd.sb_cv);
	pool_cache_put(socket_cache, so);
}

void
soqinsque(struct socket *head, struct socket *so, int q)
{

	KASSERT(solocked2(head, so));

#ifdef DIAGNOSTIC
	if (so->so_onq != NULL)
		panic("soqinsque");
#endif

	so->so_head = head;
	if (q == 0) {
		head->so_q0len++;
		so->so_onq = &head->so_q0;
	} else {
		head->so_qlen++;
		so->so_onq = &head->so_q;
	}
	TAILQ_INSERT_TAIL(so->so_onq, so, so_qe);
}

int
soqremque(struct socket *so, int q)
{
	struct socket	*head;

	head = so->so_head;

	KASSERT(solocked(so));
	if (q == 0) {
		if (so->so_onq != &head->so_q0)
			return (0);
		head->so_q0len--;
	} else {
		if (so->so_onq != &head->so_q)
			return (0);
		head->so_qlen--;
	}
	KASSERT(solocked2(so, head));
	TAILQ_REMOVE(so->so_onq, so, so_qe);
	so->so_onq = NULL;
	so->so_head = NULL;
	return (1);
}

/*
 * Socantsendmore indicates that no more data will be sent on the
 * socket; it would normally be applied to a socket when the user
 * informs the system that no more data is to be sent, by the protocol
 * code (in case PRU_SHUTDOWN).  Socantrcvmore indicates that no more data
 * will be received, and will normally be applied to the socket by a
 * protocol when it detects that the peer will send no more data.
 * Data queued for reading in the socket may yet be read.
 */

void
socantsendmore(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state |= SS_CANTSENDMORE;
	sowwakeup(so);
}

void
socantrcvmore(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state |= SS_CANTRCVMORE;
	sorwakeup(so);
}

/*
 * Wait for data to arrive at/drain from a socket buffer.
 */
int
sbwait(struct sockbuf *sb)
{
	struct socket *so;
	kmutex_t *lock;
	int error;

	so = sb->sb_so;

	KASSERT(solocked(so));

	sb->sb_flags |= SB_NOTIFY;
	lock = so->so_lock;
	if ((sb->sb_flags & SB_NOINTR) != 0)
		error = cv_timedwait(&sb->sb_cv, lock, sb->sb_timeo);
	else
		error = cv_timedwait_sig(&sb->sb_cv, lock, sb->sb_timeo);
	if (__predict_false(lock != so->so_lock))
		solockretry(so, lock);
	return error;
}
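
/*
 * Example (illustrative sketch, not compiled): a receive path blocking
 * until data arrives, the typical use of sbwait().  The socket must be
 * locked; sbwait() sleeps on the buffer's condition variable, dropping
 * the socket lock while asleep and re-validating it on wakeup, and
 * returns non-zero if the interruptible sleep was broken.
 */
#if 0
static int
example_wait_for_data(struct socket *so)
{
	int error = 0;

	KASSERT(solocked(so));
	while (so->so_rcv.sb_cc == 0 &&
	    (so->so_state & SS_CANTRCVMORE) == 0) {
		error = sbwait(&so->so_rcv);
		if (error != 0)
			break;		/* signal or timeout */
	}
	return error;
}
#endif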

/*
 * Wakeup processes waiting on a socket buffer.
 * Do asynchronous notification via SIGIO
 * if the socket buffer has the SB_ASYNC flag set.
 */
void
sowakeup(struct socket *so, struct sockbuf *sb, int code)
{
	int band;

	KASSERT(solocked(so));
	KASSERT(sb->sb_so == so);

	if (code == POLL_IN)
		band = POLLIN|POLLRDNORM;
	else
		band = POLLOUT|POLLWRNORM;
	sb->sb_flags &= ~SB_NOTIFY;
	selnotify(&sb->sb_sel, band, NOTE_SUBMIT);
	cv_broadcast(&sb->sb_cv);
	if (sb->sb_flags & SB_ASYNC)
		fownsignal(so->so_pgid, SIGIO, code, band, so);
	if (sb->sb_flags & SB_UPCALL)
		(*so->so_upcall)(so, so->so_upcallarg, band, M_DONTWAIT);
}

/*
 * Reset a socket's lock pointer.  Wake all threads waiting on the
 * socket's condition variables so that they can restart their waits
 * using the new lock.  The existing lock must be held.
 */
void
solockreset(struct socket *so, kmutex_t *lock)
{

	KASSERT(solocked(so));

	so->so_lock = lock;
	cv_broadcast(&so->so_snd.sb_cv);
	cv_broadcast(&so->so_rcv.sb_cv);
	cv_broadcast(&so->so_cv);
}

/*
 * Socket buffer (struct sockbuf) utility routines.
 *
 * Each socket contains two socket buffers: one for sending data and
 * one for receiving data.  Each buffer contains a queue of mbufs,
 * information about the number of mbufs and amount of data in the
 * queue, and other fields allowing poll() statements and notification
 * on data availability to be implemented.
 *
 * Data stored in a socket buffer is maintained as a list of records.
 * Each record is a list of mbufs chained together with the m_next
 * field.  Records are chained together with the m_nextpkt field.  The
 * upper level routine soreceive() expects the following conventions to
 * be observed when placing information in the receive buffer:
 *
 * 1. If the protocol requires each message be preceded by the sender's
 *    name, then a record containing that name must be present before
 *    any associated data (mbuf's must be of type MT_SONAME).
 * 2. If the protocol supports the exchange of ``access rights'' (really
 *    just additional data associated with the message), and there are
 *    ``rights'' to be received, then a record containing this data
 *    should be present (mbuf's must be of type MT_CONTROL).
 * 3. If a name or rights record exists, then it must be followed by
 *    a data record, perhaps of zero length.
 *
 * Before using a new socket structure it is first necessary to reserve
 * buffer space to the socket, by calling sbreserve().  This should commit
 * some of the available buffer space in the system buffer pool for the
 * socket (currently, it does nothing but enforce limits).  The space
 * should be released by calling sbrelease() when the socket is destroyed.
 */
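
/*
 * Example (illustrative sketch, not compiled): reserving buffer space
 * before a socket is used, as required by the comment above.  A
 * protocol typically does this from its PRU_ATTACH handler.  The
 * 8192/8192 sizes here are arbitrary example values, not values taken
 * from this file.
 */
#if 0
static int
example_attach_reserve(struct socket *so)
{
	int error;

	error = soreserve(so, 8192, 8192);	/* send, receive hiwat */
	if (error != 0)
		return error;			/* ENOBUFS */
	/* ... allocate and link the protocol control block ... */
	return 0;
}
#endif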

int
sb_max_set(u_long new_sbmax)
{
	int s;

	if (new_sbmax < (16 * 1024))
		return (EINVAL);

	s = splsoftnet();
	sb_max = new_sbmax;
	sb_max_adj = (u_quad_t)new_sbmax * MCLBYTES / (MSIZE + MCLBYTES);
	splx(s);

	return (0);
}

int
soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
{

	KASSERT(so->so_lock == NULL || solocked(so));

	/*
	 * there's at least one application (a configure script of screen)
	 * which expects a fifo is writable even if it has "some" bytes
	 * in its buffer.
	 * so we want to make sure (hiwat - lowat) >= (some bytes).
	 *
	 * PIPE_BUF here is an arbitrary value chosen as (some bytes) above.
	 * we expect it's large enough for such applications.
	 */
	u_long lowat = MAX(sock_loan_thresh, MCLBYTES);
	u_long hiwat = lowat + PIPE_BUF;

	if (sndcc < hiwat)
		sndcc = hiwat;
	if (sbreserve(&so->so_snd, sndcc, so) == 0)
		goto bad;
	if (sbreserve(&so->so_rcv, rcvcc, so) == 0)
		goto bad2;
	if (so->so_rcv.sb_lowat == 0)
		so->so_rcv.sb_lowat = 1;
	if (so->so_snd.sb_lowat == 0)
		so->so_snd.sb_lowat = lowat;
	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
	return (0);
 bad2:
	sbrelease(&so->so_snd, so);
 bad:
	return (ENOBUFS);
}

/*
 * Allot mbufs to a sockbuf.
 * Attempt to scale mbmax so that mbcnt doesn't become limiting
 * if buffering efficiency is near the normal case.
 */
int
sbreserve(struct sockbuf *sb, u_long cc, struct socket *so)
{
	struct lwp *l = curlwp; /* XXX */
	rlim_t maxcc;
	struct uidinfo *uidinfo;

	KASSERT(so->so_lock == NULL || solocked(so));
	KASSERT(sb->sb_so == so);
	KASSERT(sb_max_adj != 0);

	if (cc == 0 || cc > sb_max_adj)
		return (0);

	maxcc = l->l_proc->p_rlimit[RLIMIT_SBSIZE].rlim_cur;

	uidinfo = so->so_uidinfo;
	if (!chgsbsize(uidinfo, &sb->sb_hiwat, cc, maxcc))
		return 0;
	sb->sb_mbmax = min(cc * 2, sb_max);
	if (sb->sb_lowat > sb->sb_hiwat)
		sb->sb_lowat = sb->sb_hiwat;
	return (1);
}

/*
 * Free mbufs held by a socket, and reserved mbuf space.  We do not assert
 * that the socket is held locked here: see sorflush().
 */
void
sbrelease(struct sockbuf *sb, struct socket *so)
{

	KASSERT(sb->sb_so == so);

	sbflush(sb);
	(void)chgsbsize(so->so_uidinfo, &sb->sb_hiwat, 0, RLIM_INFINITY);
	sb->sb_mbmax = 0;
}

/*
 * Routines to add and remove
 * data from an mbuf queue.
 *
 * The routines sbappend() or sbappendrecord() are normally called to
 * append new mbufs to a socket buffer, after checking that adequate
 * space is available, comparing the function sbspace() with the amount
 * of data to be added.  sbappendrecord() differs from sbappend() in
 * that data supplied is treated as the beginning of a new record.
 * To place a sender's address, optional access rights, and data in a
 * socket receive buffer, sbappendaddr() should be used.  To place
 * access rights and data in a socket receive buffer, sbappendrights()
 * should be used.  In either case, the new data begins a new record.
 * Note that unlike sbappend() and sbappendrecord(), these routines check
 * for the caller that there will be enough space to store the data.
 * Each fails if there is not enough space, or if it cannot find mbufs
 * to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data
 * awaiting acknowledgement.  Data is normally copied from a socket
 * send buffer in a protocol with m_copy for output to a peer,
 * and then removing the data from the socket buffer with sbdrop()
 * or sbdroprecord() when the data is acknowledged by the peer.
 */
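
/*
 * Example (illustrative sketch, not compiled): the producer-side
 * convention just described.  A protocol delivering data to a socket
 * checks sbspace() before appending, then wakes any readers.  The
 * sketch assumes m carries a packet header (M_PKTHDR) so its total
 * length is known.
 */
#if 0
static void
example_deliver(struct socket *so, struct mbuf *m)
{
	struct sockbuf *sb = &so->so_rcv;

	KASSERT(solocked(so));
	if (sbspace(sb) < m->m_pkthdr.len) {
		m_freem(m);		/* no room: drop (protocol policy) */
		return;
	}
	sbappend(sb, m);		/* append to the last record */
	sorwakeup(so);			/* notify readers/poll/SIGIO */
}
#endif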

#ifdef SOCKBUF_DEBUG
void
sblastrecordchk(struct sockbuf *sb, const char *where)
{
	struct mbuf *m = sb->sb_mb;

	KASSERT(solocked(sb->sb_so));

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	if (m != sb->sb_lastrecord) {
		printf("sblastrecordchk: sb_mb %p sb_lastrecord %p last %p\n",
		    sb->sb_mb, sb->sb_lastrecord, m);
		printf("packet chain:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
			printf("\t%p\n", m);
		panic("sblastrecordchk from %s", where);
	}
}

void
sblastmbufchk(struct sockbuf *sb, const char *where)
{
	struct mbuf *m = sb->sb_mb;
	struct mbuf *n;

	KASSERT(solocked(sb->sb_so));

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	while (m && m->m_next)
		m = m->m_next;

	if (m != sb->sb_mbtail) {
		printf("sblastmbufchk: sb_mb %p sb_mbtail %p last %p\n",
		    sb->sb_mb, sb->sb_mbtail, m);
		printf("packet tree:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
			printf("\t");
			for (n = m; n != NULL; n = n->m_next)
				printf("%p ", n);
			printf("\n");
		}
		panic("sblastmbufchk from %s", where);
	}
}
#endif /* SOCKBUF_DEBUG */

/*
 * Link a chain of records onto a socket buffer
 */
#define	SBLINKRECORDCHAIN(sb, m0, mlast)				\
do {									\
	if ((sb)->sb_lastrecord != NULL)				\
		(sb)->sb_lastrecord->m_nextpkt = (m0);			\
	else								\
		(sb)->sb_mb = (m0);					\
	(sb)->sb_lastrecord = (mlast);					\
} while (/*CONSTCOND*/0)


#define	SBLINKRECORD(sb, m0)						\
    SBLINKRECORDCHAIN(sb, m0, m0)

/*
 * Append mbuf chain m to the last record in the
 * socket buffer sb.  The additional space associated with
 * the mbuf chain is recorded in sb.  Empty mbufs are
 * discarded and mbufs are compacted where possible.
 */
void
sbappend(struct sockbuf *sb, struct mbuf *m)
{
	struct mbuf	*n;

	KASSERT(solocked(sb->sb_so));

	if (m == NULL)
		return;

#ifdef MBUFTRACE
	m_claimm(m, sb->sb_mowner);
#endif

	SBLASTRECORDCHK(sb, "sbappend 1");

	if ((n = sb->sb_lastrecord) != NULL) {
		/*
		 * XXX Would like to simply use sb_mbtail here, but
		 * XXX I need to verify that I won't miss an EOR that
		 * XXX way.
		 */
		do {
			if (n->m_flags & M_EOR) {
				sbappendrecord(sb, m); /* XXXXXX!!!! */
				return;
			}
		} while (n->m_next && (n = n->m_next));
	} else {
		/*
		 * If this is the first record in the socket buffer, it's
		 * also the last record.
		 */
		sb->sb_lastrecord = m;
	}
	sbcompress(sb, m, n);
	SBLASTRECORDCHK(sb, "sbappend 2");
}

/*
 * This version of sbappend() should only be used when the caller
 * absolutely knows that there will never be more than one record
 * in the socket buffer, that is, a stream protocol (such as TCP).
 */
void
sbappendstream(struct sockbuf *sb, struct mbuf *m)
{

	KASSERT(solocked(sb->sb_so));
	KDASSERT(m->m_nextpkt == NULL);
	KASSERT(sb->sb_mb == sb->sb_lastrecord);

	SBLASTMBUFCHK(sb, __func__);

#ifdef MBUFTRACE
	m_claimm(m, sb->sb_mowner);
#endif

	sbcompress(sb, m, sb->sb_mbtail);

	sb->sb_lastrecord = sb->sb_mb;
	SBLASTRECORDCHK(sb, __func__);
}

#ifdef SOCKBUF_DEBUG
void
sbcheck(struct sockbuf *sb)
{
	struct mbuf	*m, *m2;
	u_long		len, mbcnt;

	KASSERT(solocked(sb->sb_so));

	len = 0;
	mbcnt = 0;
	for (m = sb->sb_mb; m; m = m->m_nextpkt) {
		for (m2 = m; m2 != NULL; m2 = m2->m_next) {
			len += m2->m_len;
			mbcnt += MSIZE;
			if (m2->m_flags & M_EXT)
				mbcnt += m2->m_ext.ext_size;
			if (m2->m_nextpkt != NULL)
				panic("sbcheck nextpkt");
		}
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		printf("cc %lu != %lu || mbcnt %lu != %lu\n", len, sb->sb_cc,
		    mbcnt, sb->sb_mbcnt);
		panic("sbcheck");
	}
}
#endif

/*
 * As above, except the mbuf chain
 * begins a new record.
 */
void
sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf	*m;

	KASSERT(solocked(sb->sb_so));

	if (m0 == NULL)
		return;

#ifdef MBUFTRACE
	m_claimm(m0, sb->sb_mowner);
#endif
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	SBLASTRECORDCHK(sb, "sbappendrecord 1");
	SBLINKRECORD(sb, m0);
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
	SBLASTRECORDCHK(sb, "sbappendrecord 2");
}

/*
 * As above except that OOB data
 * is inserted at the beginning of the sockbuf,
 * but after any other OOB data.
 */
void
sbinsertoob(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf	*m, **mp;

	KASSERT(solocked(sb->sb_so));

	if (m0 == NULL)
		return;

	SBLASTRECORDCHK(sb, "sbinsertoob 1");

	for (mp = &sb->sb_mb; (m = *mp) != NULL; mp = &((*mp)->m_nextpkt)) {
	    again:
		switch (m->m_type) {

		case MT_OOBDATA:
			continue;		/* WANT next train */

		case MT_CONTROL:
			if ((m = m->m_next) != NULL)
				goto again;	/* inspect THIS train further */
		}
		break;
	}
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	m0->m_nextpkt = *mp;
	if (*mp == NULL) {
		/* m0 is actually the new tail */
		sb->sb_lastrecord = m0;
	}
	*mp = m0;
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
	SBLASTRECORDCHK(sb, "sbinsertoob 2");
}
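
/*
 * Example (illustrative sketch, not compiled): how a stream protocol
 * such as TCP feeds its single-record receive buffer, per the comment
 * above sbappendstream().  There is only one record, so the new data
 * simply extends it.
 */
#if 0
static void
example_stream_input(struct socket *so, struct mbuf *m)
{
	KASSERT(solocked(so));
	sbappendstream(&so->so_rcv, m);	/* extend the single record */
	sorwakeup(so);
}
#endif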

/*
 * Append address and data, and optionally, control (ancillary) data
 * to the receive queue of a socket.  If present,
 * m0 must include a packet header with total length.
 * Returns 0 if no space in sockbuf or insufficient mbufs.
 */
int
sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0,
	struct mbuf *control)
{
	struct mbuf	*m, *n, *nlast;
	int		space, len;

	KASSERT(solocked(sb->sb_so));

	space = asa->sa_len;

	if (m0 != NULL) {
		if ((m0->m_flags & M_PKTHDR) == 0)
			panic("sbappendaddr");
		space += m0->m_pkthdr.len;
#ifdef MBUFTRACE
		m_claimm(m0, sb->sb_mowner);
#endif
	}
	for (n = control; n; n = n->m_next) {
		space += n->m_len;
		MCLAIM(n, sb->sb_mowner);
		if (n->m_next == NULL)	/* keep pointer to last control buf */
			break;
	}
	if (space > sbspace(sb))
		return (0);
	m = m_get(M_DONTWAIT, MT_SONAME);
	if (m == NULL)
		return (0);
	MCLAIM(m, sb->sb_mowner);
	/*
	 * XXX avoid 'comparison always true' warning which isn't easily
	 * avoided.
	 */
	len = asa->sa_len;
	if (len > MLEN) {
		MEXTMALLOC(m, asa->sa_len, M_NOWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return (0);
		}
	}
	m->m_len = asa->sa_len;
	memcpy(mtod(m, void *), asa, asa->sa_len);
	if (n)
		n->m_next = m0;		/* concatenate data to control */
	else
		control = m0;
	m->m_next = control;

	SBLASTRECORDCHK(sb, "sbappendaddr 1");

	for (n = m; n->m_next != NULL; n = n->m_next)
		sballoc(sb, n);
	sballoc(sb, n);
	nlast = n;
	SBLINKRECORD(sb, m);

	sb->sb_mbtail = nlast;
	SBLASTMBUFCHK(sb, "sbappendaddr");
	SBLASTRECORDCHK(sb, "sbappendaddr 2");

	return (1);
}
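
/*
 * Example (illustrative sketch, not compiled): datagram delivery with
 * sbappendaddr(), which builds the MT_SONAME-then-data record layout
 * that soreceive() expects.  sbappendaddr() does not consume its
 * arguments on failure, so the caller must free them, as datagram
 * input paths conventionally do.
 */
#if 0
static void
example_datagram_input(struct socket *so, const struct sockaddr *from,
    struct mbuf *m, struct mbuf *control)
{
	KASSERT(solocked(so));
	if (sbappendaddr(&so->so_rcv, from, m, control) == 0) {
		m_freem(m);		/* record did not fit */
		if (control != NULL)
			m_freem(control);
		return;
	}
	sorwakeup(so);
}
#endif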

/*
 * Helper for sbappendchainaddr: prepend a struct sockaddr* to
 * an mbuf chain.
 */
static inline struct mbuf *
m_prepend_sockaddr(struct sockbuf *sb, struct mbuf *m0,
	const struct sockaddr *asa)
{
	struct mbuf *m;
	const int salen = asa->sa_len;

	KASSERT(solocked(sb->sb_so));

	/* only the first in each chain need be a pkthdr */
	m = m_gethdr(M_DONTWAIT, MT_SONAME);
	if (m == NULL)
		return NULL;
	MCLAIM(m, sb->sb_mowner);
#ifdef notyet
	if (salen > MHLEN) {
		MEXTMALLOC(m, salen, M_NOWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return NULL;
		}
	}
#else
	KASSERT(salen <= MHLEN);
#endif
	m->m_len = salen;
	memcpy(mtod(m, void *), asa, salen);
	m->m_next = m0;
	m->m_pkthdr.len = salen + m0->m_pkthdr.len;

	return m;
}

int
sbappendaddrchain(struct sockbuf *sb, const struct sockaddr *asa,
	struct mbuf *m0, int sbprio)
{
	struct mbuf *m, *n, *n0, *nlast;
	int error;

	KASSERT(solocked(sb->sb_so));

	/*
	 * XXX sbprio reserved for encoding priority of this request:
	 * SB_PRIO_NONE --> honour normal sb limits
	 * SB_PRIO_ONESHOT_OVERFLOW --> if socket has any space,
	 *	take whole chain.  Intended for large requests
	 *	that should be delivered atomically (all, or none).
	 * SB_PRIO_OVERDRAFT --> allow a small (2*MLEN) overflow
	 *	over normal socket limits, for messages indicating
	 *	buffer overflow in earlier normal/lower-priority messages.
	 * SB_PRIO_BESTEFFORT --> ignore limits entirely.
	 *	Intended for kernel-generated messages only.
	 *	Up to generator to avoid total mbuf resource exhaustion.
	 */
	(void)sbprio;

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddrchain");

#ifdef notyet
	space = sbspace(sb);

	/*
	 * Enforce SB_PRIO_* limits as described above.
	 */
#endif

	n0 = NULL;
	nlast = NULL;
	for (m = m0; m; m = m->m_nextpkt) {
		struct mbuf *np;

#ifdef MBUFTRACE
		m_claimm(m, sb->sb_mowner);
#endif

		/* Prepend sockaddr to this record (m) of input chain m0 */
		n = m_prepend_sockaddr(sb, m, asa);
		if (n == NULL) {
			error = ENOBUFS;
			goto bad;
		}

		/* Append record (asa+m) to end of new chain n0 */
		if (n0 == NULL) {
			n0 = n;
		} else {
			nlast->m_nextpkt = n;
		}
		/* Keep track of last record on new chain */
		nlast = n;

		for (np = n; np; np = np->m_next)
			sballoc(sb, np);
	}

	SBLASTRECORDCHK(sb, "sbappendaddrchain 1");

	/* Drop the entire chain of (asa+m) records onto the socket */
	SBLINKRECORDCHAIN(sb, n0, nlast);

	SBLASTRECORDCHK(sb, "sbappendaddrchain 2");

	for (m = nlast; m->m_next; m = m->m_next)
		;
	sb->sb_mbtail = m;
	SBLASTMBUFCHK(sb, "sbappendaddrchain");

	return (1);

bad:
	/*
	 * On error, free the prepended addresses.  For consistency
	 * with sbappendaddr(), leave it to our caller to free
	 * the input record chain passed to us as m0.
	 */
	while ((n = n0) != NULL) {
		struct mbuf *np;

		/* Undo the sballoc() of this record */
		for (np = n; np; np = np->m_next)
			sbfree(sb, np);

		n0 = n->m_nextpkt;	/* iterate at next prepended address */
		MFREE(n, np);		/* free prepended address (not data) */
	}
	return error;
}


int
sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
{
	struct mbuf	*m, *mlast, *n;
	int		space;

	KASSERT(solocked(sb->sb_so));

	space = 0;
	if (control == NULL)
		panic("sbappendcontrol");
	for (m = control; ; m = m->m_next) {
		space += m->m_len;
		MCLAIM(m, sb->sb_mowner);
		if (m->m_next == NULL)
			break;
	}
	n = m;			/* save pointer to last control buffer */
	for (m = m0; m; m = m->m_next) {
		MCLAIM(m, sb->sb_mowner);
		space += m->m_len;
	}
	if (space > sbspace(sb))
		return (0);
	n->m_next = m0;		/* concatenate data to control */

	SBLASTRECORDCHK(sb, "sbappendcontrol 1");

	for (m = control; m->m_next != NULL; m = m->m_next)
		sballoc(sb, m);
	sballoc(sb, m);
	mlast = m;
	SBLINKRECORD(sb, control);

	sb->sb_mbtail = mlast;
	SBLASTMBUFCHK(sb, "sbappendcontrol");
	SBLASTRECORDCHK(sb, "sbappendcontrol 2");

	return (1);
}

/*
 * Compress mbuf chain m into the socket
 * buffer sb following mbuf n.  If n
 * is null, the buffer is presumed empty.
 */
void
sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
{
	int		eor;
	struct mbuf	*o;

	KASSERT(solocked(sb->sb_so));

	eor = 0;
	while (m) {
		eor |= m->m_flags & M_EOR;
		if (m->m_len == 0 &&
		    (eor == 0 ||
		    (((o = m->m_next) || (o = n)) &&
		    o->m_type == m->m_type))) {
			if (sb->sb_lastrecord == m)
				sb->sb_lastrecord = m->m_next;
			m = m_free(m);
			continue;
		}
		if (n && (n->m_flags & M_EOR) == 0 &&
		    /* M_TRAILINGSPACE() checks buffer writeability */
		    m->m_len <= MCLBYTES / 4 && /* XXX Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			memcpy(mtod(n, char *) + n->m_len, mtod(m, void *),
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			m = m_free(m);
			continue;
		}
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sb->sb_mbtail = m;
		sballoc(sb, m);
		n = m;
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	if (eor) {
		if (n)
			n->m_flags |= eor;
		else
			printf("semi-panic: sbcompress\n");
	}
	SBLASTMBUFCHK(sb, __func__);
}

/*
 * Free all mbufs in a sockbuf.
 * Check that all resources are reclaimed.
 */
void
sbflush(struct sockbuf *sb)
{

	KASSERT(solocked(sb->sb_so));
	KASSERT((sb->sb_flags & SB_LOCK) == 0);

	while (sb->sb_mbcnt)
		sbdrop(sb, (int)sb->sb_cc);

	KASSERT(sb->sb_cc == 0);
	KASSERT(sb->sb_mb == NULL);
	KASSERT(sb->sb_mbtail == NULL);
	KASSERT(sb->sb_lastrecord == NULL);
}

/*
 * Drop data from (the front of) a sockbuf.
 */
void
sbdrop(struct sockbuf *sb, int len)
{
	struct mbuf	*m, *mn, *next;

	KASSERT(solocked(sb->sb_so));

	next = (m = sb->sb_mb) ? m->m_nextpkt : NULL;
	while (len > 0) {
		if (m == NULL) {
			if (next == NULL)
				panic("sbdrop(%p,%d): cc=%lu",
				    sb, len, sb->sb_cc);
			m = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			break;
		}
		len -= m->m_len;
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	while (m && m->m_len == 0) {
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;
	/*
	 * First part is an inline SB_EMPTY_FIXUP().  Second part
	 * makes sure sb_lastrecord is up-to-date if we dropped
	 * part of the last record.
	 */
	m = sb->sb_mb;
	if (m == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (m->m_nextpkt == NULL)
		sb->sb_lastrecord = m;
}

/*
 * Drop a record off the front of a sockbuf
 * and move the next record to the front.
 */
void
sbdroprecord(struct sockbuf *sb)
{
	struct mbuf	*m, *mn;

	KASSERT(solocked(sb->sb_so));

	m = sb->sb_mb;
	if (m) {
		sb->sb_mb = m->m_nextpkt;
		do {
			sbfree(sb, m);
			MFREE(m, mn);
		} while ((m = mn) != NULL);
	}
	SB_EMPTY_FIXUP(sb);
}
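
/*
 * Example (illustrative sketch, not compiled): a reliable protocol
 * releasing acknowledged data from its send buffer with sbdrop(), as
 * described in the comment above the append routines.
 */
#if 0
static void
example_ack(struct socket *so, int acked)
{
	KASSERT(solocked(so));
	sbdrop(&so->so_snd, acked);	/* drop acked bytes from the front */
	sowwakeup(so);			/* writers may have space again */
}
#endif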

/*
 * Create a "control" mbuf containing the specified data
 * with the specified type for presentation on a socket buffer.
 */
struct mbuf *
sbcreatecontrol1(void **p, int size, int type, int level, int flags)
{
	struct cmsghdr	*cp;
	struct mbuf	*m;
	int space = CMSG_SPACE(size);

	if ((flags & M_DONTWAIT) && space > MCLBYTES) {
		printf("%s: message too large %d\n", __func__, space);
		return NULL;
	}

	if ((m = m_get(flags, MT_CONTROL)) == NULL)
		return NULL;
	if (space > MLEN) {
		if (space > MCLBYTES)
			MEXTMALLOC(m, space, M_WAITOK);
		else
			MCLGET(m, flags);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return NULL;
		}
	}
	cp = mtod(m, struct cmsghdr *);
	*p = CMSG_DATA(cp);
	m->m_len = space;
	cp->cmsg_len = CMSG_LEN(size);
	cp->cmsg_level = level;
	cp->cmsg_type = type;
	return m;
}

struct mbuf *
sbcreatecontrol(void *p, int size, int type, int level)
{
	struct mbuf *m;
	void *v;

	m = sbcreatecontrol1(&v, size, type, level, M_DONTWAIT);
	if (m == NULL)
		return NULL;
	memcpy(v, p, size);
	return m;
}
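
/*
 * Example (illustrative sketch, not compiled): building a control
 * (ancillary data) mbuf with sbcreatecontrol().  The SCM_TIMESTAMP/
 * SOL_SOCKET type/level pair here is just a plausible example; a
 * protocol passes whatever pair it wants delivered to the receiver.
 */
#if 0
static struct mbuf *
example_make_control(struct timeval *tv)
{
	/* copies sizeof(*tv) bytes into a fresh MT_CONTROL mbuf */
	return sbcreatecontrol(tv, sizeof(*tv), SCM_TIMESTAMP, SOL_SOCKET);
}
#endif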

void
solockretry(struct socket *so, kmutex_t *lock)
{

	while (lock != so->so_lock) {
		mutex_exit(lock);
		lock = so->so_lock;
		mutex_enter(lock);
	}
}

bool
solocked(struct socket *so)
{

	return mutex_owned(so->so_lock);
}

bool
solocked2(struct socket *so1, struct socket *so2)
{
	kmutex_t *lock;

	lock = so1->so_lock;
	if (lock != so2->so_lock)
		return false;
	return mutex_owned(lock);
}

/*
 * Assign a default lock to a new socket.  For PRU_ATTACH, and done by
 * protocols that do not have special locking requirements.
 */
void
sosetlock(struct socket *so)
{
	kmutex_t *lock;

	if (so->so_lock == NULL) {
		lock = softnet_lock;
		so->so_lock = lock;
		mutex_obj_hold(lock);
		mutex_enter(lock);
	}

	/* In all cases, lock must be held on return from PRU_ATTACH. */
	KASSERT(solocked(so));
}

/*
 * Set lock on sockbuf sb; sleep if lock is already held.
 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
 * Returns error without lock if sleep is interrupted.
 */
int
sblock(struct sockbuf *sb, int wf)
{
	struct socket *so;
	kmutex_t *lock;
	int error;

	KASSERT(solocked(sb->sb_so));

	for (;;) {
		if (__predict_true((sb->sb_flags & SB_LOCK) == 0)) {
			sb->sb_flags |= SB_LOCK;
			return 0;
		}
		if (wf != M_WAITOK)
			return EWOULDBLOCK;
		so = sb->sb_so;
		lock = so->so_lock;
		if ((sb->sb_flags & SB_NOINTR) != 0) {
			cv_wait(&so->so_cv, lock);
			error = 0;
		} else
			error = cv_wait_sig(&so->so_cv, lock);
		if (__predict_false(lock != so->so_lock))
			solockretry(so, lock);
		if (error != 0)
			return error;
	}
}

void
sbunlock(struct sockbuf *sb)
{
	struct socket *so;

	so = sb->sb_so;

	KASSERT(solocked(so));
	KASSERT((sb->sb_flags & SB_LOCK) != 0);

	sb->sb_flags &= ~SB_LOCK;
	cv_broadcast(&so->so_cv);
}

int
sowait(struct socket *so, bool catch, int timo)
{
	kmutex_t *lock;
	int error;

	KASSERT(solocked(so));
	KASSERT(catch || timo != 0);

	lock = so->so_lock;
	if (catch)
		error = cv_timedwait_sig(&so->so_cv, lock, timo);
	else
		error = cv_timedwait(&so->so_cv, lock, timo);
	if (__predict_false(lock != so->so_lock))
		solockretry(so, lock);
	return error;
}
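
/*
 * Example (illustrative sketch, not compiled): serializing a
 * multi-step sockbuf operation with sblock()/sbunlock(), per the
 * comment above sblock().  With M_WAITOK the caller sleeps for the
 * lock; with M_DONTWAIT the call fails with EWOULDBLOCK instead.
 */
#if 0
static int
example_locked_op(struct socket *so)
{
	struct sockbuf *sb = &so->so_rcv;
	int error;

	KASSERT(solocked(so));
	error = sblock(sb, M_WAITOK);
	if (error != 0)
		return error;		/* interrupted by a signal */
	/* ... consume records from sb without racing other readers ... */
	sbunlock(sb);
	return 0;
}
#endif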