uipc_socket2.c revision 1.103
1/* $NetBSD: uipc_socket2.c,v 1.103 2009/07/24 01:09:49 christos Exp $ */ 2 3/*- 4 * Copyright (c) 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29/* 30 * Copyright (c) 1982, 1986, 1988, 1990, 1993 31 * The Regents of the University of California. All rights reserved. 32 * 33 * Redistribution and use in source and binary forms, with or without 34 * modification, are permitted provided that the following conditions 35 * are met: 36 * 1. Redistributions of source code must retain the above copyright 37 * notice, this list of conditions and the following disclaimer. 38 * 2. 
Redistributions in binary form must reproduce the above copyright 39 * notice, this list of conditions and the following disclaimer in the 40 * documentation and/or other materials provided with the distribution. 41 * 3. Neither the name of the University nor the names of its contributors 42 * may be used to endorse or promote products derived from this software 43 * without specific prior written permission. 44 * 45 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 48 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 55 * SUCH DAMAGE. 56 * 57 * @(#)uipc_socket2.c 8.2 (Berkeley) 2/14/95 58 */ 59 60#include <sys/cdefs.h> 61__KERNEL_RCSID(0, "$NetBSD: uipc_socket2.c,v 1.103 2009/07/24 01:09:49 christos Exp $"); 62 63#include "opt_mbuftrace.h" 64#include "opt_sb_max.h" 65 66#include <sys/param.h> 67#include <sys/systm.h> 68#include <sys/proc.h> 69#include <sys/file.h> 70#include <sys/buf.h> 71#include <sys/malloc.h> 72#include <sys/mbuf.h> 73#include <sys/protosw.h> 74#include <sys/domain.h> 75#include <sys/poll.h> 76#include <sys/socket.h> 77#include <sys/socketvar.h> 78#include <sys/signalvar.h> 79#include <sys/kauth.h> 80#include <sys/pool.h> 81#include <sys/uidinfo.h> 82 83/* 84 * Primitive routines for operating on sockets and socket buffers. 
 *
 * Locking rules and assumptions:
 *
 * o socket::so_lock can change on the fly.  The low level routines used
 *   to lock sockets are aware of this.  When so_lock is acquired, the
 *   routine locking must check to see if so_lock still points to the
 *   lock that was acquired.  If so_lock has changed in the meantime, the
 *   now irrelevant lock that was acquired must be dropped and the lock
 *   operation retried.  Although not proven here, this is completely safe
 *   on a multiprocessor system, even with relaxed memory ordering, given
 *   the next two rules:
 *
 * o In order to mutate so_lock, the lock pointed to by the current value
 *   of so_lock must be held: i.e., the socket must be held locked by the
 *   changing thread.  The thread must issue membar_exit() to prevent
 *   memory accesses being reordered, and can set so_lock to the desired
 *   value.  If the lock pointed to by the new value of so_lock is not
 *   held by the changing thread, the socket must then be considered
 *   unlocked.
 *
 * o If so_lock is mutated, and the previous lock referred to by so_lock
 *   could still be visible to other threads in the system (e.g. via file
 *   descriptor or protocol-internal reference), then the old lock must
 *   remain valid until the socket and/or protocol control block has been
 *   torn down.
 *
 * o If a socket has a non-NULL so_head value (i.e. is in the process of
 *   connecting), then locking the socket must also lock the socket pointed
 *   to by so_head: their lock pointers must match.
 *
 * o If a socket has connections in progress (so_q, so_q0 not empty) then
 *   locking the socket must also lock the sockets attached to both queues.
 *   Again, their lock pointers must match.
 *
 * o Beyond the initial lock assignment in socreate(), assigning locks to
 *   sockets is the responsibility of the individual protocols / protocol
 *   domains.
 */

/* Backing pool for struct socket allocations (see soget()/soput()). */
static pool_cache_t socket_cache;

u_long	sb_max = SB_MAX;	/* maximum socket buffer size */
static u_long	sb_max_adj;	/* adjusted sb_max */

/*
 * Procedures to manipulate state flags of socket
 * and do appropriate wakeups.  Normal sequence from the
 * active (originating) side is that soisconnecting() is
 * called during processing of connect() call,
 * resulting in an eventual call to soisconnected() if/when the
 * connection is established.  When the connection is torn down
 * soisdisconnecting() is called during processing of disconnect() call,
 * and soisdisconnected() is called when the connection to the peer
 * is totally severed.  The semantics of these routines are such that
 * connectionless protocols can call soisconnected() and soisdisconnected()
 * only, bypassing the in-progress calls when setting up a ``connection''
 * takes no time.
 *
 * From the passive side, a socket is created with
 * two queues of sockets: so_q0 for connections in progress
 * and so_q for connections already made and awaiting user acceptance.
 * As a protocol is preparing incoming connections, it creates a socket
 * structure queued on so_q0 by calling sonewconn().  When the connection
 * is established, soisconnected() is called, and transfers the
 * socket structure to so_q, making it available to accept().
 *
 * If a socket is closed with sockets on either
 * so_q0 or so_q, these sockets are dropped.
 *
 * If higher level protocols are implemented in
 * the kernel, the wakeups done here will sometimes
 * cause software-interrupt process scheduling.
 */

/* Mark a socket as having a connect() in progress.  Socket must be locked. */
void
soisconnecting(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTING;
}

/*
 * Mark a socket as connected.  If it sits on a listening socket's partial
 * queue (so_q0), move it to the completed queue (so_q) and wake the
 * accept()er -- unless an accept filter is installed, in which case the
 * filter's callback is hooked up and invoked instead.
 */
void
soisconnected(struct socket *so)
{
	struct socket *head;

	head = so->so_head;

	KASSERT(solocked(so));
	KASSERT(head == NULL || solocked2(so, head));

	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
	so->so_state |= SS_ISCONNECTED;
	if (head && so->so_onq == &head->so_q0) {
		if ((so->so_options & SO_ACCEPTFILTER) == 0) {
			soqremque(so, 0);
			soqinsque(head, so, 1);
			sorwakeup(head);
			cv_broadcast(&head->so_cv);
		} else {
			so->so_upcall =
			    head->so_accf->so_accept_filter->accf_callback;
			so->so_upcallarg = head->so_accf->so_accept_filter_arg;
			so->so_rcv.sb_flags |= SB_UPCALL;
			so->so_options &= ~SO_ACCEPTFILTER;
			(*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
		}
	} else {
		cv_broadcast(&so->so_cv);
		sorwakeup(so);
		sowwakeup(so);
	}
}

/* Mark a disconnect as in progress; further sends/receives are refused. */
void
soisdisconnecting(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state &= ~SS_ISCONNECTING;
	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
	cv_broadcast(&so->so_cv);
	sowwakeup(so);
	sorwakeup(so);
}

/* Mark the connection to the peer as completely severed. */
void
soisdisconnected(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
	cv_broadcast(&so->so_cv);
	sowwakeup(so);
	sorwakeup(so);
}

/* Second-stage initialization: create the socket pool cache. */
void
soinit2(void)
{

	socket_cache = pool_cache_init(sizeof(struct socket), 0, 0, 0,
	    "socket", NULL, IPL_SOFTNET, NULL, NULL, NULL);
}

/*
 * When an attempt at a new connection is noted on a socket
 * which accepts connections, sonewconn is called.  If the
 * connection is possible (subject to space constraints, etc.)
 * then we allocate a new structure, properly linked into the
 * data structure of the original socket, and return this.
 * Connstatus may be 0, SS_ISCONFIRMING, or SS_ISCONNECTED.
 */
struct socket *
sonewconn(struct socket *head, int connstatus)
{
	struct socket *so;
	int soqueue, error;

	KASSERT(connstatus == 0 || connstatus == SS_ISCONFIRMING ||
	    connstatus == SS_ISCONNECTED);
	KASSERT(solocked(head));

	/* With an accept filter, the socket always goes on so_q0 first. */
	if ((head->so_options & SO_ACCEPTFILTER) != 0)
		connstatus = 0;
	soqueue = connstatus ? 1 : 0;
	/* Refuse if the listen backlog is already 150% full. */
	if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2)
		return NULL;
	so = soget(false);
	if (so == NULL)
		return NULL;
	/* New socket shares the listening socket's lock. */
	mutex_obj_hold(head->so_lock);
	so->so_lock = head->so_lock;
	so->so_type = head->so_type;
	so->so_options = head->so_options &~ SO_ACCEPTCONN;
	so->so_linger = head->so_linger;
	so->so_state = head->so_state | SS_NOFDREF;
	so->so_nbio = head->so_nbio;
	so->so_proto = head->so_proto;
	so->so_timeo = head->so_timeo;
	so->so_pgid = head->so_pgid;
	so->so_send = head->so_send;
	so->so_receive = head->so_receive;
	so->so_uidinfo = head->so_uidinfo;
	so->so_egid = head->so_egid;
	so->so_cpid = head->so_cpid;
#ifdef MBUFTRACE
	so->so_mowner = head->so_mowner;
	so->so_rcv.sb_mowner = head->so_rcv.sb_mowner;
	so->so_snd.sb_mowner = head->so_snd.sb_mowner;
#endif
	if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat) != 0)
		goto out;
	so->so_snd.sb_lowat = head->so_snd.sb_lowat;
	so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
	so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
	so->so_snd.sb_timeo = head->so_snd.sb_timeo;
	so->so_rcv.sb_flags |= head->so_rcv.sb_flags & SB_AUTOSIZE;
	so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE;
	soqinsque(head, so, soqueue);
	error = (*so->so_proto->pr_usrreq)(so, PRU_ATTACH, NULL, NULL,
	    NULL, NULL);
	KASSERT(solocked(so));
	if (error != 0) {
		(void) soqremque(so, soqueue);
out:
		/*
		 * Remove accept filter if one is present.
		 * XXX Is this really needed?
		 */
		if (so->so_accf != NULL)
			(void)accept_filt_clear(so);
		soput(so);
		return NULL;
	}
	if (connstatus) {
		sorwakeup(head);
		cv_broadcast(&head->so_cv);
		so->so_state |= connstatus;
	}
	return so;
}

/*
 * Allocate a zeroed socket from the pool cache and initialize its queues,
 * condition variables and select/poll records.  Returns NULL only when
 * waitok is false and the pool is exhausted.
 */
struct socket *
soget(bool waitok)
{
	struct socket *so;

	so = pool_cache_get(socket_cache, (waitok ? PR_WAITOK : PR_NOWAIT));
	if (__predict_false(so == NULL))
		return (NULL);
	memset(so, 0, sizeof(*so));
	TAILQ_INIT(&so->so_q0);
	TAILQ_INIT(&so->so_q);
	cv_init(&so->so_cv, "socket");
	cv_init(&so->so_rcv.sb_cv, "netio");
	cv_init(&so->so_snd.sb_cv, "netio");
	selinit(&so->so_rcv.sb_sel);
	selinit(&so->so_snd.sb_sel);
	so->so_rcv.sb_so = so;
	so->so_snd.sb_so = so;
	return so;
}

/*
 * Destroy a socket's synchronization objects, drop its reference on the
 * socket lock, and return the socket to the pool cache.
 */
void
soput(struct socket *so)
{

	KASSERT(!cv_has_waiters(&so->so_cv));
	KASSERT(!cv_has_waiters(&so->so_rcv.sb_cv));
	KASSERT(!cv_has_waiters(&so->so_snd.sb_cv));
	seldestroy(&so->so_rcv.sb_sel);
	seldestroy(&so->so_snd.sb_sel);
	mutex_obj_free(so->so_lock);
	cv_destroy(&so->so_cv);
	cv_destroy(&so->so_rcv.sb_cv);
	cv_destroy(&so->so_snd.sb_cv);
	pool_cache_put(socket_cache, so);
}

/*
 * Insert socket 'so' at the tail of head's partial queue (q == 0, so_q0)
 * or completed queue (q != 0, so_q).  Both sockets must share a lock.
 */
void
soqinsque(struct socket *head, struct socket *so, int q)
{

	KASSERT(solocked2(head, so));

#ifdef DIAGNOSTIC
	if (so->so_onq != NULL)
		panic("soqinsque");
#endif

	so->so_head = head;
	if (q == 0) {
		head->so_q0len++;
		so->so_onq = &head->so_q0;
	} else {
		head->so_qlen++;
		so->so_onq = &head->so_q;
	}
	TAILQ_INSERT_TAIL(so->so_onq, so, so_qe);
}

/*
 * Remove 'so' from the head socket's queue selected by q.  Returns 0 if
 * the socket is not on the expected queue, 1 after a successful unlink.
 */
int
soqremque(struct socket *so, int q)
{
	struct socket *head;

	head = so->so_head;

	KASSERT(solocked(so));
	if (q == 0) {
		if (so->so_onq != &head->so_q0)
			return (0);
		head->so_q0len--;
	} else {
		if (so->so_onq != &head->so_q)
			return (0);
		head->so_qlen--;
	}
	KASSERT(solocked2(so, head));
	TAILQ_REMOVE(so->so_onq, so, so_qe);
	so->so_onq = NULL;
	so->so_head = NULL;
	return (1);
}

/*
 * Socantsendmore indicates that no more data will be sent on the
 * socket; it would normally be applied to a socket when the user
 * informs the system that no more data is to be sent, by the protocol
 * code (in case PRU_SHUTDOWN).  Socantrcvmore indicates that no more data
 * will be received, and will normally be applied to the socket by a
 * protocol when it detects that the peer will send no more data.
 * Data queued for reading in the socket may yet be read.
 */

void
socantsendmore(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state |= SS_CANTSENDMORE;
	sowwakeup(so);
}

void
socantrcvmore(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state |= SS_CANTRCVMORE;
	sorwakeup(so);
}

/*
 * Wait for data to arrive at/drain from a socket buffer.
 */
int
sbwait(struct sockbuf *sb)
{
	struct socket *so;
	kmutex_t *lock;
	int error;

	so = sb->sb_so;

	KASSERT(solocked(so));

	sb->sb_flags |= SB_NOTIFY;
	lock = so->so_lock;
	if ((sb->sb_flags & SB_NOINTR) != 0)
		error = cv_timedwait(&sb->sb_cv, lock, sb->sb_timeo);
	else
		error = cv_timedwait_sig(&sb->sb_cv, lock, sb->sb_timeo);
	/* so_lock may have been changed while we slept; re-acquire if so. */
	if (__predict_false(lock != so->so_lock))
		solockretry(so, lock);
	return error;
}

/*
 * Wakeup processes waiting on a socket buffer.
 * Do asynchronous notification via SIGIO
 * if the socket buffer has the SB_ASYNC flag set.
 */
void
sowakeup(struct socket *so, struct sockbuf *sb, int code)
{
	int band;

	KASSERT(solocked(so));
	KASSERT(sb->sb_so == so);

	/* code is POLL_IN or POLL_OUT; derive the matching poll(2) band. */
	if (code == POLL_IN)
		band = POLLIN|POLLRDNORM;
	else
		band = POLLOUT|POLLWRNORM;
	sb->sb_flags &= ~SB_NOTIFY;
	selnotify(&sb->sb_sel, band, NOTE_SUBMIT);
	cv_broadcast(&sb->sb_cv);
	if (sb->sb_flags & SB_ASYNC)
		fownsignal(so->so_pgid, SIGIO, code, band, so);
	if (sb->sb_flags & SB_UPCALL)
		(*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
}

/*
 * Reset a socket's lock pointer.  Wake all threads waiting on the
 * socket's condition variables so that they can restart their waits
 * using the new lock.  The existing lock must be held.
 */
void
solockreset(struct socket *so, kmutex_t *lock)
{

	KASSERT(solocked(so));

	so->so_lock = lock;
	cv_broadcast(&so->so_snd.sb_cv);
	cv_broadcast(&so->so_rcv.sb_cv);
	cv_broadcast(&so->so_cv);
}

/*
 * Socket buffer (struct sockbuf) utility routines.
 *
 * Each socket contains two socket buffers: one for sending data and
 * one for receiving data.  Each buffer contains a queue of mbufs,
 * information about the number of mbufs and amount of data in the
 * queue, and other fields allowing poll() statements and notification
 * on data availability to be implemented.
 *
 * Data stored in a socket buffer is maintained as a list of records.
 * Each record is a list of mbufs chained together with the m_next
 * field.  Records are chained together with the m_nextpkt field.  The upper
 * level routine soreceive() expects the following conventions to be
 * observed when placing information in the receive buffer:
 *
 * 1. If the protocol requires each message be preceded by the sender's
 *    name, then a record containing that name must be present before
 *    any associated data (mbuf's must be of type MT_SONAME).
 * 2. If the protocol supports the exchange of ``access rights'' (really
 *    just additional data associated with the message), and there are
 *    ``rights'' to be received, then a record containing this data
 *    should be present (mbuf's must be of type MT_CONTROL).
 * 3. If a name or rights record exists, then it must be followed by
 *    a data record, perhaps of zero length.
 *
 * Before using a new socket structure it is first necessary to reserve
 * buffer space to the socket, by calling sbreserve().  This should commit
 * some of the available buffer space in the system buffer pool for the
 * socket (currently, it does nothing but enforce limits).  The space
 * should be released by calling sbrelease() when the socket is destroyed.
 */

/*
 * Set the global socket buffer limit sb_max.  Values below 16KB are
 * rejected; sb_max_adj is recomputed to account for mbuf overhead.
 */
int
sb_max_set(u_long new_sbmax)
{
	int s;

	if (new_sbmax < (16 * 1024))
		return (EINVAL);

	s = splsoftnet();
	sb_max = new_sbmax;
	sb_max_adj = (u_quad_t)new_sbmax * MCLBYTES / (MSIZE + MCLBYTES);
	splx(s);

	return (0);
}

/*
 * Reserve send and receive space for a socket, enforcing a minimum send
 * high-water mark so pipes/fifos stay writable (see comment below).
 * Returns 0 on success, ENOBUFS if either reservation fails.
 */
int
soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
{

	KASSERT(so->so_lock == NULL || solocked(so));

	/*
	 * there's at least one application (a configure script of screen)
	 * which expects a fifo is writable even if it has "some" bytes
	 * in its buffer.
	 * so we want to make sure (hiwat - lowat) >= (some bytes).
	 *
	 * PIPE_BUF here is an arbitrary value chosen as (some bytes) above.
	 * we expect it's large enough for such applications.
	 */
	u_long lowat = MAX(sock_loan_thresh, MCLBYTES);
	u_long hiwat = lowat + PIPE_BUF;

	if (sndcc < hiwat)
		sndcc = hiwat;
	if (sbreserve(&so->so_snd, sndcc, so) == 0)
		goto bad;
	if (sbreserve(&so->so_rcv, rcvcc, so) == 0)
		goto bad2;
	if (so->so_rcv.sb_lowat == 0)
		so->so_rcv.sb_lowat = 1;
	if (so->so_snd.sb_lowat == 0)
		so->so_snd.sb_lowat = lowat;
	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
	return (0);
 bad2:
	sbrelease(&so->so_snd, so);
 bad:
	return (ENOBUFS);
}

/*
 * Allot mbufs to a sockbuf.
 * Attempt to scale mbmax so that mbcnt doesn't become limiting
 * if buffering efficiency is near the normal case.
 * Returns 1 on success, 0 if the request exceeds sb_max_adj or the
 * owner's RLIMIT_SBSIZE limit.
 */
int
sbreserve(struct sockbuf *sb, u_long cc, struct socket *so)
{
	struct lwp *l = curlwp;		/* XXX */
	rlim_t maxcc;
	struct uidinfo *uidinfo;

	KASSERT(so->so_lock == NULL || solocked(so));
	KASSERT(sb->sb_so == so);
	KASSERT(sb_max_adj != 0);

	if (cc == 0 || cc > sb_max_adj)
		return (0);

	/* Only charge the owner's resource limit when it is the caller. */
	if (kauth_cred_geteuid(l->l_cred) == so->so_uidinfo->ui_uid)
		maxcc = l->l_proc->p_rlimit[RLIMIT_SBSIZE].rlim_cur;
	else
		maxcc = RLIM_INFINITY;

	uidinfo = so->so_uidinfo;
	if (!chgsbsize(uidinfo, &sb->sb_hiwat, cc, maxcc))
		return 0;
	sb->sb_mbmax = min(cc * 2, sb_max);
	if (sb->sb_lowat > sb->sb_hiwat)
		sb->sb_lowat = sb->sb_hiwat;
	return (1);
}

/*
 * Free mbufs held by a socket, and reserved mbuf space.  We do not assert
 * that the socket is held locked here: see sorflush().
 */
void
sbrelease(struct sockbuf *sb, struct socket *so)
{

	KASSERT(sb->sb_so == so);

	sbflush(sb);
	(void)chgsbsize(so->so_uidinfo, &sb->sb_hiwat, 0, RLIM_INFINITY);
	sb->sb_mbmax = 0;
}

/*
 * Routines to add and remove
 * data from an mbuf queue.
 *
 * The routines sbappend() or sbappendrecord() are normally called to
 * append new mbufs to a socket buffer, after checking that adequate
 * space is available, comparing the function sbspace() with the amount
 * of data to be added.  sbappendrecord() differs from sbappend() in
 * that data supplied is treated as the beginning of a new record.
 * To place a sender's address, optional access rights, and data in a
 * socket receive buffer, sbappendaddr() should be used.  To place
 * access rights and data in a socket receive buffer, sbappendrights()
 * should be used.  In either case, the new data begins a new record.
 * Note that unlike sbappend() and sbappendrecord(), these routines check
 * for the caller that there will be enough space to store the data.
 * Each fails if there is not enough space, or if it cannot find mbufs
 * to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data
 * awaiting acknowledgement.  Data is normally copied from a socket
 * send buffer in a protocol with m_copy for output to a peer,
 * and then removing the data from the socket buffer with sbdrop()
 * or sbdroprecord() when the data is acknowledged by the peer.
 */

#ifdef SOCKBUF_DEBUG
/* Verify that sb_lastrecord really is the last record on the chain. */
void
sblastrecordchk(struct sockbuf *sb, const char *where)
{
	struct mbuf *m = sb->sb_mb;

	KASSERT(solocked(sb->sb_so));

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	if (m != sb->sb_lastrecord) {
		printf("sblastrecordchk: sb_mb %p sb_lastrecord %p last %p\n",
		    sb->sb_mb, sb->sb_lastrecord, m);
		printf("packet chain:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
			printf("\t%p\n", m);
		panic("sblastrecordchk from %s", where);
	}
}

/* Verify that sb_mbtail really is the last mbuf of the last record. */
void
sblastmbufchk(struct sockbuf *sb, const char *where)
{
	struct mbuf *m = sb->sb_mb;
	struct mbuf *n;

	KASSERT(solocked(sb->sb_so));

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	while (m && m->m_next)
		m = m->m_next;

	if (m != sb->sb_mbtail) {
		printf("sblastmbufchk: sb_mb %p sb_mbtail %p last %p\n",
		    sb->sb_mb, sb->sb_mbtail, m);
		printf("packet tree:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
			printf("\t");
			for (n = m; n != NULL; n = n->m_next)
				printf("%p ", n);
			printf("\n");
		}
		panic("sblastmbufchk from %s", where);
	}
}
#endif /* SOCKBUF_DEBUG */

/*
 * Link a chain of records onto a socket buffer
 */
#define	SBLINKRECORDCHAIN(sb, m0, mlast)				\
do {									\
	if ((sb)->sb_lastrecord != NULL)				\
		(sb)->sb_lastrecord->m_nextpkt = (m0);			\
	else								\
		(sb)->sb_mb = (m0);					\
	(sb)->sb_lastrecord = (mlast);					\
} while (/*CONSTCOND*/0)


#define	SBLINKRECORD(sb, m0)						\
    SBLINKRECORDCHAIN(sb, m0, m0)

/*
 * Append mbuf chain m to the last record in the
 * socket buffer sb.  The additional space associated
 * the mbuf chain is recorded in sb.  Empty mbufs are
 * discarded and mbufs are compacted where possible.
 */
void
sbappend(struct sockbuf *sb, struct mbuf *m)
{
	struct mbuf *n;

	KASSERT(solocked(sb->sb_so));

	if (m == 0)
		return;

#ifdef MBUFTRACE
	m_claimm(m, sb->sb_mowner);
#endif

	SBLASTRECORDCHK(sb, "sbappend 1");

	if ((n = sb->sb_lastrecord) != NULL) {
		/*
		 * XXX Would like to simply use sb_mbtail here, but
		 * XXX I need to verify that I won't miss an EOR that
		 * XXX way.
		 */
		do {
			if (n->m_flags & M_EOR) {
				sbappendrecord(sb, m); /* XXXXXX!!!! */
				return;
			}
		} while (n->m_next && (n = n->m_next));
	} else {
		/*
		 * If this is the first record in the socket buffer, it's
		 * also the last record.
		 */
		sb->sb_lastrecord = m;
	}
	sbcompress(sb, m, n);
	SBLASTRECORDCHK(sb, "sbappend 2");
}

/*
 * This version of sbappend() should only be used when the caller
 * absolutely knows that there will never be more than one record
 * in the socket buffer, that is, a stream protocol (such as TCP).
 */
void
sbappendstream(struct sockbuf *sb, struct mbuf *m)
{

	KASSERT(solocked(sb->sb_so));
	KDASSERT(m->m_nextpkt == NULL);
	KASSERT(sb->sb_mb == sb->sb_lastrecord);

	SBLASTMBUFCHK(sb, __func__);

#ifdef MBUFTRACE
	m_claimm(m, sb->sb_mowner);
#endif

	sbcompress(sb, m, sb->sb_mbtail);

	sb->sb_lastrecord = sb->sb_mb;
	SBLASTRECORDCHK(sb, __func__);
}

#ifdef SOCKBUF_DEBUG
/* Cross-check sb_cc and sb_mbcnt against a full walk of the mbuf chains. */
void
sbcheck(struct sockbuf *sb)
{
	struct mbuf *m, *m2;
	u_long len, mbcnt;

	KASSERT(solocked(sb->sb_so));

	len = 0;
	mbcnt = 0;
	for (m = sb->sb_mb; m; m = m->m_nextpkt) {
		for (m2 = m; m2 != NULL; m2 = m2->m_next) {
			len += m2->m_len;
			mbcnt += MSIZE;
			if (m2->m_flags & M_EXT)
				mbcnt += m2->m_ext.ext_size;
			if (m2->m_nextpkt != NULL)
				panic("sbcheck nextpkt");
		}
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		printf("cc %lu != %lu || mbcnt %lu != %lu\n", len, sb->sb_cc,
		    mbcnt, sb->sb_mbcnt);
		panic("sbcheck");
	}
}
#endif

/*
 * As above, except the mbuf chain
 * begins a new record.
 */
void
sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf *m;

	KASSERT(solocked(sb->sb_so));

	if (m0 == 0)
		return;

#ifdef MBUFTRACE
	m_claimm(m0, sb->sb_mowner);
#endif
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	SBLASTRECORDCHK(sb, "sbappendrecord 1");
	SBLINKRECORD(sb, m0);
	m = m0->m_next;
	m0->m_next = 0;
	/* If M_EOR was set on the head, move it to the rest of the chain. */
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
	SBLASTRECORDCHK(sb, "sbappendrecord 2");
}

/*
 * As above except that OOB data
 * is inserted at the beginning of the sockbuf,
 * but after any other OOB data.
 */
void
sbinsertoob(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf *m, **mp;

	KASSERT(solocked(sb->sb_so));

	if (m0 == 0)
		return;

	SBLASTRECORDCHK(sb, "sbinsertoob 1");

	/* Skip past existing OOB records (and control mbufs within them). */
	for (mp = &sb->sb_mb; (m = *mp) != NULL; mp = &((*mp)->m_nextpkt)) {
	    again:
		switch (m->m_type) {

		case MT_OOBDATA:
			continue;		/* WANT next train */

		case MT_CONTROL:
			if ((m = m->m_next) != NULL)
				goto again;	/* inspect THIS train further */
		}
		break;
	}
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	m0->m_nextpkt = *mp;
	if (*mp == NULL) {
		/* m0 is actually the new tail */
		sb->sb_lastrecord = m0;
	}
	*mp = m0;
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
	SBLASTRECORDCHK(sb, "sbinsertoob 2");
}

/*
 * Append address and data, and optionally, control (ancillary) data
 * to the receive queue of a socket.  If present,
 * m0 must include a packet header with total length.
 * Returns 0 if no space in sockbuf or insufficient mbufs.
 */
int
sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0,
	struct mbuf *control)
{
	struct mbuf *m, *n, *nlast;
	int space, len;

	KASSERT(solocked(sb->sb_so));

	space = asa->sa_len;

	if (m0 != NULL) {
		if ((m0->m_flags & M_PKTHDR) == 0)
			panic("sbappendaddr");
		space += m0->m_pkthdr.len;
#ifdef MBUFTRACE
		m_claimm(m0, sb->sb_mowner);
#endif
	}
	for (n = control; n; n = n->m_next) {
		space += n->m_len;
		MCLAIM(n, sb->sb_mowner);
		if (n->m_next == 0)	/* keep pointer to last control buf */
			break;
	}
	if (space > sbspace(sb))
		return (0);
	MGET(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	MCLAIM(m, sb->sb_mowner);
	/*
	 * XXX avoid 'comparison always true' warning which isn't easily
	 * avoided.
	 */
	len = asa->sa_len;
	if (len > MLEN) {
		MEXTMALLOC(m, asa->sa_len, M_NOWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return (0);
		}
	}
	m->m_len = asa->sa_len;
	memcpy(mtod(m, void *), asa, asa->sa_len);
	if (n)
		n->m_next = m0;		/* concatenate data to control */
	else
		control = m0;
	m->m_next = control;

	SBLASTRECORDCHK(sb, "sbappendaddr 1");

	for (n = m; n->m_next != NULL; n = n->m_next)
		sballoc(sb, n);
	sballoc(sb, n);
	nlast = n;
	SBLINKRECORD(sb, m);

	sb->sb_mbtail = nlast;
	SBLASTMBUFCHK(sb, "sbappendaddr");
	SBLASTRECORDCHK(sb, "sbappendaddr 2");

	return (1);
}

/*
 * Helper for sbappendaddrchain(): prepend a struct sockaddr* to
 * an mbuf chain.
 */
static inline struct mbuf *
m_prepend_sockaddr(struct sockbuf *sb, struct mbuf *m0,
		   const struct sockaddr *asa)
{
	struct mbuf *m;
	const int salen = asa->sa_len;

	KASSERT(solocked(sb->sb_so));

	/* only the first in each chain need be a pkthdr */
	MGETHDR(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	MCLAIM(m, sb->sb_mowner);
#ifdef notyet
	if (salen > MHLEN) {
		MEXTMALLOC(m, salen, M_NOWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return (0);
		}
	}
#else
	KASSERT(salen <= MHLEN);
#endif
	m->m_len = salen;
	memcpy(mtod(m, void *), asa, salen);
	m->m_next = m0;
	m->m_pkthdr.len = salen + m0->m_pkthdr.len;

	return m;
}

/*
 * Append a chain of records to a socket buffer, prepending a copy of
 * asa to each record.  Returns 1 on success, 0 (with all prepended
 * addresses freed) on failure; the caller frees m0 on failure.
 */
int
sbappendaddrchain(struct sockbuf *sb, const struct sockaddr *asa,
	 struct mbuf *m0, int sbprio)
{
	int space;
	struct mbuf *m, *n, *n0, *nlast;
	int error;

	KASSERT(solocked(sb->sb_so));

	/*
	 * XXX sbprio reserved for encoding priority of this* request:
	 *  SB_PRIO_NONE --> honour normal sb limits
	 *  SB_PRIO_ONESHOT_OVERFLOW --> if socket has any space,
	 *	take whole chain. Intended for large requests
	 *      that should be delivered atomically (all, or none).
	 * SB_PRIO_OVERDRAFT -- allow a small (2*MLEN) overflow
	 *       over normal socket limits, for messages indicating
	 *       buffer overflow in earlier normal/lower-priority messages
	 * SB_PRIO_BESTEFFORT -->  ignore limits entirely.
	 *       Intended for kernel-generated messages only.
	 *        Up to generator to avoid total mbuf resource exhaustion.
	 */
	(void)sbprio;

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddrchain");

	space = sbspace(sb);

#ifdef notyet
	/*
	 * Enforce SB_PRIO_* limits as described above.
	 */
#endif

	n0 = NULL;
	nlast = NULL;
	for (m = m0; m; m = m->m_nextpkt) {
		struct mbuf *np;

#ifdef MBUFTRACE
		m_claimm(m, sb->sb_mowner);
#endif

		/* Prepend sockaddr to this record (m) of input chain m0 */
		n = m_prepend_sockaddr(sb, m, asa);
		if (n == NULL) {
			error = ENOBUFS;
			goto bad;
		}

		/* Append record (asa+m) to end of new chain n0 */
		if (n0 == NULL) {
			n0 = n;
		} else {
			nlast->m_nextpkt = n;
		}
		/* Keep track of last record on new chain */
		nlast = n;

		for (np = n; np; np = np->m_next)
			sballoc(sb, np);
	}

	SBLASTRECORDCHK(sb, "sbappendaddrchain 1");

	/* Drop the entire chain of (asa+m) records onto the socket */
	SBLINKRECORDCHAIN(sb, n0, nlast);

	SBLASTRECORDCHK(sb, "sbappendaddrchain 2");

	for (m = nlast; m->m_next; m = m->m_next)
		;
	sb->sb_mbtail = m;
	SBLASTMBUFCHK(sb, "sbappendaddrchain");

	return (1);

bad:
	/*
	 * On error, free the prepended addresses.  For consistency
	 * with sbappendaddr(), leave it to our caller to free
	 * the input record chain passed to us as m0.
	 */
	while ((n = n0) != NULL) {
		struct mbuf *np;

		/* Undo the sballoc() of this record */
		for (np = n; np; np = np->m_next)
			sbfree(sb, np);

		n0 = n->m_nextpkt;	/* iterate at next prepended address */
		MFREE(n, np);		/* free prepended address (not data) */
	}
	return 0;
}


/*
 * Append control (ancillary) data plus optional data m0 as a new record.
 * Returns 0 if there is no space in the sockbuf, 1 on success.
 */
int
sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
{
	struct mbuf *m, *mlast, *n;
	int space;

	KASSERT(solocked(sb->sb_so));

	space = 0;
	if (control == 0)
		panic("sbappendcontrol");
	for (m = control; ; m = m->m_next) {
		space += m->m_len;
		MCLAIM(m, sb->sb_mowner);
		if (m->m_next == 0)
			break;
	}
	n = m;			/* save pointer to last control buffer */
	for (m = m0; m; m = m->m_next) {
		MCLAIM(m, sb->sb_mowner);
		space += m->m_len;
	}
	if (space > sbspace(sb))
		return (0);
	n->m_next = m0;			/* concatenate data to control */

	SBLASTRECORDCHK(sb, "sbappendcontrol 1");

	for (m = control; m->m_next != NULL; m = m->m_next)
		sballoc(sb, m);
	sballoc(sb, m);
	mlast = m;
	SBLINKRECORD(sb, control);

	sb->sb_mbtail = mlast;
	SBLASTMBUFCHK(sb, "sbappendcontrol");
	SBLASTRECORDCHK(sb, "sbappendcontrol 2");

	return (1);
}

/*
 * Compress mbuf chain m into the socket
 * buffer sb following mbuf n.  If n
 * is null, the buffer is presumed empty.
 */
void
sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
{
	int eor;
	struct mbuf *o;

	KASSERT(solocked(sb->sb_so));

	eor = 0;
	while (m) {
		/* Remember whether any input mbuf carried M_EOR. */
		eor |= m->m_flags & M_EOR;
		/*
		 * Drop an empty mbuf, unless it carries M_EOR and there
		 * is no same-type successor (m->m_next, or else n) that
		 * the flag could be re-applied to.
		 */
		if (m->m_len == 0 &&
		    (eor == 0 ||
		     (((o = m->m_next) || (o = n)) &&
		      o->m_type == m->m_type))) {
			if (sb->sb_lastrecord == m)
				sb->sb_lastrecord = m->m_next;
			m = m_free(m);
			continue;
		}
		if (n && (n->m_flags & M_EOR) == 0 &&
		    /* M_TRAILINGSPACE() checks buffer writeability */
		    m->m_len <= MCLBYTES / 4 && /* XXX Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			/* Coalesce: copy m's data into n's trailing space. */
			memcpy(mtod(n, char *) + n->m_len, mtod(m, void *),
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			m = m_free(m);
			continue;
		}
		/* Keep m: link it after n, or make it the buffer head. */
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sb->sb_mbtail = m;
		sballoc(sb, m);
		n = m;
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	if (eor) {
		/* Re-apply M_EOR to the last mbuf kept in the buffer. */
		if (n)
			n->m_flags |= eor;
		else
			printf("semi-panic: sbcompress\n");
	}
	SBLASTMBUFCHK(sb, __func__);
}

/*
 * Free all mbufs in a sockbuf.
 * Check that all resources are reclaimed.
 */
void
sbflush(struct sockbuf *sb)
{

	KASSERT(solocked(sb->sb_so));
	/* The sockbuf sleep lock must not be held while flushing. */
	KASSERT((sb->sb_flags & SB_LOCK) == 0);

	while (sb->sb_mbcnt)
		sbdrop(sb, (int)sb->sb_cc);

	KASSERT(sb->sb_cc == 0);
	KASSERT(sb->sb_mb == NULL);
	KASSERT(sb->sb_mbtail == NULL);
	KASSERT(sb->sb_lastrecord == NULL);
}

/*
 * Drop data from (the front of) a sockbuf.
 */
void
sbdrop(struct sockbuf *sb, int len)
{
	struct mbuf *m, *mn, *next;

	KASSERT(solocked(sb->sb_so));

	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	while (len > 0) {
		if (m == 0) {
			/* Current record exhausted; advance to the next. */
			if (next == 0)
				panic("sbdrop");
			m = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			/* Partial drop: trim the front of this mbuf. */
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			break;
		}
		len -= m->m_len;
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	/* Also free any now-empty mbufs left at the front of the record. */
	while (m && m->m_len == 0) {
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;
	/*
	 * First part is an inline SB_EMPTY_FIXUP(). Second part
	 * makes sure sb_lastrecord is up-to-date if we dropped
	 * part of the last record.
	 */
	m = sb->sb_mb;
	if (m == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (m->m_nextpkt == NULL)
		sb->sb_lastrecord = m;
}

/*
 * Drop a record off the front of a sockbuf
 * and move the next record to the front.
 */
void
sbdroprecord(struct sockbuf *sb)
{
	struct mbuf *m, *mn;

	KASSERT(solocked(sb->sb_so));

	m = sb->sb_mb;
	if (m) {
		/* Unlink the first record, then free each of its mbufs. */
		sb->sb_mb = m->m_nextpkt;
		do {
			sbfree(sb, m);
			MFREE(m, mn);
		} while ((m = mn) != NULL);
	}
	SB_EMPTY_FIXUP(sb);
}

/*
 * Create a "control" mbuf containing the specified data
 * with the specified type for presentation on a socket buffer.
1307 */ 1308struct mbuf * 1309sbcreatecontrol(void *p, int size, int type, int level) 1310{ 1311 struct cmsghdr *cp; 1312 struct mbuf *m; 1313 1314 if (CMSG_SPACE(size) > MCLBYTES) { 1315 printf("sbcreatecontrol: message too large %d\n", size); 1316 return NULL; 1317 } 1318 1319 if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL) 1320 return ((struct mbuf *) NULL); 1321 if (CMSG_SPACE(size) > MLEN) { 1322 MCLGET(m, M_DONTWAIT); 1323 if ((m->m_flags & M_EXT) == 0) { 1324 m_free(m); 1325 return NULL; 1326 } 1327 } 1328 cp = mtod(m, struct cmsghdr *); 1329 memcpy(CMSG_DATA(cp), p, size); 1330 m->m_len = CMSG_SPACE(size); 1331 cp->cmsg_len = CMSG_LEN(size); 1332 cp->cmsg_level = level; 1333 cp->cmsg_type = type; 1334 return (m); 1335} 1336 1337void 1338solockretry(struct socket *so, kmutex_t *lock) 1339{ 1340 1341 while (lock != so->so_lock) { 1342 mutex_exit(lock); 1343 lock = so->so_lock; 1344 mutex_enter(lock); 1345 } 1346} 1347 1348bool 1349solocked(struct socket *so) 1350{ 1351 1352 return mutex_owned(so->so_lock); 1353} 1354 1355bool 1356solocked2(struct socket *so1, struct socket *so2) 1357{ 1358 kmutex_t *lock; 1359 1360 lock = so1->so_lock; 1361 if (lock != so2->so_lock) 1362 return false; 1363 return mutex_owned(lock); 1364} 1365 1366/* 1367 * Assign a default lock to a new socket. For PRU_ATTACH, and done by 1368 * protocols that do not have special locking requirements. 1369 */ 1370void 1371sosetlock(struct socket *so) 1372{ 1373 kmutex_t *lock; 1374 1375 if (so->so_lock == NULL) { 1376 lock = softnet_lock; 1377 so->so_lock = lock; 1378 mutex_obj_hold(lock); 1379 mutex_enter(lock); 1380 } 1381 1382 /* In all cases, lock must be held on return from PRU_ATTACH. */ 1383 KASSERT(solocked(so)); 1384} 1385 1386/* 1387 * Set lock on sockbuf sb; sleep if lock is already held. 1388 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible. 1389 * Returns error without lock if sleep is interrupted. 
1390 */ 1391int 1392sblock(struct sockbuf *sb, int wf) 1393{ 1394 struct socket *so; 1395 kmutex_t *lock; 1396 int error; 1397 1398 KASSERT(solocked(sb->sb_so)); 1399 1400 for (;;) { 1401 if (__predict_true((sb->sb_flags & SB_LOCK) == 0)) { 1402 sb->sb_flags |= SB_LOCK; 1403 return 0; 1404 } 1405 if (wf != M_WAITOK) 1406 return EWOULDBLOCK; 1407 so = sb->sb_so; 1408 lock = so->so_lock; 1409 if ((sb->sb_flags & SB_NOINTR) != 0) { 1410 cv_wait(&so->so_cv, lock); 1411 error = 0; 1412 } else 1413 error = cv_wait_sig(&so->so_cv, lock); 1414 if (__predict_false(lock != so->so_lock)) 1415 solockretry(so, lock); 1416 if (error != 0) 1417 return error; 1418 } 1419} 1420 1421void 1422sbunlock(struct sockbuf *sb) 1423{ 1424 struct socket *so; 1425 1426 so = sb->sb_so; 1427 1428 KASSERT(solocked(so)); 1429 KASSERT((sb->sb_flags & SB_LOCK) != 0); 1430 1431 sb->sb_flags &= ~SB_LOCK; 1432 cv_broadcast(&so->so_cv); 1433} 1434 1435int 1436sowait(struct socket *so, bool catch, int timo) 1437{ 1438 kmutex_t *lock; 1439 int error; 1440 1441 KASSERT(solocked(so)); 1442 KASSERT(catch || timo != 0); 1443 1444 lock = so->so_lock; 1445 if (catch) 1446 error = cv_timedwait_sig(&so->so_cv, lock, timo); 1447 else 1448 error = cv_timedwait(&so->so_cv, lock, timo); 1449 if (__predict_false(lock != so->so_lock)) 1450 solockretry(so, lock); 1451 return error; 1452} 1453