/* uipc_socket2.c revision 1.116 */
1/* $NetBSD: uipc_socket2.c,v 1.116 2014/05/17 22:52:36 rmind Exp $ */ 2 3/*- 4 * Copyright (c) 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29/* 30 * Copyright (c) 1982, 1986, 1988, 1990, 1993 31 * The Regents of the University of California. All rights reserved. 32 * 33 * Redistribution and use in source and binary forms, with or without 34 * modification, are permitted provided that the following conditions 35 * are met: 36 * 1. Redistributions of source code must retain the above copyright 37 * notice, this list of conditions and the following disclaimer. 38 * 2. 
Redistributions in binary form must reproduce the above copyright 39 * notice, this list of conditions and the following disclaimer in the 40 * documentation and/or other materials provided with the distribution. 41 * 3. Neither the name of the University nor the names of its contributors 42 * may be used to endorse or promote products derived from this software 43 * without specific prior written permission. 44 * 45 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 48 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 55 * SUCH DAMAGE. 56 * 57 * @(#)uipc_socket2.c 8.2 (Berkeley) 2/14/95 58 */ 59 60#include <sys/cdefs.h> 61__KERNEL_RCSID(0, "$NetBSD: uipc_socket2.c,v 1.116 2014/05/17 22:52:36 rmind Exp $"); 62 63#include "opt_mbuftrace.h" 64#include "opt_sb_max.h" 65 66#include <sys/param.h> 67#include <sys/systm.h> 68#include <sys/proc.h> 69#include <sys/file.h> 70#include <sys/buf.h> 71#include <sys/mbuf.h> 72#include <sys/protosw.h> 73#include <sys/domain.h> 74#include <sys/poll.h> 75#include <sys/socket.h> 76#include <sys/socketvar.h> 77#include <sys/signalvar.h> 78#include <sys/kauth.h> 79#include <sys/pool.h> 80#include <sys/uidinfo.h> 81 82/* 83 * Primitive routines for operating on sockets and socket buffers. 
84 * 85 * Connection life-cycle: 86 * 87 * Normal sequence from the active (originating) side: 88 * 89 * - soisconnecting() is called during processing of connect() call, 90 * - resulting in an eventual call to soisconnected() if/when the 91 * connection is established. 92 * 93 * When the connection is torn down during processing of disconnect(): 94 * 95 * - soisdisconnecting() is called and, 96 * - soisdisconnected() is called when the connection to the peer 97 * is totally severed. 98 * 99 * The semantics of these routines are such that connectionless protocols 100 * can call soisconnected() and soisdisconnected() only, bypassing the 101 * in-progress calls when setting up a ``connection'' takes no time. 102 * 103 * From the passive side, a socket is created with two queues of sockets: 104 * 105 * - so_q0 (0) for partial connections (i.e. connections in progress) 106 * - so_q (1) for connections already made and awaiting user acceptance. 107 * 108 * As a protocol is preparing incoming connections, it creates a socket 109 * structure queued on so_q0 by calling sonewconn(). When the connection 110 * is established, soisconnected() is called, and transfers the 111 * socket structure to so_q, making it available to accept(). 112 * 113 * If a socket is closed with sockets on either so_q0 or so_q, these 114 * sockets are dropped. 115 * 116 * Locking rules and assumptions: 117 * 118 * o socket::so_lock can change on the fly. The low level routines used 119 * to lock sockets are aware of this. When so_lock is acquired, the 120 * routine locking must check to see if so_lock still points to the 121 * lock that was acquired. If so_lock has changed in the meantime, the 122 * now irrelevant lock that was acquired must be dropped and the lock 123 * operation retried. 
Although not proven here, this is completely safe 124 * on a multiprocessor system, even with relaxed memory ordering, given 125 * the next two rules: 126 * 127 * o In order to mutate so_lock, the lock pointed to by the current value 128 * of so_lock must be held: i.e., the socket must be held locked by the 129 * changing thread. The thread must issue membar_exit() to prevent 130 * memory accesses being reordered, and can set so_lock to the desired 131 * value. If the lock pointed to by the new value of so_lock is not 132 * held by the changing thread, the socket must then be considered 133 * unlocked. 134 * 135 * o If so_lock is mutated, and the previous lock referred to by so_lock 136 * could still be visible to other threads in the system (e.g. via file 137 * descriptor or protocol-internal reference), then the old lock must 138 * remain valid until the socket and/or protocol control block has been 139 * torn down. 140 * 141 * o If a socket has a non-NULL so_head value (i.e. is in the process of 142 * connecting), then locking the socket must also lock the socket pointed 143 * to by so_head: their lock pointers must match. 144 * 145 * o If a socket has connections in progress (so_q, so_q0 not empty) then 146 * locking the socket must also lock the sockets attached to both queues. 147 * Again, their lock pointers must match. 148 * 149 * o Beyond the initial lock assignment in socreate(), assigning locks to 150 * sockets is the responsibility of the individual protocols / protocol 151 * domains. 
 */

static pool_cache_t	socket_cache;	/* cache for struct socket allocations */
u_long	sb_max = SB_MAX;	/* maximum socket buffer size */
static u_long	sb_max_adj;	/* adjusted sb_max (accounts for mbuf overhead) */

/*
 * soisconnecting: note that a connection attempt has been initiated
 * on this socket (see the connection life-cycle comment above).
 */
void
soisconnecting(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTING;
}

/*
 * soisconnected: the connection to the peer is established.
 *
 * For a passive-side socket still sitting on the listener's partial
 * queue (so_q0), move it to the accept queue (so_q) and wake any
 * accept(2) waiters on the listener -- unless an accept filter is
 * installed, in which case arm the filter's upcall instead.
 */
void
soisconnected(struct socket *so)
{
	struct socket	*head;

	head = so->so_head;

	KASSERT(solocked(so));
	KASSERT(head == NULL || solocked2(so, head));

	so->so_state &= ~(SS_ISCONNECTING | SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTED;
	if (head && so->so_onq == &head->so_q0) {
		if ((so->so_options & SO_ACCEPTFILTER) == 0) {
			/*
			 * Re-enqueue and wake up any waiters, e.g.
			 * processes blocking on accept().
			 */
			soqremque(so, 0);
			soqinsque(head, so, 1);
			sorwakeup(head);
			cv_broadcast(&head->so_cv);
		} else {
			/* Defer readiness to the accept filter's callback. */
			so->so_upcall =
			    head->so_accf->so_accept_filter->accf_callback;
			so->so_upcallarg = head->so_accf->so_accept_filter_arg;
			so->so_rcv.sb_flags |= SB_UPCALL;
			so->so_options &= ~SO_ACCEPTFILTER;
			(*so->so_upcall)(so, so->so_upcallarg,
			    POLLIN|POLLRDNORM, M_DONTWAIT);
		}
	} else {
		/* Active side (or connectionless): wake this socket only. */
		cv_broadcast(&so->so_cv);
		sorwakeup(so);
		sowwakeup(so);
	}
}

/*
 * soisdisconnecting: a disconnect is in progress; no further data can
 * be sent or received.  Wake all waiters so they observe the change.
 */
void
soisdisconnecting(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state &= ~SS_ISCONNECTING;
	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
	cv_broadcast(&so->so_cv);
	sowwakeup(so);
	sorwakeup(so);
}

/*
 * soisdisconnected: the connection to the peer is fully torn down.
 */
void
soisdisconnected(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
	cv_broadcast(&so->so_cv);
	sowwakeup(so);
	sorwakeup(so);
}

/*
 * soinit2: second-stage initialization; create the socket pool cache.
 */
void
soinit2(void)
{

	socket_cache = pool_cache_init(sizeof(struct socket), 0, 0, 0,
	    "socket", NULL, IPL_SOFTNET, NULL, NULL, NULL);
}

/*
 * sonewconn: accept a new connection.
 *
 * When an attempt at a new connection is noted on a socket which accepts
 * connections, sonewconn(9) is called.  If the connection is possible
 * (subject to space constraints, etc) then we allocate a new structure,
 * properly linked into the data structure of the original socket.
 *
 * => If 'soready' is true, then socket will become ready for accept() i.e.
 *    inserted into the so_q queue, SS_ISCONNECTED set and waiters awoken.
 * => May be called from soft-interrupt context.
 * => Listening socket should be locked.
 * => Returns the new socket locked.
 */
struct socket *
sonewconn(struct socket *head, bool soready)
{
	struct socket	*so;
	int		soqueue, error;

	KASSERT(solocked(head));

	if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2) {
		/* Listen queue overflow. */
		return NULL;
	}
	if ((head->so_options & SO_ACCEPTFILTER) != 0) {
		/* Accept filter present: connection becomes ready later. */
		soready = false;
	}
	soqueue = soready ? 1 : 0;

	/* PR_NOWAIT allocation: may be called from soft interrupt. */
	if ((so = soget(false)) == NULL) {
		return NULL;
	}

	/* Inherit options and callbacks from the listening socket. */
	so->so_type = head->so_type;
	so->so_options = head->so_options & ~SO_ACCEPTCONN;
	so->so_linger = head->so_linger;
	so->so_state = head->so_state | SS_NOFDREF;
	so->so_proto = head->so_proto;
	so->so_timeo = head->so_timeo;
	so->so_pgid = head->so_pgid;
	so->so_send = head->so_send;
	so->so_receive = head->so_receive;
	so->so_uidinfo = head->so_uidinfo;
	so->so_cpid = head->so_cpid;
#ifdef MBUFTRACE
	so->so_mowner = head->so_mowner;
	so->so_rcv.sb_mowner = head->so_rcv.sb_mowner;
	so->so_snd.sb_mowner = head->so_snd.sb_mowner;
#endif
	if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat) != 0)
		goto out;
	so->so_snd.sb_lowat = head->so_snd.sb_lowat;
	so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
	so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
	so->so_snd.sb_timeo = head->so_snd.sb_timeo;
	so->so_rcv.sb_flags |= head->so_rcv.sb_flags & (SB_AUTOSIZE | SB_ASYNC);
	so->so_snd.sb_flags |= head->so_snd.sb_flags & (SB_AUTOSIZE | SB_ASYNC);

	/*
	 * Share the lock with the listening-socket, it may get unshared
	 * once the connection is complete.
	 */
	mutex_obj_hold(head->so_lock);
	so->so_lock = head->so_lock;
	soqinsque(head, so, soqueue);

	error = (*so->so_proto->pr_usrreq)(so, PRU_ATTACH, NULL, NULL,
	    NULL, NULL);
	KASSERT(solocked(so));
	if (error) {
		(void) soqremque(so, soqueue);
out:
		KASSERT(so->so_accf == NULL);
		soput(so);

		/* Note: the listening socket shall stay locked. */
		KASSERT(solocked(head));
		return NULL;
	}

	/*
	 * Update the connection status and wake up any waiters,
	 * e.g. processes blocking on accept().
	 */
	if (soready) {
		so->so_state |= SS_ISCONNECTED;
		sorwakeup(head);
		cv_broadcast(&head->so_cv);
	}
	KASSERT(solocked2(head, so));
	return so;
}

/*
 * soget: allocate and zero-initialize a socket structure from the pool
 * cache.  Initializes queues, condvars and selinfo; so_lock is NOT set
 * here -- the caller assigns it (see the locking rules above).
 *
 * => 'waitok' selects PR_WAITOK vs. PR_NOWAIT; may return NULL if false.
 */
struct socket *
soget(bool waitok)
{
	struct socket	*so;

	so = pool_cache_get(socket_cache, (waitok ? PR_WAITOK : PR_NOWAIT));
	if (__predict_false(so == NULL))
		return (NULL);
	memset(so, 0, sizeof(*so));
	TAILQ_INIT(&so->so_q0);
	TAILQ_INIT(&so->so_q);
	cv_init(&so->so_cv, "socket");
	cv_init(&so->so_rcv.sb_cv, "netio");
	cv_init(&so->so_snd.sb_cv, "netio");
	selinit(&so->so_rcv.sb_sel);
	selinit(&so->so_snd.sb_sel);
	so->so_rcv.sb_so = so;
	so->so_snd.sb_so = so;
	return so;
}

/*
 * soput: release a socket structure back to the pool cache, destroying
 * its condvars and selinfo and dropping the reference on so_lock.
 *
 * => There must be no waiters on any of the socket's condvars.
 */
void
soput(struct socket *so)
{

	KASSERT(!cv_has_waiters(&so->so_cv));
	KASSERT(!cv_has_waiters(&so->so_rcv.sb_cv));
	KASSERT(!cv_has_waiters(&so->so_snd.sb_cv));
	seldestroy(&so->so_rcv.sb_sel);
	seldestroy(&so->so_snd.sb_sel);
	mutex_obj_free(so->so_lock);
	cv_destroy(&so->so_cv);
	cv_destroy(&so->so_rcv.sb_cv);
	cv_destroy(&so->so_snd.sb_cv);
	pool_cache_put(socket_cache, so);
}

/*
 * soqinsque: insert socket of a new connection into the specified
 * accept queue of the listening socket (head).
 *
 *	q = 0: queue of partial connections
 *	q = 1: queue of incoming connections
 */
void
soqinsque(struct socket *head, struct socket *so, int q)
{
	KASSERT(q == 0 || q == 1);
	KASSERT(solocked2(head, so));
	KASSERT(so->so_onq == NULL);
	KASSERT(so->so_head == NULL);

	so->so_head = head;
	if (q == 0) {
		head->so_q0len++;
		so->so_onq = &head->so_q0;
	} else {
		head->so_qlen++;
		so->so_onq = &head->so_q;
	}
	TAILQ_INSERT_TAIL(so->so_onq, so, so_qe);
}

/*
 * soqremque: remove socket from the specified queue.
 *
 * => Returns true if socket was removed from the specified queue.
 * => False if socket was not removed (because it was in other queue).
 */
bool
soqremque(struct socket *so, int q)
{
	struct socket	*head = so->so_head;

	KASSERT(q == 0 || q == 1);
	KASSERT(solocked(so));
	KASSERT(so->so_onq != NULL);
	KASSERT(head != NULL);

	if (q == 0) {
		if (so->so_onq != &head->so_q0)
			return false;
		head->so_q0len--;
	} else {
		if (so->so_onq != &head->so_q)
			return false;
		head->so_qlen--;
	}
	KASSERT(solocked2(so, head));
	TAILQ_REMOVE(so->so_onq, so, so_qe);
	so->so_onq = NULL;
	so->so_head = NULL;
	return true;
}

/*
 * socantsendmore: indicates that no more data will be sent on the
 * socket; it would normally be applied to a socket when the user
 * informs the system that no more data is to be sent, by the protocol
 * code (in case PRU_SHUTDOWN).
 */
void
socantsendmore(struct socket *so)
{
	KASSERT(solocked(so));

	so->so_state |= SS_CANTSENDMORE;
	sowwakeup(so);
}

/*
 * socantrcvmore(): indicates that no more data will be received and
 * will normally be applied to the socket by a protocol when it detects
 * that the peer will send no more data.  Data queued for reading in
 * the socket may yet be read.
 */
void
socantrcvmore(struct socket *so)
{
	KASSERT(solocked(so));

	so->so_state |= SS_CANTRCVMORE;
	sorwakeup(so);
}

/*
 * Wait for data to arrive at/drain from a socket buffer.
 *
 * => Returns the cv_timedwait(9) error code (0, EWOULDBLOCK, or, for
 *    the interruptible variant, a signal-related error).
 */
int
sbwait(struct sockbuf *sb)
{
	struct socket	*so;
	kmutex_t	*lock;
	int		error;

	so = sb->sb_so;

	KASSERT(solocked(so));

	sb->sb_flags |= SB_NOTIFY;
	lock = so->so_lock;
	if ((sb->sb_flags & SB_NOINTR) != 0)
		error = cv_timedwait(&sb->sb_cv, lock, sb->sb_timeo);
	else
		error = cv_timedwait_sig(&sb->sb_cv, lock, sb->sb_timeo);
	/*
	 * so_lock may have been changed while we slept; if so, drop the
	 * stale lock and re-acquire (see locking rules at top of file).
	 */
	if (__predict_false(lock != so->so_lock))
		solockretry(so, lock);
	return error;
}

/*
 * Wakeup processes waiting on a socket buffer.
 * Do asynchronous notification via SIGIO
 * if the socket buffer has the SB_ASYNC flag set.
 *
 * => 'code' is POLL_IN for the receive buffer, otherwise treated as
 *    a write-side wakeup.
 */
void
sowakeup(struct socket *so, struct sockbuf *sb, int code)
{
	int	band;

	KASSERT(solocked(so));
	KASSERT(sb->sb_so == so);

	if (code == POLL_IN)
		band = POLLIN|POLLRDNORM;
	else
		band = POLLOUT|POLLWRNORM;
	sb->sb_flags &= ~SB_NOTIFY;
	selnotify(&sb->sb_sel, band, NOTE_SUBMIT);
	cv_broadcast(&sb->sb_cv);
	if (sb->sb_flags & SB_ASYNC)
		fownsignal(so->so_pgid, SIGIO, code, band, so);
	if (sb->sb_flags & SB_UPCALL)
		(*so->so_upcall)(so, so->so_upcallarg, band, M_DONTWAIT);
}

/*
 * Reset a socket's lock pointer.  Wake all threads waiting on the
 * socket's condition variables so that they can restart their waits
 * using the new lock.  The existing lock must be held.
 */
void
solockreset(struct socket *so, kmutex_t *lock)
{

	KASSERT(solocked(so));

	so->so_lock = lock;
	cv_broadcast(&so->so_snd.sb_cv);
	cv_broadcast(&so->so_rcv.sb_cv);
	cv_broadcast(&so->so_cv);
}

/*
 * Socket buffer (struct sockbuf) utility routines.
 *
 * Each socket contains two socket buffers: one for sending data and
 * one for receiving data.
 * Each buffer contains a queue of mbufs,
 * information about the number of mbufs and amount of data in the
 * queue, and other fields allowing poll() statements and notification
 * on data availability to be implemented.
 *
 * Data stored in a socket buffer is maintained as a list of records.
 * Each record is a list of mbufs chained together with the m_next
 * field.  Records are chained together with the m_nextpkt field.  The upper
 * level routine soreceive() expects the following conventions to be
 * observed when placing information in the receive buffer:
 *
 * 1. If the protocol requires each message be preceded by the sender's
 *    name, then a record containing that name must be present before
 *    any associated data (mbuf's must be of type MT_SONAME).
 * 2. If the protocol supports the exchange of ``access rights'' (really
 *    just additional data associated with the message), and there are
 *    ``rights'' to be received, then a record containing this data
 *    should be present (mbuf's must be of type MT_CONTROL).
 * 3. If a name or rights record exists, then it must be followed by
 *    a data record, perhaps of zero length.
 *
 * Before using a new socket structure it is first necessary to reserve
 * buffer space to the socket, by calling sbreserve().  This should commit
 * some of the available buffer space in the system buffer pool for the
 * socket (currently, it does nothing but enforce limits).  The space
 * should be released by calling sbrelease() when the socket is destroyed.
 */

/*
 * sb_max_set: update the global socket buffer size limit (sb_max) and
 * its mbuf-overhead-adjusted companion (sb_max_adj).
 *
 * => Returns EINVAL for values below 16 KB, 0 on success.
 */
int
sb_max_set(u_long new_sbmax)
{
	int	s;

	if (new_sbmax < (16 * 1024))
		return (EINVAL);

	s = splsoftnet();
	sb_max = new_sbmax;
	sb_max_adj = (u_quad_t)new_sbmax * MCLBYTES / (MSIZE + MCLBYTES);
	splx(s);

	return (0);
}

/*
 * soreserve: reserve send and receive buffer space for a socket,
 * enforcing a minimum send high-water mark and sane low-water marks.
 *
 * => Returns 0 on success, ENOBUFS if either reservation fails.
 */
int
soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
{
	KASSERT(so->so_pcb == NULL || solocked(so));

	/*
	 * there's at least one application (a configure script of screen)
	 * which expects a fifo is writable even if it has "some" bytes
	 * in its buffer.
	 * so we want to make sure (hiwat - lowat) >= (some bytes).
	 *
	 * PIPE_BUF here is an arbitrary value chosen as (some bytes) above.
	 * we expect it's large enough for such applications.
	 */
	u_long	lowat = MAX(sock_loan_thresh, MCLBYTES);
	u_long	hiwat = lowat + PIPE_BUF;

	if (sndcc < hiwat)
		sndcc = hiwat;
	if (sbreserve(&so->so_snd, sndcc, so) == 0)
		goto bad;
	if (sbreserve(&so->so_rcv, rcvcc, so) == 0)
		goto bad2;
	if (so->so_rcv.sb_lowat == 0)
		so->so_rcv.sb_lowat = 1;
	if (so->so_snd.sb_lowat == 0)
		so->so_snd.sb_lowat = lowat;
	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
	return (0);
 bad2:
	sbrelease(&so->so_snd, so);
 bad:
	return (ENOBUFS);
}

/*
 * Allot mbufs to a sockbuf.
 * Attempt to scale mbmax so that mbcnt doesn't become limiting
 * if buffering efficiency is near the normal case.
 *
 * => Returns 1 on success, 0 if the request exceeds limits
 *    (sb_max_adj or the per-uid RLIMIT_SBSIZE accounting).
 */
int
sbreserve(struct sockbuf *sb, u_long cc, struct socket *so)
{
	struct lwp	*l = curlwp; /* XXX */
	rlim_t		maxcc;
	struct uidinfo	*uidinfo;

	KASSERT(so->so_pcb == NULL || solocked(so));
	KASSERT(sb->sb_so == so);
	KASSERT(sb_max_adj != 0);

	if (cc == 0 || cc > sb_max_adj)
		return (0);

	maxcc = l->l_proc->p_rlimit[RLIMIT_SBSIZE].rlim_cur;

	uidinfo = so->so_uidinfo;
	if (!chgsbsize(uidinfo, &sb->sb_hiwat, cc, maxcc))
		return 0;
	sb->sb_mbmax = min(cc * 2, sb_max);
	if (sb->sb_lowat > sb->sb_hiwat)
		sb->sb_lowat = sb->sb_hiwat;
	return (1);
}

/*
 * Free mbufs held by a socket, and reserved mbuf space.  We do not assert
 * that the socket is held locked here: see sorflush().
 */
void
sbrelease(struct sockbuf *sb, struct socket *so)
{

	KASSERT(sb->sb_so == so);

	sbflush(sb);
	(void)chgsbsize(so->so_uidinfo, &sb->sb_hiwat, 0, RLIM_INFINITY);
	sb->sb_mbmax = 0;
}

/*
 * Routines to add and remove
 * data from an mbuf queue.
 *
 * The routines sbappend() or sbappendrecord() are normally called to
 * append new mbufs to a socket buffer, after checking that adequate
 * space is available, comparing the function sbspace() with the amount
 * of data to be added.  sbappendrecord() differs from sbappend() in
 * that data supplied is treated as the beginning of a new record.
 * To place a sender's address, optional access rights, and data in a
 * socket receive buffer, sbappendaddr() should be used.  To place
 * access rights and data in a socket receive buffer, sbappendrights()
 * should be used.  In either case, the new data begins a new record.
 * Note that unlike sbappend() and sbappendrecord(), these routines check
 * for the caller that there will be enough space to store the data.
 * Each fails if there is not enough space, or if it cannot find mbufs
 * to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data
 * awaiting acknowledgement.  Data is normally copied from a socket
 * send buffer in a protocol with m_copy for output to a peer,
 * and then removing the data from the socket buffer with sbdrop()
 * or sbdroprecord() when the data is acknowledged by the peer.
 */

#ifdef SOCKBUF_DEBUG
/*
 * sblastrecordchk: consistency check that sb_lastrecord really points
 * at the last record on the sb_mb chain; panics otherwise.
 */
void
sblastrecordchk(struct sockbuf *sb, const char *where)
{
	struct mbuf	*m = sb->sb_mb;

	KASSERT(solocked(sb->sb_so));

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	if (m != sb->sb_lastrecord) {
		printf("sblastrecordchk: sb_mb %p sb_lastrecord %p last %p\n",
		    sb->sb_mb, sb->sb_lastrecord, m);
		printf("packet chain:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
			printf("\t%p\n", m);
		panic("sblastrecordchk from %s", where);
	}
}

/*
 * sblastmbufchk: consistency check that sb_mbtail really points at the
 * last mbuf of the last record; panics otherwise.
 */
void
sblastmbufchk(struct sockbuf *sb, const char *where)
{
	struct mbuf	*m = sb->sb_mb;
	struct mbuf	*n;

	KASSERT(solocked(sb->sb_so));

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	while (m && m->m_next)
		m = m->m_next;

	if (m != sb->sb_mbtail) {
		printf("sblastmbufchk: sb_mb %p sb_mbtail %p last %p\n",
		    sb->sb_mb, sb->sb_mbtail, m);
		printf("packet tree:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
			printf("\t");
			for (n = m; n != NULL; n = n->m_next)
				printf("%p ", n);
			printf("\n");
		}
		panic("sblastmbufchk from %s", where);
	}
}
#endif /* SOCKBUF_DEBUG */

/*
 * Link a chain of records onto a socket buffer
 */
#define	SBLINKRECORDCHAIN(sb, m0, mlast)				\
do {									\
	if ((sb)->sb_lastrecord != NULL)				\
		(sb)->sb_lastrecord->m_nextpkt = (m0);			\
	else								\
		(sb)->sb_mb = (m0);					\
	(sb)->sb_lastrecord = (mlast);					\
} while (/*CONSTCOND*/0)


#define	SBLINKRECORD(sb, m0)						\
	SBLINKRECORDCHAIN(sb, m0, m0)

/*
 * Append mbuf chain m to the last record in the
 * socket buffer sb.  The additional space associated
 * the mbuf chain is recorded in sb.  Empty mbufs are
 * discarded and mbufs are compacted where possible.
 */
void
sbappend(struct sockbuf *sb, struct mbuf *m)
{
	struct mbuf	*n;

	KASSERT(solocked(sb->sb_so));

	if (m == NULL)
		return;

#ifdef MBUFTRACE
	m_claimm(m, sb->sb_mowner);
#endif

	SBLASTRECORDCHK(sb, "sbappend 1");

	if ((n = sb->sb_lastrecord) != NULL) {
		/*
		 * XXX Would like to simply use sb_mbtail here, but
		 * XXX I need to verify that I won't miss an EOR that
		 * XXX way.
		 */
		do {
			if (n->m_flags & M_EOR) {
				/* Last record ended: start a new one. */
				sbappendrecord(sb, m); /* XXXXXX!!!! */
				return;
			}
		} while (n->m_next && (n = n->m_next));
	} else {
		/*
		 * If this is the first record in the socket buffer, it's
		 * also the last record.
		 */
		sb->sb_lastrecord = m;
	}
	sbcompress(sb, m, n);
	SBLASTRECORDCHK(sb, "sbappend 2");
}

/*
 * This version of sbappend() should only be used when the caller
 * absolutely knows that there will never be more than one record
 * in the socket buffer, that is, a stream protocol (such as TCP).
 */
void
sbappendstream(struct sockbuf *sb, struct mbuf *m)
{

	KASSERT(solocked(sb->sb_so));
	KDASSERT(m->m_nextpkt == NULL);
	KASSERT(sb->sb_mb == sb->sb_lastrecord);

	SBLASTMBUFCHK(sb, __func__);

#ifdef MBUFTRACE
	m_claimm(m, sb->sb_mowner);
#endif

	sbcompress(sb, m, sb->sb_mbtail);

	sb->sb_lastrecord = sb->sb_mb;
	SBLASTRECORDCHK(sb, __func__);
}

#ifdef SOCKBUF_DEBUG
/*
 * sbcheck: verify that sb_cc and sb_mbcnt match the actual contents of
 * the buffer; panics on any inconsistency.
 */
void
sbcheck(struct sockbuf *sb)
{
	struct mbuf	*m, *m2;
	u_long		len, mbcnt;

	KASSERT(solocked(sb->sb_so));

	len = 0;
	mbcnt = 0;
	for (m = sb->sb_mb; m; m = m->m_nextpkt) {
		for (m2 = m; m2 != NULL; m2 = m2->m_next) {
			len += m2->m_len;
			mbcnt += MSIZE;
			if (m2->m_flags & M_EXT)
				mbcnt += m2->m_ext.ext_size;
			if (m2->m_nextpkt != NULL)
				panic("sbcheck nextpkt");
		}
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		printf("cc %lu != %lu || mbcnt %lu != %lu\n", len, sb->sb_cc,
		    mbcnt, sb->sb_mbcnt);
		panic("sbcheck");
	}
}
#endif

/*
 * As above, except the mbuf chain
 * begins a new record.
 */
void
sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf	*m;

	KASSERT(solocked(sb->sb_so));

	if (m0 == NULL)
		return;

#ifdef MBUFTRACE
	m_claimm(m0, sb->sb_mowner);
#endif
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	SBLASTRECORDCHK(sb, "sbappendrecord 1");
	SBLINKRECORD(sb, m0);
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		/* Move the end-of-record mark to the new last mbuf. */
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
	SBLASTRECORDCHK(sb, "sbappendrecord 2");
}

/*
 * As above except that OOB data
 * is inserted at the beginning of the sockbuf,
 * but after any other OOB data.
 */
void
sbinsertoob(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf	*m, **mp;

	KASSERT(solocked(sb->sb_so));

	if (m0 == NULL)
		return;

	SBLASTRECORDCHK(sb, "sbinsertoob 1");

	/* Skip past existing OOB records (and their control mbufs). */
	for (mp = &sb->sb_mb; (m = *mp) != NULL; mp = &((*mp)->m_nextpkt)) {
	    again:
		switch (m->m_type) {

		case MT_OOBDATA:
			continue;		/* WANT next train */

		case MT_CONTROL:
			if ((m = m->m_next) != NULL)
				goto again;	/* inspect THIS train further */
		}
		break;
	}
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	m0->m_nextpkt = *mp;
	if (*mp == NULL) {
		/* m0 is actually the new tail */
		sb->sb_lastrecord = m0;
	}
	*mp = m0;
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		/* Move the end-of-record mark to the new last mbuf. */
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
	SBLASTRECORDCHK(sb, "sbinsertoob 2");
}

/*
 * Append address and data, and optionally, control (ancillary) data
 * to the receive queue of a socket.  If present,
 * m0 must include a packet header with total length.
 * Returns 0 if no space in sockbuf or insufficient mbufs.
936 */ 937int 938sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0, 939 struct mbuf *control) 940{ 941 struct mbuf *m, *n, *nlast; 942 int space, len; 943 944 KASSERT(solocked(sb->sb_so)); 945 946 space = asa->sa_len; 947 948 if (m0 != NULL) { 949 if ((m0->m_flags & M_PKTHDR) == 0) 950 panic("sbappendaddr"); 951 space += m0->m_pkthdr.len; 952#ifdef MBUFTRACE 953 m_claimm(m0, sb->sb_mowner); 954#endif 955 } 956 for (n = control; n; n = n->m_next) { 957 space += n->m_len; 958 MCLAIM(n, sb->sb_mowner); 959 if (n->m_next == NULL) /* keep pointer to last control buf */ 960 break; 961 } 962 if (space > sbspace(sb)) 963 return (0); 964 m = m_get(M_DONTWAIT, MT_SONAME); 965 if (m == NULL) 966 return (0); 967 MCLAIM(m, sb->sb_mowner); 968 /* 969 * XXX avoid 'comparison always true' warning which isn't easily 970 * avoided. 971 */ 972 len = asa->sa_len; 973 if (len > MLEN) { 974 MEXTMALLOC(m, asa->sa_len, M_NOWAIT); 975 if ((m->m_flags & M_EXT) == 0) { 976 m_free(m); 977 return (0); 978 } 979 } 980 m->m_len = asa->sa_len; 981 memcpy(mtod(m, void *), asa, asa->sa_len); 982 if (n) 983 n->m_next = m0; /* concatenate data to control */ 984 else 985 control = m0; 986 m->m_next = control; 987 988 SBLASTRECORDCHK(sb, "sbappendaddr 1"); 989 990 for (n = m; n->m_next != NULL; n = n->m_next) 991 sballoc(sb, n); 992 sballoc(sb, n); 993 nlast = n; 994 SBLINKRECORD(sb, m); 995 996 sb->sb_mbtail = nlast; 997 SBLASTMBUFCHK(sb, "sbappendaddr"); 998 SBLASTRECORDCHK(sb, "sbappendaddr 2"); 999 1000 return (1); 1001} 1002 1003/* 1004 * Helper for sbappendchainaddr: prepend a struct sockaddr* to 1005 * an mbuf chain. 
 */
static inline struct mbuf *
m_prepend_sockaddr(struct sockbuf *sb, struct mbuf *m0,
    const struct sockaddr *asa)
{
	struct mbuf *m;
	const int salen = asa->sa_len;

	KASSERT(solocked(sb->sb_so));

	/* only the first in each chain need be a pkthdr */
	m = m_gethdr(M_DONTWAIT, MT_SONAME);
	if (m == NULL)
		return NULL;
	MCLAIM(m, sb->sb_mowner);
#ifdef notyet
	if (salen > MHLEN) {
		MEXTMALLOC(m, salen, M_NOWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return NULL;
		}
	}
#else
	/* Addresses larger than MHLEN are not supported yet (see above). */
	KASSERT(salen <= MHLEN);
#endif
	m->m_len = salen;
	memcpy(mtod(m, void *), asa, salen);
	m->m_next = m0;
	/* The new header's length covers the address plus the old record. */
	m->m_pkthdr.len = salen + m0->m_pkthdr.len;

	return m;
}

int
sbappendaddrchain(struct sockbuf *sb, const struct sockaddr *asa,
    struct mbuf *m0, int sbprio)
{
	struct mbuf *m, *n, *n0, *nlast;
	int error;

	KASSERT(solocked(sb->sb_so));

	/*
	 * XXX sbprio reserved for encoding priority of this* request:
	 *  SB_PRIO_NONE --> honour normal sb limits
	 *  SB_PRIO_ONESHOT_OVERFLOW --> if socket has any space,
	 *	take whole chain. Intended for large requests
	 *      that should be delivered atomically (all, or none).
	 * SB_PRIO_OVERDRAFT -- allow a small (2*MLEN) overflow
	 *       over normal socket limits, for messages indicating
	 *       buffer overflow in earlier normal/lower-priority messages
	 * SB_PRIO_BESTEFFORT -->  ignore limits entirely.
	 *       Intended for  kernel-generated messages only.
	 *        Up to generator to avoid total mbuf resource exhaustion.
	 */
	(void)sbprio;

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddrchain");

#ifdef notyet
	space = sbspace(sb);

	/*
	 * Enforce SB_PRIO_* limits as described above.
	 */
#endif

	/*
	 * Walk the input packet chain m0, prepending a copy of `asa' to
	 * each record and linking the resulting records into a new chain
	 * headed by n0 (nlast tracks its tail).
	 */
	n0 = NULL;
	nlast = NULL;
	for (m = m0; m; m = m->m_nextpkt) {
		struct mbuf *np;

#ifdef MBUFTRACE
		m_claimm(m, sb->sb_mowner);
#endif

		/* Prepend sockaddr to this record (m) of input chain m0 */
		n = m_prepend_sockaddr(sb, m, asa);
		if (n == NULL) {
			error = ENOBUFS;
			goto bad;
		}

		/* Append record (asa+m) to end of new chain n0 */
		if (n0 == NULL) {
			n0 = n;
		} else {
			nlast->m_nextpkt = n;
		}
		/* Keep track of last record on new chain */
		nlast = n;

		/* Charge every mbuf of this record to the sockbuf. */
		for (np = n; np; np = np->m_next)
			sballoc(sb, np);
	}

	SBLASTRECORDCHK(sb, "sbappendaddrchain 1");

	/* Drop the entire chain of (asa+m) records onto the socket */
	SBLINKRECORDCHAIN(sb, n0, nlast);

	SBLASTRECORDCHK(sb, "sbappendaddrchain 2");

	/* Find the last mbuf of the last record to update sb_mbtail. */
	for (m = nlast; m->m_next; m = m->m_next)
		;
	sb->sb_mbtail = m;
	SBLASTMBUFCHK(sb, "sbappendaddrchain");

	return (1);

bad:
	/*
	 * On error, free the prepended addresses. For consistency
	 * with sbappendaddr(), leave it to our caller to free
	 * the input record chain passed to us as m0.
	 */
	while ((n = n0) != NULL) {
		struct mbuf *np;

		/* Undo the sballoc() of this record */
		for (np = n; np; np = np->m_next)
			sbfree(sb, np);

		n0 = n->m_nextpkt;	/* iterate at next prepended address */
		MFREE(n, np);		/* free prepended address (not data) */
	}
	return error;
}


int
sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
{
	struct mbuf *m, *mlast, *n;
	int space;

	KASSERT(solocked(sb->sb_so));

	/* Sum up the control chain; a record must have control mbufs. */
	space = 0;
	if (control == NULL)
		panic("sbappendcontrol");
	for (m = control; ; m = m->m_next) {
		space += m->m_len;
		MCLAIM(m, sb->sb_mowner);
		if (m->m_next == NULL)
			break;
	}
	n = m;			/* save pointer to last control buffer */
	/* Add in the data chain, if any. */
	for (m = m0; m; m = m->m_next) {
		MCLAIM(m, sb->sb_mowner);
		space += m->m_len;
	}
	/* Fail (return 0) rather than overcommit the socket buffer. */
	if (space > sbspace(sb))
		return (0);
	n->m_next = m0;			/* concatenate data to control */

	SBLASTRECORDCHK(sb, "sbappendcontrol 1");

	/* Charge every mbuf of the record and link it in as a new record. */
	for (m = control; m->m_next != NULL; m = m->m_next)
		sballoc(sb, m);
	sballoc(sb, m);
	mlast = m;
	SBLINKRECORD(sb, control);

	sb->sb_mbtail = mlast;
	SBLASTMBUFCHK(sb, "sbappendcontrol");
	SBLASTRECORDCHK(sb, "sbappendcontrol 2");

	return (1);
}

/*
 * Compress mbuf chain m into the socket
 * buffer sb following mbuf n. If n
 * is null, the buffer is presumed empty.
 */
void
sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
{
	int eor;
	struct mbuf *o;

	KASSERT(solocked(sb->sb_so));

	eor = 0;
	while (m) {
		/* Remember any end-of-record mark seen along the chain. */
		eor |= m->m_flags & M_EOR;
		/*
		 * Discard an empty mbuf, unless it carries the only EOR
		 * mark and there is no same-type mbuf (next in chain, or
		 * the current tail n) for the mark to migrate to.
		 */
		if (m->m_len == 0 &&
		    (eor == 0 ||
		    (((o = m->m_next) || (o = n)) &&
		    o->m_type == m->m_type))) {
			if (sb->sb_lastrecord == m)
				sb->sb_lastrecord = m->m_next;
			m = m_free(m);
			continue;
		}
		/*
		 * Small mbuf whose data fits in the tail mbuf n: copy the
		 * bytes into n and free m instead of linking it in.
		 */
		if (n && (n->m_flags & M_EOR) == 0 &&
		    /* M_TRAILINGSPACE() checks buffer writeability */
		    m->m_len <= MCLBYTES / 4 && /* XXX Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			memcpy(mtod(n, char *) + n->m_len, mtod(m, void *),
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			m = m_free(m);
			continue;
		}
		/* Otherwise link m in as the new tail mbuf. */
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sb->sb_mbtail = m;
		sballoc(sb, m);
		n = m;
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	/* Re-apply a deferred EOR mark to the final mbuf, if any. */
	if (eor) {
		if (n)
			n->m_flags |= eor;
		else
			printf("semi-panic: sbcompress\n");
	}
	SBLASTMBUFCHK(sb, __func__);
}

/*
 * Free all mbufs in a sockbuf.
 * Check that all resources are reclaimed.
 */
void
sbflush(struct sockbuf *sb)
{

	KASSERT(solocked(sb->sb_so));
	KASSERT((sb->sb_flags & SB_LOCK) == 0);

	/* Drop everything; sbdrop() maintains the sockbuf accounting. */
	while (sb->sb_mbcnt)
		sbdrop(sb, (int)sb->sb_cc);

	/* The buffer must now be completely empty and consistent. */
	KASSERT(sb->sb_cc == 0);
	KASSERT(sb->sb_mb == NULL);
	KASSERT(sb->sb_mbtail == NULL);
	KASSERT(sb->sb_lastrecord == NULL);
}

/*
 * Drop data from (the front of) a sockbuf.
 */
void
sbdrop(struct sockbuf *sb, int len)
{
	struct mbuf *m, *mn, *next;

	KASSERT(solocked(sb->sb_so));

	next = (m = sb->sb_mb) ? m->m_nextpkt : NULL;
	while (len > 0) {
		if (m == NULL) {
			/* Ran off the current record: advance to the next. */
			if (next == NULL)
				panic("sbdrop(%p,%d): cc=%lu",
				    sb, len, sb->sb_cc);
			m = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			/* Partial mbuf: trim it from the front and stop. */
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			break;
		}
		/* Whole mbuf consumed: uncharge and free it. */
		len -= m->m_len;
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	/* Also discard any now-empty mbufs at the new front. */
	while (m && m->m_len == 0) {
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;
	/*
	 * First part is an inline SB_EMPTY_FIXUP().  Second part
	 * makes sure sb_lastrecord is up-to-date if we dropped
	 * part of the last record.
	 */
	m = sb->sb_mb;
	if (m == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (m->m_nextpkt == NULL)
		sb->sb_lastrecord = m;
}

/*
 * Drop a record off the front of a sockbuf
 * and move the next record to the front.
 */
void
sbdroprecord(struct sockbuf *sb)
{
	struct mbuf *m, *mn;

	KASSERT(solocked(sb->sb_so));

	m = sb->sb_mb;
	if (m) {
		/* Unlink the first record, then free each of its mbufs. */
		sb->sb_mb = m->m_nextpkt;
		do {
			sbfree(sb, m);
			MFREE(m, mn);
		} while ((m = mn) != NULL);
	}
	SB_EMPTY_FIXUP(sb);
}

/*
 * Create a "control" mbuf containing the specified data
 * with the specified type for presentation on a socket buffer.
 */
struct mbuf *
sbcreatecontrol1(void **p, int size, int type, int level, int flags)
{
	struct cmsghdr *cp;
	struct mbuf *m;
	int space = CMSG_SPACE(size);

	/*
	 * A non-sleeping caller cannot get more than a cluster's worth of
	 * storage (the MEXTMALLOC below would sleep), so reject early.
	 */
	if ((flags & M_DONTWAIT) && space > MCLBYTES) {
		printf("%s: message too large %d\n", __func__, space);
		return NULL;
	}

	if ((m = m_get(flags, MT_CONTROL)) == NULL)
		return NULL;
	if (space > MLEN) {
		/* Only reached when sleeping is allowed (checked above). */
		if (space > MCLBYTES)
			MEXTMALLOC(m, space, M_WAITOK);
		else
			MCLGET(m, flags);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return NULL;
		}
	}
	/* Fill in the cmsg header; hand the data area back via *p. */
	cp = mtod(m, struct cmsghdr *);
	*p = CMSG_DATA(cp);
	m->m_len = space;
	cp->cmsg_len = CMSG_LEN(size);
	cp->cmsg_level = level;
	cp->cmsg_type = type;
	return m;
}

/*
 * Convenience wrapper around sbcreatecontrol1(): build a control
 * mbuf and copy `size' bytes from `p' into its data area.
 */
struct mbuf *
sbcreatecontrol(void *p, int size, int type, int level)
{
	struct mbuf *m;
	void *v;

	m = sbcreatecontrol1(&v, size, type, level, M_DONTWAIT);
	if (m == NULL)
		return NULL;
	memcpy(v, p, size);
	return m;
}

/*
 * The socket's lock pointer may change while we slept; keep
 * re-acquiring until the lock we hold is the socket's current one.
 */
void
solockretry(struct socket *so, kmutex_t *lock)
{

	while (lock != so->so_lock) {
		mutex_exit(lock);
		lock = so->so_lock;
		mutex_enter(lock);
	}
}

/*
 * solocked: return true if the current LWP holds the socket's lock.
 */
bool
solocked(struct socket *so)
{

	return mutex_owned(so->so_lock);
}

/*
 * solocked2: return true if both sockets share one lock and we hold it.
 */
bool
solocked2(struct socket *so1, struct socket *so2)
{
	kmutex_t *lock;

	lock = so1->so_lock;
	if (lock != so2->so_lock)
		return false;
	return mutex_owned(lock);
}

/*
 * sosetlock: assign a default lock to a new socket.
 */
void
sosetlock(struct socket *so)
{
	if (so->so_lock == NULL) {
		kmutex_t *lock = softnet_lock;

		so->so_lock = lock;
		mutex_obj_hold(lock);
		mutex_enter(lock);
	}

	/* In all cases, lock must be held on return from PRU_ATTACH. */
	KASSERT(solocked(so));
}

/*
 * Set lock on sockbuf sb; sleep if lock is already held.
 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
 * Returns error without lock if sleep is interrupted.
 */
int
sblock(struct sockbuf *sb, int wf)
{
	struct socket *so;
	kmutex_t *lock;
	int error;

	KASSERT(solocked(sb->sb_so));

	for (;;) {
		if (__predict_true((sb->sb_flags & SB_LOCK) == 0)) {
			sb->sb_flags |= SB_LOCK;
			return 0;
		}
		/* Locked by someone else; non-waiting callers fail now. */
		if (wf != M_WAITOK)
			return EWOULDBLOCK;
		so = sb->sb_so;
		lock = so->so_lock;
		if ((sb->sb_flags & SB_NOINTR) != 0) {
			cv_wait(&so->so_cv, lock);
			error = 0;
		} else
			error = cv_wait_sig(&so->so_cv, lock);
		/* so_lock may have been replaced while we slept. */
		if (__predict_false(lock != so->so_lock))
			solockretry(so, lock);
		if (error != 0)
			return error;
	}
}

/*
 * sbunlock: release the sockbuf lock and wake any sblock() waiters.
 */
void
sbunlock(struct sockbuf *sb)
{
	struct socket *so;

	so = sb->sb_so;

	KASSERT(solocked(so));
	KASSERT((sb->sb_flags & SB_LOCK) != 0);

	sb->sb_flags &= ~SB_LOCK;
	cv_broadcast(&so->so_cv);
}

/*
 * sowait: wait on the socket's condition variable, optionally
 * interruptible (catch) and/or with a timeout (timo).
 */
int
sowait(struct socket *so, bool catch, int timo)
{
	kmutex_t *lock;
	int error;

	KASSERT(solocked(so));
	KASSERT(catch || timo != 0);

	lock = so->so_lock;
	if (catch)
		error = cv_timedwait_sig(&so->so_cv, lock, timo);
	else
		error = cv_timedwait(&so->so_cv, lock, timo);
	/* so_lock may have been replaced while we slept. */
	if (__predict_false(lock != so->so_lock))
		solockretry(so, lock);
	return error;
}