uipc_socket2.c revision 1.57
1/* $NetBSD: uipc_socket2.c,v 1.57 2003/09/22 12:59:59 christos Exp $ */ 2 3/* 4 * Copyright (c) 1982, 1986, 1988, 1990, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 
 *
 *	@(#)uipc_socket2.c	8.2 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uipc_socket2.c,v 1.57 2003/09/22 12:59:59 christos Exp $");

#include "opt_mbuftrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/buf.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/poll.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>

/*
 * Primitive routines for operating on sockets and socket buffers
 */

/* strings for sleep message (passed as wmesg to tsleep()): */
const char	netcon[] = "netcon";
const char	netcls[] = "netcls";
const char	netio[] = "netio";
const char	netlck[] = "netlck";

/*
 * Procedures to manipulate state flags of socket
 * and do appropriate wakeups.  Normal sequence from the
 * active (originating) side is that soisconnecting() is
 * called during processing of connect() call,
 * resulting in an eventual call to soisconnected() if/when the
 * connection is established.  When the connection is torn down
 * soisdisconnecting() is called during processing of disconnect() call,
 * and soisdisconnected() is called when the connection to the peer
 * is totally severed.  The semantics of these routines are such that
 * connectionless protocols can call soisconnected() and soisdisconnected()
 * only, bypassing the in-progress calls when setting up a ``connection''
 * takes no time.
 *
 * From the passive side, a socket is created with
 * two queues of sockets: so_q0 for connections in progress
 * and so_q for connections already made and awaiting user acceptance.
 * As a protocol is preparing incoming connections, it creates a socket
 * structure queued on so_q0 by calling sonewconn().  When the connection
 * is established, soisconnected() is called, and transfers the
 * socket structure to so_q, making it available to accept().
83 * 84 * If a socket is closed with sockets on either 85 * so_q0 or so_q, these sockets are dropped. 86 * 87 * If higher level protocols are implemented in 88 * the kernel, the wakeups done here will sometimes 89 * cause software-interrupt process scheduling. 90 */ 91 92void 93soisconnecting(struct socket *so) 94{ 95 96 so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); 97 so->so_state |= SS_ISCONNECTING; 98} 99 100void 101soisconnected(struct socket *so) 102{ 103 struct socket *head; 104 105 head = so->so_head; 106 so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING); 107 so->so_state |= SS_ISCONNECTED; 108 if (head && soqremque(so, 0)) { 109 soqinsque(head, so, 1); 110 sorwakeup(head); 111 wakeup((caddr_t)&head->so_timeo); 112 } else { 113 wakeup((caddr_t)&so->so_timeo); 114 sorwakeup(so); 115 sowwakeup(so); 116 } 117} 118 119void 120soisdisconnecting(struct socket *so) 121{ 122 123 so->so_state &= ~SS_ISCONNECTING; 124 so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE); 125 wakeup((caddr_t)&so->so_timeo); 126 sowwakeup(so); 127 sorwakeup(so); 128} 129 130void 131soisdisconnected(struct socket *so) 132{ 133 134 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); 135 so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED); 136 wakeup((caddr_t)&so->so_timeo); 137 sowwakeup(so); 138 sorwakeup(so); 139} 140 141/* 142 * When an attempt at a new connection is noted on a socket 143 * which accepts connections, sonewconn is called. If the 144 * connection is possible (subject to space constraints, etc.) 145 * then we allocate a new structure, propoerly linked into the 146 * data structure of the original socket, and return this. 147 * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED. 148 * 149 * Currently, sonewconn() is defined as sonewconn1() in socketvar.h 150 * to catch calls that are missing the (new) second parameter. 
151 */ 152struct socket * 153sonewconn1(struct socket *head, int connstatus) 154{ 155 struct socket *so; 156 int soqueue; 157 158 soqueue = connstatus ? 1 : 0; 159 if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2) 160 return ((struct socket *)0); 161 so = pool_get(&socket_pool, PR_NOWAIT); 162 if (so == NULL) 163 return (NULL); 164 memset((caddr_t)so, 0, sizeof(*so)); 165 so->so_type = head->so_type; 166 so->so_options = head->so_options &~ SO_ACCEPTCONN; 167 so->so_linger = head->so_linger; 168 so->so_state = head->so_state | SS_NOFDREF; 169 so->so_proto = head->so_proto; 170 so->so_timeo = head->so_timeo; 171 so->so_pgid = head->so_pgid; 172 so->so_send = head->so_send; 173 so->so_receive = head->so_receive; 174 so->so_uid = head->so_uid; 175#ifdef MBUFTRACE 176 so->so_mowner = head->so_mowner; 177 so->so_rcv.sb_mowner = head->so_rcv.sb_mowner; 178 so->so_snd.sb_mowner = head->so_snd.sb_mowner; 179#endif 180 (void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat); 181 soqinsque(head, so, soqueue); 182 if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH, 183 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0, 184 (struct proc *)0)) { 185 (void) soqremque(so, soqueue); 186 pool_put(&socket_pool, so); 187 return (NULL); 188 } 189 if (connstatus) { 190 sorwakeup(head); 191 wakeup((caddr_t)&head->so_timeo); 192 so->so_state |= connstatus; 193 } 194 return (so); 195} 196 197void 198soqinsque(struct socket *head, struct socket *so, int q) 199{ 200 201#ifdef DIAGNOSTIC 202 if (so->so_onq != NULL) 203 panic("soqinsque"); 204#endif 205 206 so->so_head = head; 207 if (q == 0) { 208 head->so_q0len++; 209 so->so_onq = &head->so_q0; 210 } else { 211 head->so_qlen++; 212 so->so_onq = &head->so_q; 213 } 214 TAILQ_INSERT_TAIL(so->so_onq, so, so_qe); 215} 216 217int 218soqremque(struct socket *so, int q) 219{ 220 struct socket *head; 221 222 head = so->so_head; 223 if (q == 0) { 224 if (so->so_onq != &head->so_q0) 225 return (0); 226 head->so_q0len--; 227 } else 
{ 228 if (so->so_onq != &head->so_q) 229 return (0); 230 head->so_qlen--; 231 } 232 TAILQ_REMOVE(so->so_onq, so, so_qe); 233 so->so_onq = NULL; 234 so->so_head = NULL; 235 return (1); 236} 237 238/* 239 * Socantsendmore indicates that no more data will be sent on the 240 * socket; it would normally be applied to a socket when the user 241 * informs the system that no more data is to be sent, by the protocol 242 * code (in case PRU_SHUTDOWN). Socantrcvmore indicates that no more data 243 * will be received, and will normally be applied to the socket by a 244 * protocol when it detects that the peer will send no more data. 245 * Data queued for reading in the socket may yet be read. 246 */ 247 248void 249socantsendmore(struct socket *so) 250{ 251 252 so->so_state |= SS_CANTSENDMORE; 253 sowwakeup(so); 254} 255 256void 257socantrcvmore(struct socket *so) 258{ 259 260 so->so_state |= SS_CANTRCVMORE; 261 sorwakeup(so); 262} 263 264/* 265 * Wait for data to arrive at/drain from a socket buffer. 266 */ 267int 268sbwait(struct sockbuf *sb) 269{ 270 271 sb->sb_flags |= SB_WAIT; 272 return (tsleep((caddr_t)&sb->sb_cc, 273 (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, netio, 274 sb->sb_timeo)); 275} 276 277/* 278 * Lock a sockbuf already known to be locked; 279 * return any error returned from sleep (EINTR). 280 */ 281int 282sb_lock(struct sockbuf *sb) 283{ 284 int error; 285 286 while (sb->sb_flags & SB_LOCK) { 287 sb->sb_flags |= SB_WANT; 288 error = tsleep((caddr_t)&sb->sb_flags, 289 (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK|PCATCH, 290 netlck, 0); 291 if (error) 292 return (error); 293 } 294 sb->sb_flags |= SB_LOCK; 295 return (0); 296} 297 298/* 299 * Wakeup processes waiting on a socket buffer. 300 * Do asynchronous notification via SIGIO 301 * if the socket buffer has the SB_ASYNC flag set. 
302 */ 303void 304sowakeup(struct socket *so, struct sockbuf *sb, int code) 305{ 306 selnotify(&sb->sb_sel, 0); 307 sb->sb_flags &= ~SB_SEL; 308 if (sb->sb_flags & SB_WAIT) { 309 sb->sb_flags &= ~SB_WAIT; 310 wakeup((caddr_t)&sb->sb_cc); 311 } 312 if (sb->sb_flags & SB_ASYNC) { 313 int band; 314 if (code == POLL_IN) 315 band = POLLIN|POLLRDNORM; 316 else 317 band = POLLOUT|POLLWRNORM; 318 fownsignal(so->so_pgid, SIGIO, code, band, so); 319 } 320 if (sb->sb_flags & SB_UPCALL) 321 (*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT); 322} 323 324/* 325 * Socket buffer (struct sockbuf) utility routines. 326 * 327 * Each socket contains two socket buffers: one for sending data and 328 * one for receiving data. Each buffer contains a queue of mbufs, 329 * information about the number of mbufs and amount of data in the 330 * queue, and other fields allowing poll() statements and notification 331 * on data availability to be implemented. 332 * 333 * Data stored in a socket buffer is maintained as a list of records. 334 * Each record is a list of mbufs chained together with the m_next 335 * field. Records are chained together with the m_nextpkt field. The upper 336 * level routine soreceive() expects the following conventions to be 337 * observed when placing information in the receive buffer: 338 * 339 * 1. If the protocol requires each message be preceded by the sender's 340 * name, then a record containing that name must be present before 341 * any associated data (mbuf's must be of type MT_SONAME). 342 * 2. If the protocol supports the exchange of ``access rights'' (really 343 * just additional data associated with the message), and there are 344 * ``rights'' to be received, then a record containing this data 345 * should be present (mbuf's must be of type MT_CONTROL). 346 * 3. If a name or rights record exists, then it must be followed by 347 * a data record, perhaps of zero length. 
348 * 349 * Before using a new socket structure it is first necessary to reserve 350 * buffer space to the socket, by calling sbreserve(). This should commit 351 * some of the available buffer space in the system buffer pool for the 352 * socket (currently, it does nothing but enforce limits). The space 353 * should be released by calling sbrelease() when the socket is destroyed. 354 */ 355 356int 357soreserve(struct socket *so, u_long sndcc, u_long rcvcc) 358{ 359 360 if (sbreserve(&so->so_snd, sndcc) == 0) 361 goto bad; 362 if (sbreserve(&so->so_rcv, rcvcc) == 0) 363 goto bad2; 364 if (so->so_rcv.sb_lowat == 0) 365 so->so_rcv.sb_lowat = 1; 366 if (so->so_snd.sb_lowat == 0) 367 so->so_snd.sb_lowat = MCLBYTES; 368 if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat) 369 so->so_snd.sb_lowat = so->so_snd.sb_hiwat; 370 return (0); 371 bad2: 372 sbrelease(&so->so_snd); 373 bad: 374 return (ENOBUFS); 375} 376 377/* 378 * Allot mbufs to a sockbuf. 379 * Attempt to scale mbmax so that mbcnt doesn't become limiting 380 * if buffering efficiency is near the normal case. 381 */ 382int 383sbreserve(struct sockbuf *sb, u_long cc) 384{ 385 386 if (cc == 0 || 387 (u_quad_t) cc > (u_quad_t) sb_max * MCLBYTES / (MSIZE + MCLBYTES)) 388 return (0); 389 sb->sb_hiwat = cc; 390 sb->sb_mbmax = min(cc * 2, sb_max); 391 if (sb->sb_lowat > sb->sb_hiwat) 392 sb->sb_lowat = sb->sb_hiwat; 393 return (1); 394} 395 396/* 397 * Free mbufs held by a socket, and reserved mbuf space. 398 */ 399void 400sbrelease(struct sockbuf *sb) 401{ 402 403 sbflush(sb); 404 sb->sb_hiwat = sb->sb_mbmax = 0; 405} 406 407/* 408 * Routines to add and remove 409 * data from an mbuf queue. 410 * 411 * The routines sbappend() or sbappendrecord() are normally called to 412 * append new mbufs to a socket buffer, after checking that adequate 413 * space is available, comparing the function sbspace() with the amount 414 * of data to be added. 
sbappendrecord() differs from sbappend() in 415 * that data supplied is treated as the beginning of a new record. 416 * To place a sender's address, optional access rights, and data in a 417 * socket receive buffer, sbappendaddr() should be used. To place 418 * access rights and data in a socket receive buffer, sbappendrights() 419 * should be used. In either case, the new data begins a new record. 420 * Note that unlike sbappend() and sbappendrecord(), these routines check 421 * for the caller that there will be enough space to store the data. 422 * Each fails if there is not enough space, or if it cannot find mbufs 423 * to store additional information in. 424 * 425 * Reliable protocols may use the socket send buffer to hold data 426 * awaiting acknowledgement. Data is normally copied from a socket 427 * send buffer in a protocol with m_copy for output to a peer, 428 * and then removing the data from the socket buffer with sbdrop() 429 * or sbdroprecord() when the data is acknowledged by the peer. 
 */

#ifdef SOCKBUF_DEBUG
/*
 * Debug check: verify that sb_lastrecord points at the final record
 * (end of the m_nextpkt chain); dump the chain and panic otherwise.
 */
void
sblastrecordchk(struct sockbuf *sb, const char *where)
{
	struct mbuf *m = sb->sb_mb;

	/* Walk to the last record in the buffer. */
	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	if (m != sb->sb_lastrecord) {
		printf("sblastrecordchk: sb_mb %p sb_lastrecord %p last %p\n",
		    sb->sb_mb, sb->sb_lastrecord, m);
		printf("packet chain:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
			printf("\t%p\n", m);
		panic("sblastrecordchk from %s", where);
	}
}

/*
 * Debug check: verify that sb_mbtail points at the final mbuf of the
 * final record; dump the whole record/mbuf tree and panic otherwise.
 */
void
sblastmbufchk(struct sockbuf *sb, const char *where)
{
	struct mbuf *m = sb->sb_mb;
	struct mbuf *n;

	/* Last record ... */
	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	/* ... then last mbuf within that record. */
	while (m && m->m_next)
		m = m->m_next;

	if (m != sb->sb_mbtail) {
		printf("sblastmbufchk: sb_mb %p sb_mbtail %p last %p\n",
		    sb->sb_mb, sb->sb_mbtail, m);
		printf("packet tree:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
			printf("\t");
			for (n = m; n != NULL; n = n->m_next)
				printf("%p ", n);
			printf("\n");
		}
		panic("sblastmbufchk from %s", where);
	}
}
#endif /* SOCKBUF_DEBUG */

/*
 * Link record chain m0 in as the new last record of sb, handling the
 * empty-buffer case.  Note: does NOT update sb_mbtail or the byte/mbuf
 * counts; callers are responsible for those.
 */
#define	SBLINKRECORD(sb, m0)						\
do {									\
	if ((sb)->sb_lastrecord != NULL)				\
		(sb)->sb_lastrecord->m_nextpkt = (m0);			\
	else								\
		(sb)->sb_mb = (m0);					\
	(sb)->sb_lastrecord = (m0);					\
} while (/*CONSTCOND*/0)

/*
 * Append mbuf chain m to the last record in the
 * socket buffer sb.  The additional space associated with
 * the mbuf chain is recorded in sb.  Empty mbufs are
 * discarded and mbufs are compacted where possible.
 */
void
sbappend(struct sockbuf *sb, struct mbuf *m)
{
	struct mbuf *n;

	if (m == 0)
		return;

#ifdef MBUFTRACE
	m_claim(m, sb->sb_mowner);
#endif

	SBLASTRECORDCHK(sb, "sbappend 1");

	if ((n = sb->sb_lastrecord) != NULL) {
		/*
		 * XXX Would like to simply use sb_mbtail here, but
		 * XXX I need to verify that I won't miss an EOR that
		 * XXX way.
		 */
		do {
			if (n->m_flags & M_EOR) {
				/* Last record is complete; start a new one. */
				sbappendrecord(sb, m); /* XXXXXX!!!! */
				return;
			}
		} while (n->m_next && (n = n->m_next));
	} else {
		/*
		 * If this is the first record in the socket buffer, it's
		 * also the last record.
		 */
		sb->sb_lastrecord = m;
	}
	/* n is now the last mbuf of the last record (or NULL if empty). */
	sbcompress(sb, m, n);
	SBLASTRECORDCHK(sb, "sbappend 2");
}

/*
 * This version of sbappend() should only be used when the caller
 * absolutely knows that there will never be more than one record
 * in the socket buffer, that is, a stream protocol (such as TCP).
 */
void
sbappendstream(struct sockbuf *sb, struct mbuf *m)
{

	KDASSERT(m->m_nextpkt == NULL);
	KASSERT(sb->sb_mb == sb->sb_lastrecord);

	SBLASTMBUFCHK(sb, __func__);

#ifdef MBUFTRACE
	m_claim(m, sb->sb_mowner);
#endif

	/* Append directly after the known tail mbuf. */
	sbcompress(sb, m, sb->sb_mbtail);

	/* The single record is both first and last. */
	sb->sb_lastrecord = sb->sb_mb;
	SBLASTRECORDCHK(sb, __func__);
}

#ifdef SOCKBUF_DEBUG
/*
 * Debug check: recompute the byte and mbuf-storage counts of the
 * (single-record) buffer and panic if they disagree with sb_cc/sb_mbcnt.
 */
void
sbcheck(struct sockbuf *sb)
{
	struct mbuf *m;
	u_long len, mbcnt;

	len = 0;
	mbcnt = 0;
	for (m = sb->sb_mb; m; m = m->m_next) {
		len += m->m_len;
		mbcnt += MSIZE;
		if (m->m_flags & M_EXT)
			mbcnt += m->m_ext.ext_size;
		/* Only meaningful for buffers with a single record. */
		if (m->m_nextpkt)
			panic("sbcheck nextpkt");
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		printf("cc %lu != %lu || mbcnt %lu != %lu\n", len, sb->sb_cc,
		    mbcnt, sb->sb_mbcnt);
		panic("sbcheck");
	}
}
#endif

/*
 * As above, except the mbuf chain
 * begins a new record.
 */
void
sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf *m;

	if (m0 == 0)
		return;

#ifdef MBUFTRACE
	m_claim(m0, sb->sb_mowner);
#endif
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	SBLASTRECORDCHK(sb, "sbappendrecord 1");
	SBLINKRECORD(sb, m0);
	m = m0->m_next;
	m0->m_next = 0;
	/*
	 * If EOR is set on the record head but more mbufs follow,
	 * move the EOR mark to the remainder so it ends the record.
	 */
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	/* Compress the rest of the chain in after the record head. */
	sbcompress(sb, m, m0);
	SBLASTRECORDCHK(sb, "sbappendrecord 2");
}

/*
 * As above except that OOB data
 * is inserted at the beginning of the sockbuf,
 * but after any other OOB data.
 */
void
sbinsertoob(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf *m, **mp;

	if (m0 == 0)
		return;

	SBLASTRECORDCHK(sb, "sbinsertoob 1");

	/*
	 * Find the insertion point: skip past existing OOB records,
	 * and past control mbufs at the head of a record.
	 */
	for (mp = &sb->sb_mb; (m = *mp) != NULL; mp = &((*mp)->m_nextpkt)) {
	    again:
		switch (m->m_type) {

		case MT_OOBDATA:
			continue;		/* WANT next train */

		case MT_CONTROL:
			if ((m = m->m_next) != NULL)
				goto again;	/* inspect THIS train further */
		}
		break;
	}
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	m0->m_nextpkt = *mp;
	if (*mp == NULL) {
		/* m0 is actually the new tail */
		sb->sb_lastrecord = m0;
	}
	*mp = m0;
	m = m0->m_next;
	m0->m_next = 0;
	/* As in sbappendrecord(): push an EOR mark onto the remainder. */
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
	SBLASTRECORDCHK(sb, "sbinsertoob 2");
}

/*
 * Append address and data, and optionally, control (ancillary) data
 * to the receive queue of a socket.  If present,
 * m0 must include a packet header with total length.
 * Returns 0 if no space in sockbuf or insufficient mbufs.
 */
int
sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0,
	struct mbuf *control)
{
	struct mbuf *m, *n, *nlast;
	int space, len;

	/* Total space needed: address + data + control. */
	space = asa->sa_len;

	if (m0 != NULL) {
		if ((m0->m_flags & M_PKTHDR) == 0)
			panic("sbappendaddr");
		space += m0->m_pkthdr.len;
#ifdef MBUFTRACE
		m_claim(m0, sb->sb_mowner);
#endif
	}
	for (n = control; n; n = n->m_next) {
		space += n->m_len;
		MCLAIM(n, sb->sb_mowner);
		if (n->m_next == 0)	/* keep pointer to last control buf */
			break;
	}
	if (space > sbspace(sb))
		return (0);
	/* Allocate an mbuf to hold the sender's address. */
	MGET(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	MCLAIM(m, sb->sb_mowner);
	/*
	 * XXX avoid 'comparison always true' warning which isn't easily
	 * avoided.
	 */
	len = asa->sa_len;
	if (len > MLEN) {
		/* Address does not fit in a plain mbuf; attach storage. */
		MEXTMALLOC(m, asa->sa_len, M_NOWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return (0);
		}
	}
	m->m_len = asa->sa_len;
	memcpy(mtod(m, caddr_t), (caddr_t)asa, asa->sa_len);
	if (n)
		n->m_next = m0;		/* concatenate data to control */
	else
		control = m0;
	m->m_next = control;		/* address, then control, then data */

	SBLASTRECORDCHK(sb, "sbappendaddr 1");

	/* Account for every mbuf of the new record. */
	for (n = m; n->m_next != NULL; n = n->m_next)
		sballoc(sb, n);
	sballoc(sb, n);
	nlast = n;
	SBLINKRECORD(sb, m);

	sb->sb_mbtail = nlast;
	SBLASTMBUFCHK(sb, "sbappendaddr");

	SBLASTRECORDCHK(sb, "sbappendaddr 2");

	return (1);
}

/*
 * Append control (ancillary) data followed by optional data m0 as a
 * new record.  Returns 0 if no space in sockbuf, 1 on success.
 */
int
sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
{
	struct mbuf *m, *mlast, *n;
	int space;

	space = 0;
	if (control == 0)
		panic("sbappendcontrol");
	for (m = control; ; m = m->m_next) {
		space += m->m_len;
		MCLAIM(m, sb->sb_mowner);
		if (m->m_next == 0)
			break;
	}
	n = m;			/* save pointer to last control buffer */
	for (m = m0; m; m = m->m_next) {
		MCLAIM(m, sb->sb_mowner);
		space += m->m_len;
	}

	if (space > sbspace(sb))
		return (0);
	n->m_next = m0;			/* concatenate data to control */

	SBLASTRECORDCHK(sb, "sbappendcontrol 1");

	/* Account for every mbuf of the new record. */
	for (m = control; m->m_next != NULL; m = m->m_next)
		sballoc(sb, m);
	sballoc(sb, m);
	mlast = m;
	SBLINKRECORD(sb, control);

	sb->sb_mbtail = mlast;
	SBLASTMBUFCHK(sb, "sbappendcontrol");

	SBLASTRECORDCHK(sb, "sbappendcontrol 2");

	return (1);
}

/*
 * Compress mbuf chain m into the socket
 * buffer sb following mbuf n.  If n
 * is null, the buffer is presumed empty.
 */
void
sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
{
	int eor;
	struct mbuf *o;

	eor = 0;
	while (m) {
		eor |= m->m_flags & M_EOR;
		/*
		 * Free an empty mbuf unless it carries the only EOR mark
		 * and there is no same-type mbuf to move the mark onto.
		 */
		if (m->m_len == 0 &&
		    (eor == 0 ||
		     (((o = m->m_next) || (o = n)) &&
		      o->m_type == m->m_type))) {
			/* Keep sb_lastrecord valid if we free a record head. */
			if (sb->sb_lastrecord == m)
				sb->sb_lastrecord = m->m_next;
			m = m_free(m);
			continue;
		}
		/*
		 * Coalesce a small mbuf into the trailing space of the
		 * previous one instead of linking it in (saves mbufs).
		 */
		if (n && (n->m_flags & M_EOR) == 0 &&
		    /* M_TRAILINGSPACE() checks buffer writeability */
		    m->m_len <= MCLBYTES / 4 && /* XXX Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			memcpy(mtod(n, caddr_t) + n->m_len, mtod(m, caddr_t),
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			m = m_free(m);
			continue;
		}
		/* Otherwise link m in after n (or as the new buffer head). */
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sb->sb_mbtail = m;
		sballoc(sb, m);
		n = m;
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	/* Re-apply a collected EOR mark to the final mbuf. */
	if (eor) {
		if (n)
			n->m_flags |= eor;
		else
			printf("semi-panic: sbcompress\n");
	}
	SBLASTMBUFCHK(sb, __func__);
}

/*
 * Free all mbufs in a sockbuf.
 * Check that all resources are reclaimed.
 */
void
sbflush(struct sockbuf *sb)
{

	/* Caller must not hold the sockbuf lock here. */
	KASSERT((sb->sb_flags & SB_LOCK) == 0);

	/* Drop whole records until no mbuf storage remains accounted. */
	while (sb->sb_mbcnt)
		sbdrop(sb, (int)sb->sb_cc);

	KASSERT(sb->sb_cc == 0);
	KASSERT(sb->sb_mb == NULL);
	KASSERT(sb->sb_mbtail == NULL);
	KASSERT(sb->sb_lastrecord == NULL);
}

/*
 * Drop data from (the front of) a sockbuf.
 */
void
sbdrop(struct sockbuf *sb, int len)
{
	struct mbuf *m, *mn, *next;

	/* Remember the record following the one we are consuming. */
	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	while (len > 0) {
		if (m == 0) {
			/* Ran off the end of a record; move to the next. */
			if (next == 0)
				panic("sbdrop");
			m = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			/* Partial drop: trim the front of this mbuf. */
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			break;
		}
		len -= m->m_len;
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	/* Also discard any now-empty mbufs at the front. */
	while (m && m->m_len == 0) {
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;
	/*
	 * First part is an inline SB_EMPTY_FIXUP().  Second part
	 * makes sure sb_lastrecord is up-to-date if we dropped
	 * part of the last record.
	 */
	m = sb->sb_mb;
	if (m == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (m->m_nextpkt == NULL)
		sb->sb_lastrecord = m;
}

/*
 * Drop a record off the front of a sockbuf
 * and move the next record to the front.
 */
void
sbdroprecord(struct sockbuf *sb)
{
	struct mbuf *m, *mn;

	m = sb->sb_mb;
	if (m) {
		sb->sb_mb = m->m_nextpkt;
		/* Free every mbuf of the first record. */
		do {
			sbfree(sb, m);
			MFREE(m, mn);
		} while ((m = mn) != NULL);
	}
	SB_EMPTY_FIXUP(sb);
}

/*
 * Create a "control" mbuf containing the specified data
 * with the specified type for presentation on a socket buffer.
919 */ 920struct mbuf * 921sbcreatecontrol(caddr_t p, int size, int type, int level) 922{ 923 struct cmsghdr *cp; 924 struct mbuf *m; 925 926 if (CMSG_SPACE(size) > MCLBYTES) { 927 printf("sbcreatecontrol: message too large %d\n", size); 928 return NULL; 929 } 930 931 if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL) 932 return ((struct mbuf *) NULL); 933 if (CMSG_SPACE(size) > MLEN) { 934 MCLGET(m, M_DONTWAIT); 935 if ((m->m_flags & M_EXT) == 0) { 936 m_free(m); 937 return NULL; 938 } 939 } 940 cp = mtod(m, struct cmsghdr *); 941 memcpy(CMSG_DATA(cp), p, size); 942 m->m_len = CMSG_SPACE(size); 943 cp->cmsg_len = CMSG_LEN(size); 944 cp->cmsg_level = level; 945 cp->cmsg_type = type; 946 return (m); 947} 948