/* uipc_socket2.c revision 1.56 */
1/* $NetBSD: uipc_socket2.c,v 1.56 2003/09/21 19:17:11 jdolecek Exp $ */ 2 3/* 4 * Copyright (c) 1982, 1986, 1988, 1990, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 
30 * 31 * @(#)uipc_socket2.c 8.2 (Berkeley) 2/14/95 32 */ 33 34#include <sys/cdefs.h> 35__KERNEL_RCSID(0, "$NetBSD: uipc_socket2.c,v 1.56 2003/09/21 19:17:11 jdolecek Exp $"); 36 37#include "opt_mbuftrace.h" 38 39#include <sys/param.h> 40#include <sys/systm.h> 41#include <sys/proc.h> 42#include <sys/file.h> 43#include <sys/buf.h> 44#include <sys/malloc.h> 45#include <sys/mbuf.h> 46#include <sys/protosw.h> 47#include <sys/poll.h> 48#include <sys/socket.h> 49#include <sys/socketvar.h> 50#include <sys/signalvar.h> 51 52/* 53 * Primitive routines for operating on sockets and socket buffers 54 */ 55 56/* strings for sleep message: */ 57const char netcon[] = "netcon"; 58const char netcls[] = "netcls"; 59const char netio[] = "netio"; 60const char netlck[] = "netlck"; 61 62/* 63 * Procedures to manipulate state flags of socket 64 * and do appropriate wakeups. Normal sequence from the 65 * active (originating) side is that soisconnecting() is 66 * called during processing of connect() call, 67 * resulting in an eventual call to soisconnected() if/when the 68 * connection is established. When the connection is torn down 69 * soisdisconnecting() is called during processing of disconnect() call, 70 * and soisdisconnected() is called when the connection to the peer 71 * is totally severed. The semantics of these routines are such that 72 * connectionless protocols can call soisconnected() and soisdisconnected() 73 * only, bypassing the in-progress calls when setting up a ``connection'' 74 * takes no time. 75 * 76 * From the passive side, a socket is created with 77 * two queues of sockets: so_q0 for connections in progress 78 * and so_q for connections already made and awaiting user acceptance. 79 * As a protocol is preparing incoming connections, it creates a socket 80 * structure queued on so_q0 by calling sonewconn(). When the connection 81 * is established, soisconnected() is called, and transfers the 82 * socket structure to so_q, making it available to accept(). 
83 * 84 * If a socket is closed with sockets on either 85 * so_q0 or so_q, these sockets are dropped. 86 * 87 * If higher level protocols are implemented in 88 * the kernel, the wakeups done here will sometimes 89 * cause software-interrupt process scheduling. 90 */ 91 92void 93soisconnecting(struct socket *so) 94{ 95 96 so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); 97 so->so_state |= SS_ISCONNECTING; 98} 99 100void 101soisconnected(struct socket *so) 102{ 103 struct socket *head; 104 105 head = so->so_head; 106 so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING); 107 so->so_state |= SS_ISCONNECTED; 108 if (head && soqremque(so, 0)) { 109 soqinsque(head, so, 1); 110 sorwakeup(head); 111 wakeup((caddr_t)&head->so_timeo); 112 } else { 113 wakeup((caddr_t)&so->so_timeo); 114 sorwakeup(so); 115 sowwakeup(so); 116 } 117} 118 119void 120soisdisconnecting(struct socket *so) 121{ 122 123 so->so_state &= ~SS_ISCONNECTING; 124 so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE); 125 wakeup((caddr_t)&so->so_timeo); 126 sowwakeup(so); 127 sorwakeup(so); 128} 129 130void 131soisdisconnected(struct socket *so) 132{ 133 134 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); 135 so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED); 136 wakeup((caddr_t)&so->so_timeo); 137 sowwakeup(so); 138 sorwakeup(so); 139} 140 141/* 142 * When an attempt at a new connection is noted on a socket 143 * which accepts connections, sonewconn is called. If the 144 * connection is possible (subject to space constraints, etc.) 145 * then we allocate a new structure, propoerly linked into the 146 * data structure of the original socket, and return this. 147 * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED. 148 * 149 * Currently, sonewconn() is defined as sonewconn1() in socketvar.h 150 * to catch calls that are missing the (new) second parameter. 
151 */ 152struct socket * 153sonewconn1(struct socket *head, int connstatus) 154{ 155 struct socket *so; 156 int soqueue; 157 158 soqueue = connstatus ? 1 : 0; 159 if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2) 160 return ((struct socket *)0); 161 so = pool_get(&socket_pool, PR_NOWAIT); 162 if (so == NULL) 163 return (NULL); 164 memset((caddr_t)so, 0, sizeof(*so)); 165 so->so_type = head->so_type; 166 so->so_options = head->so_options &~ SO_ACCEPTCONN; 167 so->so_linger = head->so_linger; 168 so->so_state = head->so_state | SS_NOFDREF; 169 so->so_proto = head->so_proto; 170 so->so_timeo = head->so_timeo; 171 so->so_pgid = head->so_pgid; 172 so->so_send = head->so_send; 173 so->so_receive = head->so_receive; 174 so->so_uid = head->so_uid; 175#ifdef MBUFTRACE 176 so->so_mowner = head->so_mowner; 177 so->so_rcv.sb_mowner = head->so_rcv.sb_mowner; 178 so->so_snd.sb_mowner = head->so_snd.sb_mowner; 179#endif 180 (void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat); 181 soqinsque(head, so, soqueue); 182 if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH, 183 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0, 184 (struct proc *)0)) { 185 (void) soqremque(so, soqueue); 186 pool_put(&socket_pool, so); 187 return (NULL); 188 } 189 if (connstatus) { 190 sorwakeup(head); 191 wakeup((caddr_t)&head->so_timeo); 192 so->so_state |= connstatus; 193 } 194 return (so); 195} 196 197void 198soqinsque(struct socket *head, struct socket *so, int q) 199{ 200 201#ifdef DIAGNOSTIC 202 if (so->so_onq != NULL) 203 panic("soqinsque"); 204#endif 205 206 so->so_head = head; 207 if (q == 0) { 208 head->so_q0len++; 209 so->so_onq = &head->so_q0; 210 } else { 211 head->so_qlen++; 212 so->so_onq = &head->so_q; 213 } 214 TAILQ_INSERT_TAIL(so->so_onq, so, so_qe); 215} 216 217int 218soqremque(struct socket *so, int q) 219{ 220 struct socket *head; 221 222 head = so->so_head; 223 if (q == 0) { 224 if (so->so_onq != &head->so_q0) 225 return (0); 226 head->so_q0len--; 227 } else 
{ 228 if (so->so_onq != &head->so_q) 229 return (0); 230 head->so_qlen--; 231 } 232 TAILQ_REMOVE(so->so_onq, so, so_qe); 233 so->so_onq = NULL; 234 so->so_head = NULL; 235 return (1); 236} 237 238/* 239 * Socantsendmore indicates that no more data will be sent on the 240 * socket; it would normally be applied to a socket when the user 241 * informs the system that no more data is to be sent, by the protocol 242 * code (in case PRU_SHUTDOWN). Socantrcvmore indicates that no more data 243 * will be received, and will normally be applied to the socket by a 244 * protocol when it detects that the peer will send no more data. 245 * Data queued for reading in the socket may yet be read. 246 */ 247 248void 249socantsendmore(struct socket *so) 250{ 251 252 so->so_state |= SS_CANTSENDMORE; 253 sowwakeup(so); 254} 255 256void 257socantrcvmore(struct socket *so) 258{ 259 260 so->so_state |= SS_CANTRCVMORE; 261 sorwakeup(so); 262} 263 264/* 265 * Wait for data to arrive at/drain from a socket buffer. 266 */ 267int 268sbwait(struct sockbuf *sb) 269{ 270 271 sb->sb_flags |= SB_WAIT; 272 return (tsleep((caddr_t)&sb->sb_cc, 273 (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, netio, 274 sb->sb_timeo)); 275} 276 277/* 278 * Lock a sockbuf already known to be locked; 279 * return any error returned from sleep (EINTR). 280 */ 281int 282sb_lock(struct sockbuf *sb) 283{ 284 int error; 285 286 while (sb->sb_flags & SB_LOCK) { 287 sb->sb_flags |= SB_WANT; 288 error = tsleep((caddr_t)&sb->sb_flags, 289 (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK|PCATCH, 290 netlck, 0); 291 if (error) 292 return (error); 293 } 294 sb->sb_flags |= SB_LOCK; 295 return (0); 296} 297 298/* 299 * Wakeup processes waiting on a socket buffer. 300 * Do asynchronous notification via SIGIO 301 * if the socket buffer has the SB_ASYNC flag set. 
302 */ 303void 304sowakeup(struct socket *so, struct sockbuf *sb, int code) 305{ 306 selnotify(&sb->sb_sel, 0); 307 sb->sb_flags &= ~SB_SEL; 308 if (sb->sb_flags & SB_WAIT) { 309 sb->sb_flags &= ~SB_WAIT; 310 wakeup((caddr_t)&sb->sb_cc); 311 } 312 if (sb->sb_flags & SB_ASYNC) { 313 int band; 314 if (code == POLL_IN) { 315 if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) 316 band = (POLLPRI | POLLRDBAND); 317 else 318 band = (POLLIN | POLLRDNORM); 319 } else { 320 if (so->so_oobmark) 321 band = (POLLPRI | POLLWRBAND); 322 else 323 band = (POLLOUT | POLLWRNORM); 324 } 325 fownsignal(so->so_pgid, code, band, so); 326 } 327 if (sb->sb_flags & SB_UPCALL) 328 (*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT); 329} 330 331/* 332 * Socket buffer (struct sockbuf) utility routines. 333 * 334 * Each socket contains two socket buffers: one for sending data and 335 * one for receiving data. Each buffer contains a queue of mbufs, 336 * information about the number of mbufs and amount of data in the 337 * queue, and other fields allowing poll() statements and notification 338 * on data availability to be implemented. 339 * 340 * Data stored in a socket buffer is maintained as a list of records. 341 * Each record is a list of mbufs chained together with the m_next 342 * field. Records are chained together with the m_nextpkt field. The upper 343 * level routine soreceive() expects the following conventions to be 344 * observed when placing information in the receive buffer: 345 * 346 * 1. If the protocol requires each message be preceded by the sender's 347 * name, then a record containing that name must be present before 348 * any associated data (mbuf's must be of type MT_SONAME). 349 * 2. If the protocol supports the exchange of ``access rights'' (really 350 * just additional data associated with the message), and there are 351 * ``rights'' to be received, then a record containing this data 352 * should be present (mbuf's must be of type MT_CONTROL). 353 * 3. 
If a name or rights record exists, then it must be followed by 354 * a data record, perhaps of zero length. 355 * 356 * Before using a new socket structure it is first necessary to reserve 357 * buffer space to the socket, by calling sbreserve(). This should commit 358 * some of the available buffer space in the system buffer pool for the 359 * socket (currently, it does nothing but enforce limits). The space 360 * should be released by calling sbrelease() when the socket is destroyed. 361 */ 362 363int 364soreserve(struct socket *so, u_long sndcc, u_long rcvcc) 365{ 366 367 if (sbreserve(&so->so_snd, sndcc) == 0) 368 goto bad; 369 if (sbreserve(&so->so_rcv, rcvcc) == 0) 370 goto bad2; 371 if (so->so_rcv.sb_lowat == 0) 372 so->so_rcv.sb_lowat = 1; 373 if (so->so_snd.sb_lowat == 0) 374 so->so_snd.sb_lowat = MCLBYTES; 375 if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat) 376 so->so_snd.sb_lowat = so->so_snd.sb_hiwat; 377 return (0); 378 bad2: 379 sbrelease(&so->so_snd); 380 bad: 381 return (ENOBUFS); 382} 383 384/* 385 * Allot mbufs to a sockbuf. 386 * Attempt to scale mbmax so that mbcnt doesn't become limiting 387 * if buffering efficiency is near the normal case. 388 */ 389int 390sbreserve(struct sockbuf *sb, u_long cc) 391{ 392 393 if (cc == 0 || 394 (u_quad_t) cc > (u_quad_t) sb_max * MCLBYTES / (MSIZE + MCLBYTES)) 395 return (0); 396 sb->sb_hiwat = cc; 397 sb->sb_mbmax = min(cc * 2, sb_max); 398 if (sb->sb_lowat > sb->sb_hiwat) 399 sb->sb_lowat = sb->sb_hiwat; 400 return (1); 401} 402 403/* 404 * Free mbufs held by a socket, and reserved mbuf space. 405 */ 406void 407sbrelease(struct sockbuf *sb) 408{ 409 410 sbflush(sb); 411 sb->sb_hiwat = sb->sb_mbmax = 0; 412} 413 414/* 415 * Routines to add and remove 416 * data from an mbuf queue. 
417 * 418 * The routines sbappend() or sbappendrecord() are normally called to 419 * append new mbufs to a socket buffer, after checking that adequate 420 * space is available, comparing the function sbspace() with the amount 421 * of data to be added. sbappendrecord() differs from sbappend() in 422 * that data supplied is treated as the beginning of a new record. 423 * To place a sender's address, optional access rights, and data in a 424 * socket receive buffer, sbappendaddr() should be used. To place 425 * access rights and data in a socket receive buffer, sbappendrights() 426 * should be used. In either case, the new data begins a new record. 427 * Note that unlike sbappend() and sbappendrecord(), these routines check 428 * for the caller that there will be enough space to store the data. 429 * Each fails if there is not enough space, or if it cannot find mbufs 430 * to store additional information in. 431 * 432 * Reliable protocols may use the socket send buffer to hold data 433 * awaiting acknowledgement. Data is normally copied from a socket 434 * send buffer in a protocol with m_copy for output to a peer, 435 * and then removing the data from the socket buffer with sbdrop() 436 * or sbdroprecord() when the data is acknowledged by the peer. 
437 */ 438 439#ifdef SOCKBUF_DEBUG 440void 441sblastrecordchk(struct sockbuf *sb, const char *where) 442{ 443 struct mbuf *m = sb->sb_mb; 444 445 while (m && m->m_nextpkt) 446 m = m->m_nextpkt; 447 448 if (m != sb->sb_lastrecord) { 449 printf("sblastrecordchk: sb_mb %p sb_lastrecord %p last %p\n", 450 sb->sb_mb, sb->sb_lastrecord, m); 451 printf("packet chain:\n"); 452 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) 453 printf("\t%p\n", m); 454 panic("sblastrecordchk from %s", where); 455 } 456} 457 458void 459sblastmbufchk(struct sockbuf *sb, const char *where) 460{ 461 struct mbuf *m = sb->sb_mb; 462 struct mbuf *n; 463 464 while (m && m->m_nextpkt) 465 m = m->m_nextpkt; 466 467 while (m && m->m_next) 468 m = m->m_next; 469 470 if (m != sb->sb_mbtail) { 471 printf("sblastmbufchk: sb_mb %p sb_mbtail %p last %p\n", 472 sb->sb_mb, sb->sb_mbtail, m); 473 printf("packet tree:\n"); 474 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) { 475 printf("\t"); 476 for (n = m; n != NULL; n = n->m_next) 477 printf("%p ", n); 478 printf("\n"); 479 } 480 panic("sblastmbufchk from %s", where); 481 } 482} 483#endif /* SOCKBUF_DEBUG */ 484 485#define SBLINKRECORD(sb, m0) \ 486do { \ 487 if ((sb)->sb_lastrecord != NULL) \ 488 (sb)->sb_lastrecord->m_nextpkt = (m0); \ 489 else \ 490 (sb)->sb_mb = (m0); \ 491 (sb)->sb_lastrecord = (m0); \ 492} while (/*CONSTCOND*/0) 493 494/* 495 * Append mbuf chain m to the last record in the 496 * socket buffer sb. The additional space associated 497 * the mbuf chain is recorded in sb. Empty mbufs are 498 * discarded and mbufs are compacted where possible. 
499 */ 500void 501sbappend(struct sockbuf *sb, struct mbuf *m) 502{ 503 struct mbuf *n; 504 505 if (m == 0) 506 return; 507 508#ifdef MBUFTRACE 509 m_claim(m, sb->sb_mowner); 510#endif 511 512 SBLASTRECORDCHK(sb, "sbappend 1"); 513 514 if ((n = sb->sb_lastrecord) != NULL) { 515 /* 516 * XXX Would like to simply use sb_mbtail here, but 517 * XXX I need to verify that I won't miss an EOR that 518 * XXX way. 519 */ 520 do { 521 if (n->m_flags & M_EOR) { 522 sbappendrecord(sb, m); /* XXXXXX!!!! */ 523 return; 524 } 525 } while (n->m_next && (n = n->m_next)); 526 } else { 527 /* 528 * If this is the first record in the socket buffer, it's 529 * also the last record. 530 */ 531 sb->sb_lastrecord = m; 532 } 533 sbcompress(sb, m, n); 534 SBLASTRECORDCHK(sb, "sbappend 2"); 535} 536 537/* 538 * This version of sbappend() should only be used when the caller 539 * absolutely knows that there will never be more than one record 540 * in the socket buffer, that is, a stream protocol (such as TCP). 541 */ 542void 543sbappendstream(struct sockbuf *sb, struct mbuf *m) 544{ 545 546 KDASSERT(m->m_nextpkt == NULL); 547 KASSERT(sb->sb_mb == sb->sb_lastrecord); 548 549 SBLASTMBUFCHK(sb, __func__); 550 551#ifdef MBUFTRACE 552 m_claim(m, sb->sb_mowner); 553#endif 554 555 sbcompress(sb, m, sb->sb_mbtail); 556 557 sb->sb_lastrecord = sb->sb_mb; 558 SBLASTRECORDCHK(sb, __func__); 559} 560 561#ifdef SOCKBUF_DEBUG 562void 563sbcheck(struct sockbuf *sb) 564{ 565 struct mbuf *m; 566 u_long len, mbcnt; 567 568 len = 0; 569 mbcnt = 0; 570 for (m = sb->sb_mb; m; m = m->m_next) { 571 len += m->m_len; 572 mbcnt += MSIZE; 573 if (m->m_flags & M_EXT) 574 mbcnt += m->m_ext.ext_size; 575 if (m->m_nextpkt) 576 panic("sbcheck nextpkt"); 577 } 578 if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) { 579 printf("cc %lu != %lu || mbcnt %lu != %lu\n", len, sb->sb_cc, 580 mbcnt, sb->sb_mbcnt); 581 panic("sbcheck"); 582 } 583} 584#endif 585 586/* 587 * As above, except the mbuf chain 588 * begins a new record. 
589 */ 590void 591sbappendrecord(struct sockbuf *sb, struct mbuf *m0) 592{ 593 struct mbuf *m; 594 595 if (m0 == 0) 596 return; 597 598#ifdef MBUFTRACE 599 m_claim(m0, sb->sb_mowner); 600#endif 601 /* 602 * Put the first mbuf on the queue. 603 * Note this permits zero length records. 604 */ 605 sballoc(sb, m0); 606 SBLASTRECORDCHK(sb, "sbappendrecord 1"); 607 SBLINKRECORD(sb, m0); 608 m = m0->m_next; 609 m0->m_next = 0; 610 if (m && (m0->m_flags & M_EOR)) { 611 m0->m_flags &= ~M_EOR; 612 m->m_flags |= M_EOR; 613 } 614 sbcompress(sb, m, m0); 615 SBLASTRECORDCHK(sb, "sbappendrecord 2"); 616} 617 618/* 619 * As above except that OOB data 620 * is inserted at the beginning of the sockbuf, 621 * but after any other OOB data. 622 */ 623void 624sbinsertoob(struct sockbuf *sb, struct mbuf *m0) 625{ 626 struct mbuf *m, **mp; 627 628 if (m0 == 0) 629 return; 630 631 SBLASTRECORDCHK(sb, "sbinsertoob 1"); 632 633 for (mp = &sb->sb_mb; (m = *mp) != NULL; mp = &((*mp)->m_nextpkt)) { 634 again: 635 switch (m->m_type) { 636 637 case MT_OOBDATA: 638 continue; /* WANT next train */ 639 640 case MT_CONTROL: 641 if ((m = m->m_next) != NULL) 642 goto again; /* inspect THIS train further */ 643 } 644 break; 645 } 646 /* 647 * Put the first mbuf on the queue. 648 * Note this permits zero length records. 649 */ 650 sballoc(sb, m0); 651 m0->m_nextpkt = *mp; 652 if (*mp == NULL) { 653 /* m0 is actually the new tail */ 654 sb->sb_lastrecord = m0; 655 } 656 *mp = m0; 657 m = m0->m_next; 658 m0->m_next = 0; 659 if (m && (m0->m_flags & M_EOR)) { 660 m0->m_flags &= ~M_EOR; 661 m->m_flags |= M_EOR; 662 } 663 sbcompress(sb, m, m0); 664 SBLASTRECORDCHK(sb, "sbinsertoob 2"); 665} 666 667/* 668 * Append address and data, and optionally, control (ancillary) data 669 * to the receive queue of a socket. If present, 670 * m0 must include a packet header with total length. 671 * Returns 0 if no space in sockbuf or insufficient mbufs. 
672 */ 673int 674sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0, 675 struct mbuf *control) 676{ 677 struct mbuf *m, *n, *nlast; 678 int space, len; 679 680 space = asa->sa_len; 681 682 if (m0 != NULL) { 683 if ((m0->m_flags & M_PKTHDR) == 0) 684 panic("sbappendaddr"); 685 space += m0->m_pkthdr.len; 686#ifdef MBUFTRACE 687 m_claim(m0, sb->sb_mowner); 688#endif 689 } 690 for (n = control; n; n = n->m_next) { 691 space += n->m_len; 692 MCLAIM(n, sb->sb_mowner); 693 if (n->m_next == 0) /* keep pointer to last control buf */ 694 break; 695 } 696 if (space > sbspace(sb)) 697 return (0); 698 MGET(m, M_DONTWAIT, MT_SONAME); 699 if (m == 0) 700 return (0); 701 MCLAIM(m, sb->sb_mowner); 702 /* 703 * XXX avoid 'comparison always true' warning which isn't easily 704 * avoided. 705 */ 706 len = asa->sa_len; 707 if (len > MLEN) { 708 MEXTMALLOC(m, asa->sa_len, M_NOWAIT); 709 if ((m->m_flags & M_EXT) == 0) { 710 m_free(m); 711 return (0); 712 } 713 } 714 m->m_len = asa->sa_len; 715 memcpy(mtod(m, caddr_t), (caddr_t)asa, asa->sa_len); 716 if (n) 717 n->m_next = m0; /* concatenate data to control */ 718 else 719 control = m0; 720 m->m_next = control; 721 722 SBLASTRECORDCHK(sb, "sbappendaddr 1"); 723 724 for (n = m; n->m_next != NULL; n = n->m_next) 725 sballoc(sb, n); 726 sballoc(sb, n); 727 nlast = n; 728 SBLINKRECORD(sb, m); 729 730 sb->sb_mbtail = nlast; 731 SBLASTMBUFCHK(sb, "sbappendaddr"); 732 733 SBLASTRECORDCHK(sb, "sbappendaddr 2"); 734 735 return (1); 736} 737 738int 739sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control) 740{ 741 struct mbuf *m, *mlast, *n; 742 int space; 743 744 space = 0; 745 if (control == 0) 746 panic("sbappendcontrol"); 747 for (m = control; ; m = m->m_next) { 748 space += m->m_len; 749 MCLAIM(m, sb->sb_mowner); 750 if (m->m_next == 0) 751 break; 752 } 753 n = m; /* save pointer to last control buffer */ 754 for (m = m0; m; m = m->m_next) { 755 MCLAIM(m, sb->sb_mowner); 756 space += m->m_len; 757 } 758 
if (space > sbspace(sb)) 759 return (0); 760 n->m_next = m0; /* concatenate data to control */ 761 762 SBLASTRECORDCHK(sb, "sbappendcontrol 1"); 763 764 for (m = control; m->m_next != NULL; m = m->m_next) 765 sballoc(sb, m); 766 sballoc(sb, m); 767 mlast = m; 768 SBLINKRECORD(sb, control); 769 770 sb->sb_mbtail = mlast; 771 SBLASTMBUFCHK(sb, "sbappendcontrol"); 772 773 SBLASTRECORDCHK(sb, "sbappendcontrol 2"); 774 775 return (1); 776} 777 778/* 779 * Compress mbuf chain m into the socket 780 * buffer sb following mbuf n. If n 781 * is null, the buffer is presumed empty. 782 */ 783void 784sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n) 785{ 786 int eor; 787 struct mbuf *o; 788 789 eor = 0; 790 while (m) { 791 eor |= m->m_flags & M_EOR; 792 if (m->m_len == 0 && 793 (eor == 0 || 794 (((o = m->m_next) || (o = n)) && 795 o->m_type == m->m_type))) { 796 if (sb->sb_lastrecord == m) 797 sb->sb_lastrecord = m->m_next; 798 m = m_free(m); 799 continue; 800 } 801 if (n && (n->m_flags & M_EOR) == 0 && 802 /* M_TRAILINGSPACE() checks buffer writeability */ 803 m->m_len <= MCLBYTES / 4 && /* XXX Don't copy too much */ 804 m->m_len <= M_TRAILINGSPACE(n) && 805 n->m_type == m->m_type) { 806 memcpy(mtod(n, caddr_t) + n->m_len, mtod(m, caddr_t), 807 (unsigned)m->m_len); 808 n->m_len += m->m_len; 809 sb->sb_cc += m->m_len; 810 m = m_free(m); 811 continue; 812 } 813 if (n) 814 n->m_next = m; 815 else 816 sb->sb_mb = m; 817 sb->sb_mbtail = m; 818 sballoc(sb, m); 819 n = m; 820 m->m_flags &= ~M_EOR; 821 m = m->m_next; 822 n->m_next = 0; 823 } 824 if (eor) { 825 if (n) 826 n->m_flags |= eor; 827 else 828 printf("semi-panic: sbcompress\n"); 829 } 830 SBLASTMBUFCHK(sb, __func__); 831} 832 833/* 834 * Free all mbufs in a sockbuf. 835 * Check that all resources are reclaimed. 
836 */ 837void 838sbflush(struct sockbuf *sb) 839{ 840 841 KASSERT((sb->sb_flags & SB_LOCK) == 0); 842 843 while (sb->sb_mbcnt) 844 sbdrop(sb, (int)sb->sb_cc); 845 846 KASSERT(sb->sb_cc == 0); 847 KASSERT(sb->sb_mb == NULL); 848 KASSERT(sb->sb_mbtail == NULL); 849 KASSERT(sb->sb_lastrecord == NULL); 850} 851 852/* 853 * Drop data from (the front of) a sockbuf. 854 */ 855void 856sbdrop(struct sockbuf *sb, int len) 857{ 858 struct mbuf *m, *mn, *next; 859 860 next = (m = sb->sb_mb) ? m->m_nextpkt : 0; 861 while (len > 0) { 862 if (m == 0) { 863 if (next == 0) 864 panic("sbdrop"); 865 m = next; 866 next = m->m_nextpkt; 867 continue; 868 } 869 if (m->m_len > len) { 870 m->m_len -= len; 871 m->m_data += len; 872 sb->sb_cc -= len; 873 break; 874 } 875 len -= m->m_len; 876 sbfree(sb, m); 877 MFREE(m, mn); 878 m = mn; 879 } 880 while (m && m->m_len == 0) { 881 sbfree(sb, m); 882 MFREE(m, mn); 883 m = mn; 884 } 885 if (m) { 886 sb->sb_mb = m; 887 m->m_nextpkt = next; 888 } else 889 sb->sb_mb = next; 890 /* 891 * First part is an inline SB_EMPTY_FIXUP(). Second part 892 * makes sure sb_lastrecord is up-to-date if we dropped 893 * part of the last record. 894 */ 895 m = sb->sb_mb; 896 if (m == NULL) { 897 sb->sb_mbtail = NULL; 898 sb->sb_lastrecord = NULL; 899 } else if (m->m_nextpkt == NULL) 900 sb->sb_lastrecord = m; 901} 902 903/* 904 * Drop a record off the front of a sockbuf 905 * and move the next record to the front. 906 */ 907void 908sbdroprecord(struct sockbuf *sb) 909{ 910 struct mbuf *m, *mn; 911 912 m = sb->sb_mb; 913 if (m) { 914 sb->sb_mb = m->m_nextpkt; 915 do { 916 sbfree(sb, m); 917 MFREE(m, mn); 918 } while ((m = mn) != NULL); 919 } 920 SB_EMPTY_FIXUP(sb); 921} 922 923/* 924 * Create a "control" mbuf containing the specified data 925 * with the specified type for presentation on a socket buffer. 
926 */ 927struct mbuf * 928sbcreatecontrol(caddr_t p, int size, int type, int level) 929{ 930 struct cmsghdr *cp; 931 struct mbuf *m; 932 933 if (CMSG_SPACE(size) > MCLBYTES) { 934 printf("sbcreatecontrol: message too large %d\n", size); 935 return NULL; 936 } 937 938 if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL) 939 return ((struct mbuf *) NULL); 940 if (CMSG_SPACE(size) > MLEN) { 941 MCLGET(m, M_DONTWAIT); 942 if ((m->m_flags & M_EXT) == 0) { 943 m_free(m); 944 return NULL; 945 } 946 } 947 cp = mtod(m, struct cmsghdr *); 948 memcpy(CMSG_DATA(cp), p, size); 949 m->m_len = CMSG_SPACE(size); 950 cp->cmsg_len = CMSG_LEN(size); 951 cp->cmsg_level = level; 952 cp->cmsg_type = type; 953 return (m); 954} 955