uipc_socket2.c revision 1.58
1/* $NetBSD: uipc_socket2.c,v 1.58 2003/10/21 22:55:47 thorpej Exp $ */ 2 3/* 4 * Copyright (c) 1982, 1986, 1988, 1990, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 
 *
 *	@(#)uipc_socket2.c	8.2 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uipc_socket2.c,v 1.58 2003/10/21 22:55:47 thorpej Exp $");

#include "opt_mbuftrace.h"
#include "opt_sb_max.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/buf.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/poll.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>

/*
 * Primitive routines for operating on sockets and socket buffers
 */

/* strings for sleep message: */
const char	netcon[] = "netcon";
const char	netcls[] = "netcls";
const char	netio[] = "netio";
const char	netlck[] = "netlck";

/*
 * sb_max is the administrative upper bound on a socket buffer's
 * high-water mark; sb_max_adj is the same bound scaled down (see
 * sb_max_set()) to account for mbuf cluster storage overhead.
 */
u_long	sb_max = SB_MAX;	/* maximum socket buffer size */
static u_long sb_max_adj;	/* adjusted sb_max */

/*
 * Procedures to manipulate state flags of socket
 * and do appropriate wakeups.  Normal sequence from the
 * active (originating) side is that soisconnecting() is
 * called during processing of connect() call,
 * resulting in an eventual call to soisconnected() if/when the
 * connection is established.  When the connection is torn down
 * soisdisconnecting() is called during processing of disconnect() call,
 * and soisdisconnected() is called when the connection to the peer
 * is totally severed.  The semantics of these routines are such that
 * connectionless protocols can call soisconnected() and soisdisconnected()
 * only, bypassing the in-progress calls when setting up a ``connection''
 * takes no time.
 *
 * From the passive side, a socket is created with
 * two queues of sockets: so_q0 for connections in progress
 * and so_q for connections already made and awaiting user acceptance.
 * As a protocol is preparing incoming connections, it creates a socket
 * structure queued on so_q0 by calling sonewconn().
 * When the connection
 * is established, soisconnected() is called, and transfers the
 * socket structure to so_q, making it available to accept().
 *
 * If a socket is closed with sockets on either
 * so_q0 or so_q, these sockets are dropped.
 *
 * If higher level protocols are implemented in
 * the kernel, the wakeups done here will sometimes
 * cause software-interrupt process scheduling.
 */

/*
 * Note that a connect() attempt is now in progress: clear any stale
 * connected/disconnecting state and mark the socket SS_ISCONNECTING.
 */
void
soisconnecting(struct socket *so)
{

	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTING;
}

/*
 * The connection is established.  For a passive-side socket still on
 * its listener's incomplete queue (so_q0), move it to the accept queue
 * (so_q) and wake the listener; otherwise wake anything sleeping on
 * this socket's own connect/read/write state.
 */
void
soisconnected(struct socket *so)
{
	struct socket *head;

	head = so->so_head;
	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
	so->so_state |= SS_ISCONNECTED;
	if (head && soqremque(so, 0)) {
		/* Passive side: hand the socket to accept() via so_q. */
		soqinsque(head, so, 1);
		sorwakeup(head);
		wakeup((caddr_t)&head->so_timeo);
	} else {
		wakeup((caddr_t)&so->so_timeo);
		sorwakeup(so);
		sowwakeup(so);
	}
}

/*
 * A disconnect has been initiated: no further data may be sent or
 * received.  Wake all sleepers so they notice the state change.
 */
void
soisdisconnecting(struct socket *so)
{

	so->so_state &= ~SS_ISCONNECTING;
	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
	wakeup((caddr_t)&so->so_timeo);
	sowwakeup(so);
	sorwakeup(so);
}

/*
 * The connection to the peer is fully torn down.  Mark the socket
 * disconnected and wake all sleepers.
 */
void
soisdisconnected(struct socket *so)
{

	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
	wakeup((caddr_t)&so->so_timeo);
	sowwakeup(so);
	sorwakeup(so);
}

/*
 * When an attempt at a new connection is noted on a socket
 * which accepts connections, sonewconn is called.  If the
 * connection is possible (subject to space constraints, etc.)
 * then we allocate a new structure, properly linked into the
 * data structure of the original socket, and return this.
 * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED.
 *
 * Currently, sonewconn() is defined as sonewconn1() in socketvar.h
 * to catch calls that are missing the (new) second parameter.
 */
struct socket *
sonewconn1(struct socket *head, int connstatus)
{
	struct socket *so;
	int soqueue;

	/* Nonzero connstatus means the connection is already complete. */
	soqueue = connstatus ? 1 : 0;
	/* Enforce the listen backlog (with the traditional 3/2 fudge). */
	if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2)
		return ((struct socket *)0);
	so = pool_get(&socket_pool, PR_NOWAIT);
	if (so == NULL)
		return (NULL);
	memset((caddr_t)so, 0, sizeof(*so));
	/* Inherit the listener's type, options and protocol state. */
	so->so_type = head->so_type;
	so->so_options = head->so_options &~ SO_ACCEPTCONN;
	so->so_linger = head->so_linger;
	so->so_state = head->so_state | SS_NOFDREF;
	so->so_proto = head->so_proto;
	so->so_timeo = head->so_timeo;
	so->so_pgid = head->so_pgid;
	so->so_send = head->so_send;
	so->so_receive = head->so_receive;
	so->so_uid = head->so_uid;
#ifdef MBUFTRACE
	so->so_mowner = head->so_mowner;
	so->so_rcv.sb_mowner = head->so_rcv.sb_mowner;
	so->so_snd.sb_mowner = head->so_snd.sb_mowner;
#endif
	(void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat);
	soqinsque(head, so, soqueue);
	/* Give the protocol a chance to attach; undo everything on failure. */
	if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH,
	    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
	    (struct proc *)0)) {
		(void) soqremque(so, soqueue);
		pool_put(&socket_pool, so);
		return (NULL);
	}
	if (connstatus) {
		sorwakeup(head);
		wakeup((caddr_t)&head->so_timeo);
		so->so_state |= connstatus;
	}
	return (so);
}

/*
 * Insert socket `so' onto listener `head's incomplete (q == 0, so_q0)
 * or completed (q != 0, so_q) connection queue.
 */
void
soqinsque(struct socket *head, struct socket *so, int q)
{

#ifdef DIAGNOSTIC
	if (so->so_onq != NULL)
		panic("soqinsque");
#endif

	so->so_head = head;
	if (q == 0) {
		head->so_q0len++;
		so->so_onq = &head->so_q0;
	} else {
		head->so_qlen++;
		so->so_onq = &head->so_q;
	}
	TAILQ_INSERT_TAIL(so->so_onq, so, so_qe);
}

/*
 * Remove socket `so' from the listener queue selected by `q' (0 for
 * so_q0, nonzero for so_q).  Returns 1 on success, 0 if the socket is
 * not on that queue.
 */
int
soqremque(struct socket *so, int q)
{
	struct socket *head;

	head = so->so_head;
	if (q == 0) {
		if (so->so_onq != &head->so_q0)
			return (0);
		head->so_q0len--;
	} else {
		if (so->so_onq != &head->so_q)
			return (0);
		head->so_qlen--;
	}
	TAILQ_REMOVE(so->so_onq, so, so_qe);
	so->so_onq = NULL;
	so->so_head = NULL;
	return (1);
}

/*
 * Socantsendmore indicates that no more data will be sent on the
 * socket; it would normally be applied to a socket when the user
 * informs the system that no more data is to be sent, by the protocol
 * code (in case PRU_SHUTDOWN).  Socantrcvmore indicates that no more data
 * will be received, and will normally be applied to the socket by a
 * protocol when it detects that the peer will send no more data.
 * Data queued for reading in the socket may yet be read.
 */

void
socantsendmore(struct socket *so)
{

	so->so_state |= SS_CANTSENDMORE;
	sowwakeup(so);
}

void
socantrcvmore(struct socket *so)
{

	so->so_state |= SS_CANTRCVMORE;
	sorwakeup(so);
}

/*
 * Wait for data to arrive at/drain from a socket buffer.
 * The sleep is interruptible by signals unless SB_NOINTR is set.
 */
int
sbwait(struct sockbuf *sb)
{

	sb->sb_flags |= SB_WAIT;
	return (tsleep((caddr_t)&sb->sb_cc,
	    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, netio,
	    sb->sb_timeo));
}

/*
 * Lock a sockbuf already known to be locked;
 * return any error returned from sleep (EINTR).
 */
int
sb_lock(struct sockbuf *sb)
{
	int error;

	while (sb->sb_flags & SB_LOCK) {
		sb->sb_flags |= SB_WANT;
		error = tsleep((caddr_t)&sb->sb_flags,
		    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK|PCATCH,
		    netlck, 0);
		if (error)
			return (error);
	}
	sb->sb_flags |= SB_LOCK;
	return (0);
}

/*
 * Wakeup processes waiting on a socket buffer.
 * Do asynchronous notification via SIGIO
 * if the socket buffer has the SB_ASYNC flag set.
 */
void
sowakeup(struct socket *so, struct sockbuf *sb, int code)
{
	/* Notify poll()/select() waiters first. */
	selnotify(&sb->sb_sel, 0);
	sb->sb_flags &= ~SB_SEL;
	if (sb->sb_flags & SB_WAIT) {
		sb->sb_flags &= ~SB_WAIT;
		wakeup((caddr_t)&sb->sb_cc);
	}
	if (sb->sb_flags & SB_ASYNC) {
		int band;
		/* code is POLL_IN or POLL_OUT; pick the matching poll band. */
		if (code == POLL_IN)
			band = POLLIN|POLLRDNORM;
		else
			band = POLLOUT|POLLWRNORM;
		fownsignal(so->so_pgid, SIGIO, code, band, so);
	}
	if (sb->sb_flags & SB_UPCALL)
		(*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
}

/*
 * Socket buffer (struct sockbuf) utility routines.
 *
 * Each socket contains two socket buffers: one for sending data and
 * one for receiving data.  Each buffer contains a queue of mbufs,
 * information about the number of mbufs and amount of data in the
 * queue, and other fields allowing poll() statements and notification
 * on data availability to be implemented.
 *
 * Data stored in a socket buffer is maintained as a list of records.
 * Each record is a list of mbufs chained together with the m_next
 * field.  Records are chained together with the m_nextpkt field.  The upper
 * level routine soreceive() expects the following conventions to be
 * observed when placing information in the receive buffer:
 *
 * 1. If the protocol requires each message be preceded by the sender's
 *    name, then a record containing that name must be present before
 *    any associated data (mbuf's must be of type MT_SONAME).
 * 2. If the protocol supports the exchange of ``access rights'' (really
 *    just additional data associated with the message), and there are
 *    ``rights'' to be received, then a record containing this data
 *    should be present (mbuf's must be of type MT_CONTROL).
 * 3. If a name or rights record exists, then it must be followed by
 *    a data record, perhaps of zero length.
 *
 * Before using a new socket structure it is first necessary to reserve
 * buffer space to the socket, by calling sbreserve().  This should commit
 * some of the available buffer space in the system buffer pool for the
 * socket (currently, it does nothing but enforce limits).  The space
 * should be released by calling sbrelease() when the socket is destroyed.
 */

/*
 * Set the global socket-buffer size limit.  Rejects values below 16KB
 * and recomputes sb_max_adj, the usable fraction of sb_max after mbuf
 * cluster overhead.  Returns 0 on success or EINVAL.
 */
int
sb_max_set(u_long new_sbmax)
{
	int s;

	if (new_sbmax < (16 * 1024))
		return (EINVAL);

	s = splsoftnet();
	sb_max = new_sbmax;
	sb_max_adj = (u_quad_t)new_sbmax * MCLBYTES / (MSIZE + MCLBYTES);
	splx(s);

	return (0);
}

/*
 * Reserve send and receive buffer space for a socket and establish
 * sane low-water marks.  Returns 0 on success or ENOBUFS, releasing
 * the send reservation if the receive reservation fails.
 */
int
soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
{

	if (sbreserve(&so->so_snd, sndcc) == 0)
		goto bad;
	if (sbreserve(&so->so_rcv, rcvcc) == 0)
		goto bad2;
	if (so->so_rcv.sb_lowat == 0)
		so->so_rcv.sb_lowat = 1;
	if (so->so_snd.sb_lowat == 0)
		so->so_snd.sb_lowat = MCLBYTES;
	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
	return (0);
 bad2:
	sbrelease(&so->so_snd);
 bad:
	return (ENOBUFS);
}

/*
 * Allot mbufs to a sockbuf.
 * Attempt to scale mbmax so that mbcnt doesn't become limiting
 * if buffering efficiency is near the normal case.
 * Returns 1 on success, 0 if cc is zero or exceeds sb_max_adj.
 */
int
sbreserve(struct sockbuf *sb, u_long cc)
{

	KDASSERT(sb_max_adj != 0);
	if (cc == 0 || cc > sb_max_adj)
		return (0);
	sb->sb_hiwat = cc;
	sb->sb_mbmax = min(cc * 2, sb_max);
	if (sb->sb_lowat > sb->sb_hiwat)
		sb->sb_lowat = sb->sb_hiwat;
	return (1);
}

/*
 * Free mbufs held by a socket, and reserved mbuf space.
 */
void
sbrelease(struct sockbuf *sb)
{

	sbflush(sb);
	sb->sb_hiwat = sb->sb_mbmax = 0;
}

/*
 * Routines to add and remove
 * data from an mbuf queue.
 *
 * The routines sbappend() or sbappendrecord() are normally called to
 * append new mbufs to a socket buffer, after checking that adequate
 * space is available, comparing the function sbspace() with the amount
 * of data to be added.  sbappendrecord() differs from sbappend() in
 * that data supplied is treated as the beginning of a new record.
 * To place a sender's address, optional access rights, and data in a
 * socket receive buffer, sbappendaddr() should be used.  To place
 * access rights and data in a socket receive buffer, sbappendcontrol()
 * should be used.  In either case, the new data begins a new record.
 * Note that unlike sbappend() and sbappendrecord(), these routines check
 * for the caller that there will be enough space to store the data.
 * Each fails if there is not enough space, or if it cannot find mbufs
 * to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data
 * awaiting acknowledgement.  Data is normally copied from a socket
 * send buffer in a protocol with m_copy for output to a peer,
 * and then removing the data from the socket buffer with sbdrop()
 * or sbdroprecord() when the data is acknowledged by the peer.
 */

#ifdef SOCKBUF_DEBUG
/*
 * Verify that sb_lastrecord really points at the last record in the
 * buffer; on mismatch, dump the packet chain and panic.
 */
void
sblastrecordchk(struct sockbuf *sb, const char *where)
{
	struct mbuf *m = sb->sb_mb;

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	if (m != sb->sb_lastrecord) {
		printf("sblastrecordchk: sb_mb %p sb_lastrecord %p last %p\n",
		    sb->sb_mb, sb->sb_lastrecord, m);
		printf("packet chain:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
			printf("\t%p\n", m);
		panic("sblastrecordchk from %s", where);
	}
}

/*
 * Verify that sb_mbtail really points at the last mbuf of the last
 * record; on mismatch, dump the full mbuf tree and panic.
 */
void
sblastmbufchk(struct sockbuf *sb, const char *where)
{
	struct mbuf *m = sb->sb_mb;
	struct mbuf *n;

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	while (m && m->m_next)
		m = m->m_next;

	if (m != sb->sb_mbtail) {
		printf("sblastmbufchk: sb_mb %p sb_mbtail %p last %p\n",
		    sb->sb_mb, sb->sb_mbtail, m);
		printf("packet tree:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
			printf("\t");
			for (n = m; n != NULL; n = n->m_next)
				printf("%p ", n);
			printf("\n");
		}
		panic("sblastmbufchk from %s", where);
	}
}
#endif /* SOCKBUF_DEBUG */

/*
 * Link record m0 onto the tail of sb's record chain and update
 * sb_lastrecord.  Handles the empty-buffer case.
 */
#define	SBLINKRECORD(sb, m0)						\
do {									\
	if ((sb)->sb_lastrecord != NULL)				\
		(sb)->sb_lastrecord->m_nextpkt = (m0);			\
	else								\
		(sb)->sb_mb = (m0);					\
	(sb)->sb_lastrecord = (m0);					\
} while (/*CONSTCOND*/0)

/*
 * Append mbuf chain m to the last record in the
 * socket buffer sb.  The additional space associated
 * the mbuf chain is recorded in sb.  Empty mbufs are
 * discarded and mbufs are compacted where possible.
 */
void
sbappend(struct sockbuf *sb, struct mbuf *m)
{
	struct mbuf *n;

	if (m == 0)
		return;

#ifdef MBUFTRACE
	m_claim(m, sb->sb_mowner);
#endif

	SBLASTRECORDCHK(sb, "sbappend 1");

	if ((n = sb->sb_lastrecord) != NULL) {
		/*
		 * XXX Would like to simply use sb_mbtail here, but
		 * XXX I need to verify that I won't miss an EOR that
		 * XXX way.
		 */
		do {
			if (n->m_flags & M_EOR) {
				/* Record was terminated; start a new one. */
				sbappendrecord(sb, m); /* XXXXXX!!!! */
				return;
			}
		} while (n->m_next && (n = n->m_next));
	} else {
		/*
		 * If this is the first record in the socket buffer, it's
		 * also the last record.
		 */
		sb->sb_lastrecord = m;
	}
	sbcompress(sb, m, n);
	SBLASTRECORDCHK(sb, "sbappend 2");
}

/*
 * This version of sbappend() should only be used when the caller
 * absolutely knows that there will never be more than one record
 * in the socket buffer, that is, a stream protocol (such as TCP).
 */
void
sbappendstream(struct sockbuf *sb, struct mbuf *m)
{

	KDASSERT(m->m_nextpkt == NULL);
	KASSERT(sb->sb_mb == sb->sb_lastrecord);

	SBLASTMBUFCHK(sb, __func__);

#ifdef MBUFTRACE
	m_claim(m, sb->sb_mowner);
#endif

	/* Append directly at the known tail; no record scan needed. */
	sbcompress(sb, m, sb->sb_mbtail);

	sb->sb_lastrecord = sb->sb_mb;
	SBLASTRECORDCHK(sb, __func__);
}

#ifdef SOCKBUF_DEBUG
/*
 * Consistency check: recompute byte and mbuf counts for the buffer and
 * panic if they disagree with the cached sb_cc/sb_mbcnt.
 */
void
sbcheck(struct sockbuf *sb)
{
	struct mbuf *m;
	u_long len, mbcnt;

	len = 0;
	mbcnt = 0;
	for (m = sb->sb_mb; m; m = m->m_next) {
		len += m->m_len;
		mbcnt += MSIZE;
		if (m->m_flags & M_EXT)
			mbcnt += m->m_ext.ext_size;
		if (m->m_nextpkt)
			panic("sbcheck nextpkt");
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		printf("cc %lu != %lu || mbcnt %lu != %lu\n", len, sb->sb_cc,
		    mbcnt, sb->sb_mbcnt);
		panic("sbcheck");
	}
}
#endif

/*
 * As above, except the mbuf chain
 * begins a new record.
 */
void
sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf *m;

	if (m0 == 0)
		return;

#ifdef MBUFTRACE
	m_claim(m0, sb->sb_mowner);
#endif
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	SBLASTRECORDCHK(sb, "sbappendrecord 1");
	SBLINKRECORD(sb, m0);
	m = m0->m_next;
	m0->m_next = 0;
	/* If EOR was on the head, move it to the remainder of the chain. */
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
	SBLASTRECORDCHK(sb, "sbappendrecord 2");
}

/*
 * As above except that OOB data
 * is inserted at the beginning of the sockbuf,
 * but after any other OOB data.
 */
void
sbinsertoob(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf *m, **mp;

	if (m0 == 0)
		return;

	SBLASTRECORDCHK(sb, "sbinsertoob 1");

	/* Find the insertion point: after existing OOB records. */
	for (mp = &sb->sb_mb; (m = *mp) != NULL; mp = &((*mp)->m_nextpkt)) {
	again:
		switch (m->m_type) {

		case MT_OOBDATA:
			continue;		/* WANT next train */

		case MT_CONTROL:
			if ((m = m->m_next) != NULL)
				goto again;	/* inspect THIS train further */
		}
		break;
	}
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	m0->m_nextpkt = *mp;
	if (*mp == NULL) {
		/* m0 is actually the new tail */
		sb->sb_lastrecord = m0;
	}
	*mp = m0;
	m = m0->m_next;
	m0->m_next = 0;
	/* If EOR was on the head, move it to the remainder of the chain. */
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
	SBLASTRECORDCHK(sb, "sbinsertoob 2");
}

/*
 * Append address and data, and optionally, control (ancillary) data
 * to the receive queue of a socket.  If present,
 * m0 must include a packet header with total length.
 * Returns 0 if no space in sockbuf or insufficient mbufs.
 */
int
sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0,
	struct mbuf *control)
{
	struct mbuf *m, *n, *nlast;
	int space, len;

	/* Tally the total space required: address + data + control. */
	space = asa->sa_len;

	if (m0 != NULL) {
		if ((m0->m_flags & M_PKTHDR) == 0)
			panic("sbappendaddr");
		space += m0->m_pkthdr.len;
#ifdef MBUFTRACE
		m_claim(m0, sb->sb_mowner);
#endif
	}
	for (n = control; n; n = n->m_next) {
		space += n->m_len;
		MCLAIM(n, sb->sb_mowner);
		if (n->m_next == 0)	/* keep pointer to last control buf */
			break;
	}
	if (space > sbspace(sb))
		return (0);
	/* Build the leading MT_SONAME mbuf holding the sender's address. */
	MGET(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	MCLAIM(m, sb->sb_mowner);
	/*
	 * XXX avoid 'comparison always true' warning which isn't easily
	 * avoided.
	 */
	len = asa->sa_len;
	if (len > MLEN) {
		MEXTMALLOC(m, asa->sa_len, M_NOWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return (0);
		}
	}
	m->m_len = asa->sa_len;
	memcpy(mtod(m, caddr_t), (caddr_t)asa, asa->sa_len);
	if (n)
		n->m_next = m0;		/* concatenate data to control */
	else
		control = m0;
	m->m_next = control;

	SBLASTRECORDCHK(sb, "sbappendaddr 1");

	/* Account for every mbuf in the new record and link it in. */
	for (n = m; n->m_next != NULL; n = n->m_next)
		sballoc(sb, n);
	sballoc(sb, n);
	nlast = n;
	SBLINKRECORD(sb, m);

	sb->sb_mbtail = nlast;
	SBLASTMBUFCHK(sb, "sbappendaddr");

	SBLASTRECORDCHK(sb, "sbappendaddr 2");

	return (1);
}

/*
 * Append control (ancillary) data and optional data m0 as a new record
 * on sb's receive queue.  Returns 0 if there is no space, 1 on success.
 */
int
sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
{
	struct mbuf *m, *mlast, *n;
	int space;

	space = 0;
	if (control == 0)
		panic("sbappendcontrol");
	for (m = control; ; m = m->m_next) {
		space += m->m_len;
		MCLAIM(m, sb->sb_mowner);
		if (m->m_next == 0)
			break;
	}
	n = m;			/* save pointer to last control buffer */
	for (m = m0; m; m = m->m_next) {
		MCLAIM(m, sb->sb_mowner);
		space += m->m_len;
	}
	if (space > sbspace(sb))
		return (0);
	n->m_next = m0;			/* concatenate data to control */

	SBLASTRECORDCHK(sb, "sbappendcontrol 1");

	/* Account for every mbuf in the new record and link it in. */
	for (m = control; m->m_next != NULL; m = m->m_next)
		sballoc(sb, m);
	sballoc(sb, m);
	mlast = m;
	SBLINKRECORD(sb, control);

	sb->sb_mbtail = mlast;
	SBLASTMBUFCHK(sb, "sbappendcontrol");

	SBLASTRECORDCHK(sb, "sbappendcontrol 2");

	return (1);
}

/*
 * Compress mbuf chain m into the socket
 * buffer sb following mbuf n.  If n
 * is null, the buffer is presumed empty.
 */
void
sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
{
	int eor;
	struct mbuf *o;

	eor = 0;
	while (m) {
		eor |= m->m_flags & M_EOR;
		/*
		 * Discard empty mbufs, unless one carries the only EOR
		 * mark and there is no same-type mbuf to move it to.
		 */
		if (m->m_len == 0 &&
		    (eor == 0 ||
		     (((o = m->m_next) || (o = n)) &&
		      o->m_type == m->m_type))) {
			if (sb->sb_lastrecord == m)
				sb->sb_lastrecord = m->m_next;
			m = m_free(m);
			continue;
		}
		/* Coalesce small mbufs into the tail when there is room. */
		if (n && (n->m_flags & M_EOR) == 0 &&
		    /* M_TRAILINGSPACE() checks buffer writeability */
		    m->m_len <= MCLBYTES / 4 && /* XXX Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			memcpy(mtod(n, caddr_t) + n->m_len, mtod(m, caddr_t),
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			m = m_free(m);
			continue;
		}
		/* Otherwise link the mbuf in as the new tail. */
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sb->sb_mbtail = m;
		sballoc(sb, m);
		n = m;
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	/* Re-apply a saved EOR mark to the final mbuf, if any. */
	if (eor) {
		if (n)
			n->m_flags |= eor;
		else
			printf("semi-panic: sbcompress\n");
	}
	SBLASTMBUFCHK(sb, __func__);
}

/*
 * Free all mbufs in a sockbuf.
 * Check that all resources are reclaimed.
 */
void
sbflush(struct sockbuf *sb)
{

	KASSERT((sb->sb_flags & SB_LOCK) == 0);

	while (sb->sb_mbcnt)
		sbdrop(sb, (int)sb->sb_cc);

	KASSERT(sb->sb_cc == 0);
	KASSERT(sb->sb_mb == NULL);
	KASSERT(sb->sb_mbtail == NULL);
	KASSERT(sb->sb_lastrecord == NULL);
}

/*
 * Drop data from (the front of) a sockbuf.
 * len bytes are removed; record boundaries are crossed as needed.
 */
void
sbdrop(struct sockbuf *sb, int len)
{
	struct mbuf *m, *mn, *next;

	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	while (len > 0) {
		if (m == 0) {
			/* Current record exhausted; step to the next one. */
			if (next == 0)
				panic("sbdrop");
			m = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			/* Partial mbuf: trim from the front and stop. */
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			break;
		}
		len -= m->m_len;
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	/* Discard any now-empty mbufs at the record head. */
	while (m && m->m_len == 0) {
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;
	/*
	 * First part is an inline SB_EMPTY_FIXUP().  Second part
	 * makes sure sb_lastrecord is up-to-date if we dropped
	 * part of the last record.
	 */
	m = sb->sb_mb;
	if (m == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (m->m_nextpkt == NULL)
		sb->sb_lastrecord = m;
}

/*
 * Drop a record off the front of a sockbuf
 * and move the next record to the front.
 */
void
sbdroprecord(struct sockbuf *sb)
{
	struct mbuf *m, *mn;

	m = sb->sb_mb;
	if (m) {
		sb->sb_mb = m->m_nextpkt;
		do {
			sbfree(sb, m);
			MFREE(m, mn);
		} while ((m = mn) != NULL);
	}
	SB_EMPTY_FIXUP(sb);
}

/*
 * Create a "control" mbuf containing the specified data
 * with the specified type for presentation on a socket buffer.
 */
struct mbuf *
sbcreatecontrol(caddr_t p, int size, int type, int level)
{
	struct cmsghdr *cp;
	struct mbuf *m;

	/* Refuse messages that cannot fit even in a cluster mbuf. */
	if (CMSG_SPACE(size) > MCLBYTES) {
		printf("sbcreatecontrol: message too large %d\n", size);
		return NULL;
	}

	if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
		return ((struct mbuf *) NULL);
	/* Upgrade to a cluster when the payload exceeds a plain mbuf. */
	if (CMSG_SPACE(size) > MLEN) {
		MCLGET(m, M_DONTWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return NULL;
		}
	}
	/* Fill in the cmsghdr and copy the payload after it. */
	cp = mtod(m, struct cmsghdr *);
	memcpy(CMSG_DATA(cp), p, size);
	m->m_len = CMSG_SPACE(size);
	cp->cmsg_len = CMSG_LEN(size);
	cp->cmsg_level = level;
	cp->cmsg_type = type;
	return (m);
}