/* uipc_socket2.c revision 1.60 */
/*	$NetBSD: uipc_socket2.c,v 1.60 2004/04/18 16:38:42 matt Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
30 * 31 * @(#)uipc_socket2.c 8.2 (Berkeley) 2/14/95 32 */ 33 34#include <sys/cdefs.h> 35__KERNEL_RCSID(0, "$NetBSD: uipc_socket2.c,v 1.60 2004/04/18 16:38:42 matt Exp $"); 36 37#include "opt_mbuftrace.h" 38#include "opt_sb_max.h" 39 40#include <sys/param.h> 41#include <sys/systm.h> 42#include <sys/proc.h> 43#include <sys/file.h> 44#include <sys/buf.h> 45#include <sys/malloc.h> 46#include <sys/mbuf.h> 47#include <sys/protosw.h> 48#include <sys/poll.h> 49#include <sys/socket.h> 50#include <sys/socketvar.h> 51#include <sys/signalvar.h> 52 53/* 54 * Primitive routines for operating on sockets and socket buffers 55 */ 56 57/* strings for sleep message: */ 58const char netcon[] = "netcon"; 59const char netcls[] = "netcls"; 60const char netio[] = "netio"; 61const char netlck[] = "netlck"; 62 63u_long sb_max = SB_MAX; /* maximum socket buffer size */ 64static u_long sb_max_adj; /* adjusted sb_max */ 65 66/* 67 * Procedures to manipulate state flags of socket 68 * and do appropriate wakeups. Normal sequence from the 69 * active (originating) side is that soisconnecting() is 70 * called during processing of connect() call, 71 * resulting in an eventual call to soisconnected() if/when the 72 * connection is established. When the connection is torn down 73 * soisdisconnecting() is called during processing of disconnect() call, 74 * and soisdisconnected() is called when the connection to the peer 75 * is totally severed. The semantics of these routines are such that 76 * connectionless protocols can call soisconnected() and soisdisconnected() 77 * only, bypassing the in-progress calls when setting up a ``connection'' 78 * takes no time. 79 * 80 * From the passive side, a socket is created with 81 * two queues of sockets: so_q0 for connections in progress 82 * and so_q for connections already made and awaiting user acceptance. 83 * As a protocol is preparing incoming connections, it creates a socket 84 * structure queued on so_q0 by calling sonewconn(). 
When the connection 85 * is established, soisconnected() is called, and transfers the 86 * socket structure to so_q, making it available to accept(). 87 * 88 * If a socket is closed with sockets on either 89 * so_q0 or so_q, these sockets are dropped. 90 * 91 * If higher level protocols are implemented in 92 * the kernel, the wakeups done here will sometimes 93 * cause software-interrupt process scheduling. 94 */ 95 96void 97soisconnecting(struct socket *so) 98{ 99 100 so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); 101 so->so_state |= SS_ISCONNECTING; 102} 103 104void 105soisconnected(struct socket *so) 106{ 107 struct socket *head; 108 109 head = so->so_head; 110 so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING); 111 so->so_state |= SS_ISCONNECTED; 112 if (head && soqremque(so, 0)) { 113 soqinsque(head, so, 1); 114 sorwakeup(head); 115 wakeup((caddr_t)&head->so_timeo); 116 } else { 117 wakeup((caddr_t)&so->so_timeo); 118 sorwakeup(so); 119 sowwakeup(so); 120 } 121} 122 123void 124soisdisconnecting(struct socket *so) 125{ 126 127 so->so_state &= ~SS_ISCONNECTING; 128 so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE); 129 wakeup((caddr_t)&so->so_timeo); 130 sowwakeup(so); 131 sorwakeup(so); 132} 133 134void 135soisdisconnected(struct socket *so) 136{ 137 138 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); 139 so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED); 140 wakeup((caddr_t)&so->so_timeo); 141 sowwakeup(so); 142 sorwakeup(so); 143} 144 145/* 146 * When an attempt at a new connection is noted on a socket 147 * which accepts connections, sonewconn is called. If the 148 * connection is possible (subject to space constraints, etc.) 149 * then we allocate a new structure, propoerly linked into the 150 * data structure of the original socket, and return this. 151 * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED. 
152 * 153 * Currently, sonewconn() is defined as sonewconn1() in socketvar.h 154 * to catch calls that are missing the (new) second parameter. 155 */ 156struct socket * 157sonewconn1(struct socket *head, int connstatus) 158{ 159 struct socket *so; 160 int soqueue; 161 162 soqueue = connstatus ? 1 : 0; 163 if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2) 164 return ((struct socket *)0); 165 so = pool_get(&socket_pool, PR_NOWAIT); 166 if (so == NULL) 167 return (NULL); 168 memset((caddr_t)so, 0, sizeof(*so)); 169 so->so_type = head->so_type; 170 so->so_options = head->so_options &~ SO_ACCEPTCONN; 171 so->so_linger = head->so_linger; 172 so->so_state = head->so_state | SS_NOFDREF; 173 so->so_proto = head->so_proto; 174 so->so_timeo = head->so_timeo; 175 so->so_pgid = head->so_pgid; 176 so->so_send = head->so_send; 177 so->so_receive = head->so_receive; 178 so->so_uid = head->so_uid; 179#ifdef MBUFTRACE 180 so->so_mowner = head->so_mowner; 181 so->so_rcv.sb_mowner = head->so_rcv.sb_mowner; 182 so->so_snd.sb_mowner = head->so_snd.sb_mowner; 183#endif 184 (void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat); 185 soqinsque(head, so, soqueue); 186 if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH, 187 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0, 188 (struct proc *)0)) { 189 (void) soqremque(so, soqueue); 190 pool_put(&socket_pool, so); 191 return (NULL); 192 } 193 if (connstatus) { 194 sorwakeup(head); 195 wakeup((caddr_t)&head->so_timeo); 196 so->so_state |= connstatus; 197 } 198 return (so); 199} 200 201void 202soqinsque(struct socket *head, struct socket *so, int q) 203{ 204 205#ifdef DIAGNOSTIC 206 if (so->so_onq != NULL) 207 panic("soqinsque"); 208#endif 209 210 so->so_head = head; 211 if (q == 0) { 212 head->so_q0len++; 213 so->so_onq = &head->so_q0; 214 } else { 215 head->so_qlen++; 216 so->so_onq = &head->so_q; 217 } 218 TAILQ_INSERT_TAIL(so->so_onq, so, so_qe); 219} 220 221int 222soqremque(struct socket *so, int q) 223{ 224 struct 
socket *head; 225 226 head = so->so_head; 227 if (q == 0) { 228 if (so->so_onq != &head->so_q0) 229 return (0); 230 head->so_q0len--; 231 } else { 232 if (so->so_onq != &head->so_q) 233 return (0); 234 head->so_qlen--; 235 } 236 TAILQ_REMOVE(so->so_onq, so, so_qe); 237 so->so_onq = NULL; 238 so->so_head = NULL; 239 return (1); 240} 241 242/* 243 * Socantsendmore indicates that no more data will be sent on the 244 * socket; it would normally be applied to a socket when the user 245 * informs the system that no more data is to be sent, by the protocol 246 * code (in case PRU_SHUTDOWN). Socantrcvmore indicates that no more data 247 * will be received, and will normally be applied to the socket by a 248 * protocol when it detects that the peer will send no more data. 249 * Data queued for reading in the socket may yet be read. 250 */ 251 252void 253socantsendmore(struct socket *so) 254{ 255 256 so->so_state |= SS_CANTSENDMORE; 257 sowwakeup(so); 258} 259 260void 261socantrcvmore(struct socket *so) 262{ 263 264 so->so_state |= SS_CANTRCVMORE; 265 sorwakeup(so); 266} 267 268/* 269 * Wait for data to arrive at/drain from a socket buffer. 270 */ 271int 272sbwait(struct sockbuf *sb) 273{ 274 275 sb->sb_flags |= SB_WAIT; 276 return (tsleep((caddr_t)&sb->sb_cc, 277 (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, netio, 278 sb->sb_timeo)); 279} 280 281/* 282 * Lock a sockbuf already known to be locked; 283 * return any error returned from sleep (EINTR). 284 */ 285int 286sb_lock(struct sockbuf *sb) 287{ 288 int error; 289 290 while (sb->sb_flags & SB_LOCK) { 291 sb->sb_flags |= SB_WANT; 292 error = tsleep((caddr_t)&sb->sb_flags, 293 (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK|PCATCH, 294 netlck, 0); 295 if (error) 296 return (error); 297 } 298 sb->sb_flags |= SB_LOCK; 299 return (0); 300} 301 302/* 303 * Wakeup processes waiting on a socket buffer. 304 * Do asynchronous notification via SIGIO 305 * if the socket buffer has the SB_ASYNC flag set. 
306 */ 307void 308sowakeup(struct socket *so, struct sockbuf *sb, int code) 309{ 310 selnotify(&sb->sb_sel, 0); 311 sb->sb_flags &= ~SB_SEL; 312 if (sb->sb_flags & SB_WAIT) { 313 sb->sb_flags &= ~SB_WAIT; 314 wakeup((caddr_t)&sb->sb_cc); 315 } 316 if (sb->sb_flags & SB_ASYNC) { 317 int band; 318 if (code == POLL_IN) 319 band = POLLIN|POLLRDNORM; 320 else 321 band = POLLOUT|POLLWRNORM; 322 fownsignal(so->so_pgid, SIGIO, code, band, so); 323 } 324 if (sb->sb_flags & SB_UPCALL) 325 (*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT); 326} 327 328/* 329 * Socket buffer (struct sockbuf) utility routines. 330 * 331 * Each socket contains two socket buffers: one for sending data and 332 * one for receiving data. Each buffer contains a queue of mbufs, 333 * information about the number of mbufs and amount of data in the 334 * queue, and other fields allowing poll() statements and notification 335 * on data availability to be implemented. 336 * 337 * Data stored in a socket buffer is maintained as a list of records. 338 * Each record is a list of mbufs chained together with the m_next 339 * field. Records are chained together with the m_nextpkt field. The upper 340 * level routine soreceive() expects the following conventions to be 341 * observed when placing information in the receive buffer: 342 * 343 * 1. If the protocol requires each message be preceded by the sender's 344 * name, then a record containing that name must be present before 345 * any associated data (mbuf's must be of type MT_SONAME). 346 * 2. If the protocol supports the exchange of ``access rights'' (really 347 * just additional data associated with the message), and there are 348 * ``rights'' to be received, then a record containing this data 349 * should be present (mbuf's must be of type MT_CONTROL). 350 * 3. If a name or rights record exists, then it must be followed by 351 * a data record, perhaps of zero length. 
352 * 353 * Before using a new socket structure it is first necessary to reserve 354 * buffer space to the socket, by calling sbreserve(). This should commit 355 * some of the available buffer space in the system buffer pool for the 356 * socket (currently, it does nothing but enforce limits). The space 357 * should be released by calling sbrelease() when the socket is destroyed. 358 */ 359 360int 361sb_max_set(u_long new_sbmax) 362{ 363 int s; 364 365 if (new_sbmax < (16 * 1024)) 366 return (EINVAL); 367 368 s = splsoftnet(); 369 sb_max = new_sbmax; 370 sb_max_adj = (u_quad_t)new_sbmax * MCLBYTES / (MSIZE + MCLBYTES); 371 splx(s); 372 373 return (0); 374} 375 376int 377soreserve(struct socket *so, u_long sndcc, u_long rcvcc) 378{ 379 380 if (sbreserve(&so->so_snd, sndcc, so) == 0) 381 goto bad; 382 if (sbreserve(&so->so_rcv, rcvcc, so) == 0) 383 goto bad2; 384 if (so->so_rcv.sb_lowat == 0) 385 so->so_rcv.sb_lowat = 1; 386 if (so->so_snd.sb_lowat == 0) 387 so->so_snd.sb_lowat = MCLBYTES; 388 if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat) 389 so->so_snd.sb_lowat = so->so_snd.sb_hiwat; 390 return (0); 391 bad2: 392 sbrelease(&so->so_snd, so); 393 bad: 394 return (ENOBUFS); 395} 396 397/* 398 * Allot mbufs to a sockbuf. 399 * Attempt to scale mbmax so that mbcnt doesn't become limiting 400 * if buffering efficiency is near the normal case. 
401 */ 402int 403sbreserve(struct sockbuf *sb, u_long cc, struct socket *so) 404{ 405 struct proc *p = curproc; /* XXX */ 406 407 KDASSERT(sb_max_adj != 0); 408 if (cc == 0 || cc > sb_max_adj) 409 return (0); 410 if (so) { 411 rlim_t maxcc; 412 if (p && p->p_ucred->cr_uid == so->so_uid) 413 maxcc = p->p_rlimit[RLIMIT_SBSIZE].rlim_cur; 414 else 415 maxcc = RLIM_INFINITY; 416 if (!chgsbsize(so->so_uid, &sb->sb_hiwat, cc, maxcc)) 417 return 0; 418 } 419 sb->sb_mbmax = min(cc * 2, sb_max); 420 if (sb->sb_lowat > sb->sb_hiwat) 421 sb->sb_lowat = sb->sb_hiwat; 422 return (1); 423} 424 425/* 426 * Free mbufs held by a socket, and reserved mbuf space. 427 */ 428void 429sbrelease(struct sockbuf *sb, struct socket *so) 430{ 431 432 sbflush(sb); 433 (void)chgsbsize(so->so_uid, &sb->sb_hiwat, 0, 434 RLIM_INFINITY); 435 sb->sb_mbmax = 0; 436} 437 438/* 439 * Routines to add and remove 440 * data from an mbuf queue. 441 * 442 * The routines sbappend() or sbappendrecord() are normally called to 443 * append new mbufs to a socket buffer, after checking that adequate 444 * space is available, comparing the function sbspace() with the amount 445 * of data to be added. sbappendrecord() differs from sbappend() in 446 * that data supplied is treated as the beginning of a new record. 447 * To place a sender's address, optional access rights, and data in a 448 * socket receive buffer, sbappendaddr() should be used. To place 449 * access rights and data in a socket receive buffer, sbappendrights() 450 * should be used. In either case, the new data begins a new record. 451 * Note that unlike sbappend() and sbappendrecord(), these routines check 452 * for the caller that there will be enough space to store the data. 453 * Each fails if there is not enough space, or if it cannot find mbufs 454 * to store additional information in. 455 * 456 * Reliable protocols may use the socket send buffer to hold data 457 * awaiting acknowledgement. 
Data is normally copied from a socket 458 * send buffer in a protocol with m_copy for output to a peer, 459 * and then removing the data from the socket buffer with sbdrop() 460 * or sbdroprecord() when the data is acknowledged by the peer. 461 */ 462 463#ifdef SOCKBUF_DEBUG 464void 465sblastrecordchk(struct sockbuf *sb, const char *where) 466{ 467 struct mbuf *m = sb->sb_mb; 468 469 while (m && m->m_nextpkt) 470 m = m->m_nextpkt; 471 472 if (m != sb->sb_lastrecord) { 473 printf("sblastrecordchk: sb_mb %p sb_lastrecord %p last %p\n", 474 sb->sb_mb, sb->sb_lastrecord, m); 475 printf("packet chain:\n"); 476 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) 477 printf("\t%p\n", m); 478 panic("sblastrecordchk from %s", where); 479 } 480} 481 482void 483sblastmbufchk(struct sockbuf *sb, const char *where) 484{ 485 struct mbuf *m = sb->sb_mb; 486 struct mbuf *n; 487 488 while (m && m->m_nextpkt) 489 m = m->m_nextpkt; 490 491 while (m && m->m_next) 492 m = m->m_next; 493 494 if (m != sb->sb_mbtail) { 495 printf("sblastmbufchk: sb_mb %p sb_mbtail %p last %p\n", 496 sb->sb_mb, sb->sb_mbtail, m); 497 printf("packet tree:\n"); 498 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) { 499 printf("\t"); 500 for (n = m; n != NULL; n = n->m_next) 501 printf("%p ", n); 502 printf("\n"); 503 } 504 panic("sblastmbufchk from %s", where); 505 } 506} 507#endif /* SOCKBUF_DEBUG */ 508 509#define SBLINKRECORD(sb, m0) \ 510do { \ 511 if ((sb)->sb_lastrecord != NULL) \ 512 (sb)->sb_lastrecord->m_nextpkt = (m0); \ 513 else \ 514 (sb)->sb_mb = (m0); \ 515 (sb)->sb_lastrecord = (m0); \ 516} while (/*CONSTCOND*/0) 517 518/* 519 * Append mbuf chain m to the last record in the 520 * socket buffer sb. The additional space associated 521 * the mbuf chain is recorded in sb. Empty mbufs are 522 * discarded and mbufs are compacted where possible. 
523 */ 524void 525sbappend(struct sockbuf *sb, struct mbuf *m) 526{ 527 struct mbuf *n; 528 529 if (m == 0) 530 return; 531 532#ifdef MBUFTRACE 533 m_claim(m, sb->sb_mowner); 534#endif 535 536 SBLASTRECORDCHK(sb, "sbappend 1"); 537 538 if ((n = sb->sb_lastrecord) != NULL) { 539 /* 540 * XXX Would like to simply use sb_mbtail here, but 541 * XXX I need to verify that I won't miss an EOR that 542 * XXX way. 543 */ 544 do { 545 if (n->m_flags & M_EOR) { 546 sbappendrecord(sb, m); /* XXXXXX!!!! */ 547 return; 548 } 549 } while (n->m_next && (n = n->m_next)); 550 } else { 551 /* 552 * If this is the first record in the socket buffer, it's 553 * also the last record. 554 */ 555 sb->sb_lastrecord = m; 556 } 557 sbcompress(sb, m, n); 558 SBLASTRECORDCHK(sb, "sbappend 2"); 559} 560 561/* 562 * This version of sbappend() should only be used when the caller 563 * absolutely knows that there will never be more than one record 564 * in the socket buffer, that is, a stream protocol (such as TCP). 565 */ 566void 567sbappendstream(struct sockbuf *sb, struct mbuf *m) 568{ 569 570 KDASSERT(m->m_nextpkt == NULL); 571 KASSERT(sb->sb_mb == sb->sb_lastrecord); 572 573 SBLASTMBUFCHK(sb, __func__); 574 575#ifdef MBUFTRACE 576 m_claim(m, sb->sb_mowner); 577#endif 578 579 sbcompress(sb, m, sb->sb_mbtail); 580 581 sb->sb_lastrecord = sb->sb_mb; 582 SBLASTRECORDCHK(sb, __func__); 583} 584 585#ifdef SOCKBUF_DEBUG 586void 587sbcheck(struct sockbuf *sb) 588{ 589 struct mbuf *m; 590 u_long len, mbcnt; 591 592 len = 0; 593 mbcnt = 0; 594 for (m = sb->sb_mb; m; m = m->m_next) { 595 len += m->m_len; 596 mbcnt += MSIZE; 597 if (m->m_flags & M_EXT) 598 mbcnt += m->m_ext.ext_size; 599 if (m->m_nextpkt) 600 panic("sbcheck nextpkt"); 601 } 602 if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) { 603 printf("cc %lu != %lu || mbcnt %lu != %lu\n", len, sb->sb_cc, 604 mbcnt, sb->sb_mbcnt); 605 panic("sbcheck"); 606 } 607} 608#endif 609 610/* 611 * As above, except the mbuf chain 612 * begins a new record. 
613 */ 614void 615sbappendrecord(struct sockbuf *sb, struct mbuf *m0) 616{ 617 struct mbuf *m; 618 619 if (m0 == 0) 620 return; 621 622#ifdef MBUFTRACE 623 m_claim(m0, sb->sb_mowner); 624#endif 625 /* 626 * Put the first mbuf on the queue. 627 * Note this permits zero length records. 628 */ 629 sballoc(sb, m0); 630 SBLASTRECORDCHK(sb, "sbappendrecord 1"); 631 SBLINKRECORD(sb, m0); 632 m = m0->m_next; 633 m0->m_next = 0; 634 if (m && (m0->m_flags & M_EOR)) { 635 m0->m_flags &= ~M_EOR; 636 m->m_flags |= M_EOR; 637 } 638 sbcompress(sb, m, m0); 639 SBLASTRECORDCHK(sb, "sbappendrecord 2"); 640} 641 642/* 643 * As above except that OOB data 644 * is inserted at the beginning of the sockbuf, 645 * but after any other OOB data. 646 */ 647void 648sbinsertoob(struct sockbuf *sb, struct mbuf *m0) 649{ 650 struct mbuf *m, **mp; 651 652 if (m0 == 0) 653 return; 654 655 SBLASTRECORDCHK(sb, "sbinsertoob 1"); 656 657 for (mp = &sb->sb_mb; (m = *mp) != NULL; mp = &((*mp)->m_nextpkt)) { 658 again: 659 switch (m->m_type) { 660 661 case MT_OOBDATA: 662 continue; /* WANT next train */ 663 664 case MT_CONTROL: 665 if ((m = m->m_next) != NULL) 666 goto again; /* inspect THIS train further */ 667 } 668 break; 669 } 670 /* 671 * Put the first mbuf on the queue. 672 * Note this permits zero length records. 673 */ 674 sballoc(sb, m0); 675 m0->m_nextpkt = *mp; 676 if (*mp == NULL) { 677 /* m0 is actually the new tail */ 678 sb->sb_lastrecord = m0; 679 } 680 *mp = m0; 681 m = m0->m_next; 682 m0->m_next = 0; 683 if (m && (m0->m_flags & M_EOR)) { 684 m0->m_flags &= ~M_EOR; 685 m->m_flags |= M_EOR; 686 } 687 sbcompress(sb, m, m0); 688 SBLASTRECORDCHK(sb, "sbinsertoob 2"); 689} 690 691/* 692 * Append address and data, and optionally, control (ancillary) data 693 * to the receive queue of a socket. If present, 694 * m0 must include a packet header with total length. 695 * Returns 0 if no space in sockbuf or insufficient mbufs. 
696 */ 697int 698sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0, 699 struct mbuf *control) 700{ 701 struct mbuf *m, *n, *nlast; 702 int space, len; 703 704 space = asa->sa_len; 705 706 if (m0 != NULL) { 707 if ((m0->m_flags & M_PKTHDR) == 0) 708 panic("sbappendaddr"); 709 space += m0->m_pkthdr.len; 710#ifdef MBUFTRACE 711 m_claim(m0, sb->sb_mowner); 712#endif 713 } 714 for (n = control; n; n = n->m_next) { 715 space += n->m_len; 716 MCLAIM(n, sb->sb_mowner); 717 if (n->m_next == 0) /* keep pointer to last control buf */ 718 break; 719 } 720 if (space > sbspace(sb)) 721 return (0); 722 MGET(m, M_DONTWAIT, MT_SONAME); 723 if (m == 0) 724 return (0); 725 MCLAIM(m, sb->sb_mowner); 726 /* 727 * XXX avoid 'comparison always true' warning which isn't easily 728 * avoided. 729 */ 730 len = asa->sa_len; 731 if (len > MLEN) { 732 MEXTMALLOC(m, asa->sa_len, M_NOWAIT); 733 if ((m->m_flags & M_EXT) == 0) { 734 m_free(m); 735 return (0); 736 } 737 } 738 m->m_len = asa->sa_len; 739 memcpy(mtod(m, caddr_t), (caddr_t)asa, asa->sa_len); 740 if (n) 741 n->m_next = m0; /* concatenate data to control */ 742 else 743 control = m0; 744 m->m_next = control; 745 746 SBLASTRECORDCHK(sb, "sbappendaddr 1"); 747 748 for (n = m; n->m_next != NULL; n = n->m_next) 749 sballoc(sb, n); 750 sballoc(sb, n); 751 nlast = n; 752 SBLINKRECORD(sb, m); 753 754 sb->sb_mbtail = nlast; 755 SBLASTMBUFCHK(sb, "sbappendaddr"); 756 757 SBLASTRECORDCHK(sb, "sbappendaddr 2"); 758 759 return (1); 760} 761 762int 763sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control) 764{ 765 struct mbuf *m, *mlast, *n; 766 int space; 767 768 space = 0; 769 if (control == 0) 770 panic("sbappendcontrol"); 771 for (m = control; ; m = m->m_next) { 772 space += m->m_len; 773 MCLAIM(m, sb->sb_mowner); 774 if (m->m_next == 0) 775 break; 776 } 777 n = m; /* save pointer to last control buffer */ 778 for (m = m0; m; m = m->m_next) { 779 MCLAIM(m, sb->sb_mowner); 780 space += m->m_len; 781 } 782 
if (space > sbspace(sb)) 783 return (0); 784 n->m_next = m0; /* concatenate data to control */ 785 786 SBLASTRECORDCHK(sb, "sbappendcontrol 1"); 787 788 for (m = control; m->m_next != NULL; m = m->m_next) 789 sballoc(sb, m); 790 sballoc(sb, m); 791 mlast = m; 792 SBLINKRECORD(sb, control); 793 794 sb->sb_mbtail = mlast; 795 SBLASTMBUFCHK(sb, "sbappendcontrol"); 796 797 SBLASTRECORDCHK(sb, "sbappendcontrol 2"); 798 799 return (1); 800} 801 802/* 803 * Compress mbuf chain m into the socket 804 * buffer sb following mbuf n. If n 805 * is null, the buffer is presumed empty. 806 */ 807void 808sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n) 809{ 810 int eor; 811 struct mbuf *o; 812 813 eor = 0; 814 while (m) { 815 eor |= m->m_flags & M_EOR; 816 if (m->m_len == 0 && 817 (eor == 0 || 818 (((o = m->m_next) || (o = n)) && 819 o->m_type == m->m_type))) { 820 if (sb->sb_lastrecord == m) 821 sb->sb_lastrecord = m->m_next; 822 m = m_free(m); 823 continue; 824 } 825 if (n && (n->m_flags & M_EOR) == 0 && 826 /* M_TRAILINGSPACE() checks buffer writeability */ 827 m->m_len <= MCLBYTES / 4 && /* XXX Don't copy too much */ 828 m->m_len <= M_TRAILINGSPACE(n) && 829 n->m_type == m->m_type) { 830 memcpy(mtod(n, caddr_t) + n->m_len, mtod(m, caddr_t), 831 (unsigned)m->m_len); 832 n->m_len += m->m_len; 833 sb->sb_cc += m->m_len; 834 m = m_free(m); 835 continue; 836 } 837 if (n) 838 n->m_next = m; 839 else 840 sb->sb_mb = m; 841 sb->sb_mbtail = m; 842 sballoc(sb, m); 843 n = m; 844 m->m_flags &= ~M_EOR; 845 m = m->m_next; 846 n->m_next = 0; 847 } 848 if (eor) { 849 if (n) 850 n->m_flags |= eor; 851 else 852 printf("semi-panic: sbcompress\n"); 853 } 854 SBLASTMBUFCHK(sb, __func__); 855} 856 857/* 858 * Free all mbufs in a sockbuf. 859 * Check that all resources are reclaimed. 
860 */ 861void 862sbflush(struct sockbuf *sb) 863{ 864 865 KASSERT((sb->sb_flags & SB_LOCK) == 0); 866 867 while (sb->sb_mbcnt) 868 sbdrop(sb, (int)sb->sb_cc); 869 870 KASSERT(sb->sb_cc == 0); 871 KASSERT(sb->sb_mb == NULL); 872 KASSERT(sb->sb_mbtail == NULL); 873 KASSERT(sb->sb_lastrecord == NULL); 874} 875 876/* 877 * Drop data from (the front of) a sockbuf. 878 */ 879void 880sbdrop(struct sockbuf *sb, int len) 881{ 882 struct mbuf *m, *mn, *next; 883 884 next = (m = sb->sb_mb) ? m->m_nextpkt : 0; 885 while (len > 0) { 886 if (m == 0) { 887 if (next == 0) 888 panic("sbdrop"); 889 m = next; 890 next = m->m_nextpkt; 891 continue; 892 } 893 if (m->m_len > len) { 894 m->m_len -= len; 895 m->m_data += len; 896 sb->sb_cc -= len; 897 break; 898 } 899 len -= m->m_len; 900 sbfree(sb, m); 901 MFREE(m, mn); 902 m = mn; 903 } 904 while (m && m->m_len == 0) { 905 sbfree(sb, m); 906 MFREE(m, mn); 907 m = mn; 908 } 909 if (m) { 910 sb->sb_mb = m; 911 m->m_nextpkt = next; 912 } else 913 sb->sb_mb = next; 914 /* 915 * First part is an inline SB_EMPTY_FIXUP(). Second part 916 * makes sure sb_lastrecord is up-to-date if we dropped 917 * part of the last record. 918 */ 919 m = sb->sb_mb; 920 if (m == NULL) { 921 sb->sb_mbtail = NULL; 922 sb->sb_lastrecord = NULL; 923 } else if (m->m_nextpkt == NULL) 924 sb->sb_lastrecord = m; 925} 926 927/* 928 * Drop a record off the front of a sockbuf 929 * and move the next record to the front. 930 */ 931void 932sbdroprecord(struct sockbuf *sb) 933{ 934 struct mbuf *m, *mn; 935 936 m = sb->sb_mb; 937 if (m) { 938 sb->sb_mb = m->m_nextpkt; 939 do { 940 sbfree(sb, m); 941 MFREE(m, mn); 942 } while ((m = mn) != NULL); 943 } 944 SB_EMPTY_FIXUP(sb); 945} 946 947/* 948 * Create a "control" mbuf containing the specified data 949 * with the specified type for presentation on a socket buffer. 
950 */ 951struct mbuf * 952sbcreatecontrol(caddr_t p, int size, int type, int level) 953{ 954 struct cmsghdr *cp; 955 struct mbuf *m; 956 957 if (CMSG_SPACE(size) > MCLBYTES) { 958 printf("sbcreatecontrol: message too large %d\n", size); 959 return NULL; 960 } 961 962 if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL) 963 return ((struct mbuf *) NULL); 964 if (CMSG_SPACE(size) > MLEN) { 965 MCLGET(m, M_DONTWAIT); 966 if ((m->m_flags & M_EXT) == 0) { 967 m_free(m); 968 return NULL; 969 } 970 } 971 cp = mtod(m, struct cmsghdr *); 972 memcpy(CMSG_DATA(cp), p, size); 973 m->m_len = CMSG_SPACE(size); 974 cp->cmsg_len = CMSG_LEN(size); 975 cp->cmsg_level = level; 976 cp->cmsg_type = type; 977 return (m); 978} 979