/* uipc_socket2.c revision 1.55 */
1/* $NetBSD: uipc_socket2.c,v 1.55 2003/09/06 22:03:10 christos Exp $ */ 2 3/* 4 * Copyright (c) 1982, 1986, 1988, 1990, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 
30 * 31 * @(#)uipc_socket2.c 8.2 (Berkeley) 2/14/95 32 */ 33 34#include <sys/cdefs.h> 35__KERNEL_RCSID(0, "$NetBSD: uipc_socket2.c,v 1.55 2003/09/06 22:03:10 christos Exp $"); 36 37#include "opt_mbuftrace.h" 38 39#include <sys/param.h> 40#include <sys/systm.h> 41#include <sys/proc.h> 42#include <sys/file.h> 43#include <sys/buf.h> 44#include <sys/malloc.h> 45#include <sys/mbuf.h> 46#include <sys/protosw.h> 47#include <sys/poll.h> 48#include <sys/socket.h> 49#include <sys/socketvar.h> 50#include <sys/signalvar.h> 51 52/* 53 * Primitive routines for operating on sockets and socket buffers 54 */ 55 56/* strings for sleep message: */ 57const char netcon[] = "netcon"; 58const char netcls[] = "netcls"; 59const char netio[] = "netio"; 60const char netlck[] = "netlck"; 61 62/* 63 * Procedures to manipulate state flags of socket 64 * and do appropriate wakeups. Normal sequence from the 65 * active (originating) side is that soisconnecting() is 66 * called during processing of connect() call, 67 * resulting in an eventual call to soisconnected() if/when the 68 * connection is established. When the connection is torn down 69 * soisdisconnecting() is called during processing of disconnect() call, 70 * and soisdisconnected() is called when the connection to the peer 71 * is totally severed. The semantics of these routines are such that 72 * connectionless protocols can call soisconnected() and soisdisconnected() 73 * only, bypassing the in-progress calls when setting up a ``connection'' 74 * takes no time. 75 * 76 * From the passive side, a socket is created with 77 * two queues of sockets: so_q0 for connections in progress 78 * and so_q for connections already made and awaiting user acceptance. 79 * As a protocol is preparing incoming connections, it creates a socket 80 * structure queued on so_q0 by calling sonewconn(). When the connection 81 * is established, soisconnected() is called, and transfers the 82 * socket structure to so_q, making it available to accept(). 
 *
 * If a socket is closed with sockets on either
 * so_q0 or so_q, these sockets are dropped.
 *
 * If higher level protocols are implemented in
 * the kernel, the wakeups done here will sometimes
 * cause software-interrupt process scheduling.
 */

/*
 * Mark the socket as actively connecting: clear any stale
 * connected/disconnecting bits and set SS_ISCONNECTING.
 */
void
soisconnecting(struct socket *so)
{

	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTING;
}

/*
 * Mark the socket as connected.  If the socket sits on a listening
 * socket's incomplete queue (so_q0), move it to the complete queue
 * (so_q) and wake the listener so accept() can proceed; otherwise
 * wake anyone sleeping on this socket itself.
 */
void
soisconnected(struct socket *so)
{
	struct socket *head;

	head = so->so_head;
	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
	so->so_state |= SS_ISCONNECTED;
	if (head && soqremque(so, 0)) {
		soqinsque(head, so, 1);
		sorwakeup(head);
		wakeup((caddr_t)&head->so_timeo);
	} else {
		wakeup((caddr_t)&so->so_timeo);
		sorwakeup(so);
		sowwakeup(so);
	}
}

/*
 * Note that a disconnect has been initiated: no further data may be
 * sent or received; wake up anyone waiting on the socket.
 */
void
soisdisconnecting(struct socket *so)
{

	so->so_state &= ~SS_ISCONNECTING;
	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
	wakeup((caddr_t)&so->so_timeo);
	sowwakeup(so);
	sorwakeup(so);
}

/*
 * Note that the connection to the peer is fully severed; wake up
 * anyone waiting on the socket.
 */
void
soisdisconnected(struct socket *so)
{

	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
	wakeup((caddr_t)&so->so_timeo);
	sowwakeup(so);
	sorwakeup(so);
}

/*
 * When an attempt at a new connection is noted on a socket
 * which accepts connections, sonewconn is called.  If the
 * connection is possible (subject to space constraints, etc.)
 * then we allocate a new structure, properly linked into the
 * data structure of the original socket, and return this.
 * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED.
 *
 * Currently, sonewconn() is defined as sonewconn1() in socketvar.h
 * to catch calls that are missing the (new) second parameter.
 */
struct socket *
sonewconn1(struct socket *head, int connstatus)
{
	struct socket *so;
	int soqueue;

	soqueue = connstatus ? 1 : 0;
	/*
	 * Limit pending connections: allow at most 3/2 of the listen
	 * backlog across the incomplete and complete queues combined.
	 */
	if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2)
		return ((struct socket *)0);
	so = pool_get(&socket_pool, PR_NOWAIT);
	if (so == NULL)
		return (NULL);
	memset((caddr_t)so, 0, sizeof(*so));
	/* Inherit type, options and parameters from the listening socket. */
	so->so_type = head->so_type;
	so->so_options = head->so_options &~ SO_ACCEPTCONN;
	so->so_linger = head->so_linger;
	so->so_state = head->so_state | SS_NOFDREF;
	so->so_proto = head->so_proto;
	so->so_timeo = head->so_timeo;
	so->so_pgid = head->so_pgid;
	so->so_send = head->so_send;
	so->so_receive = head->so_receive;
	so->so_uid = head->so_uid;
#ifdef MBUFTRACE
	so->so_mowner = head->so_mowner;
	so->so_rcv.sb_mowner = head->so_rcv.sb_mowner;
	so->so_snd.sb_mowner = head->so_snd.sb_mowner;
#endif
	(void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat);
	soqinsque(head, so, soqueue);
	if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH,
	    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
	    (struct proc *)0)) {
		/* Protocol attach failed; undo the queue insertion. */
		(void) soqremque(so, soqueue);
		pool_put(&socket_pool, so);
		return (NULL);
	}
	if (connstatus) {
		sorwakeup(head);
		wakeup((caddr_t)&head->so_timeo);
		so->so_state |= connstatus;
	}
	return (so);
}

/*
 * Insert socket "so" on the incomplete (q == 0) or complete (q != 0)
 * connection queue of the listening socket "head".
 */
void
soqinsque(struct socket *head, struct socket *so, int q)
{

#ifdef DIAGNOSTIC
	if (so->so_onq != NULL)
		panic("soqinsque");
#endif

	so->so_head = head;
	if (q == 0) {
		head->so_q0len++;
		so->so_onq = &head->so_q0;
	} else {
		head->so_qlen++;
		so->so_onq = &head->so_q;
	}
	TAILQ_INSERT_TAIL(so->so_onq, so, so_qe);
}

/*
 * Remove socket "so" from the queue selected by "q" (0 for so_q0,
 * non-zero for so_q) of its listening parent.  Returns 1 on success,
 * 0 if the socket was not on the expected queue.
 */
int
soqremque(struct socket *so, int q)
{
	struct socket *head;

	head = so->so_head;
	if (q == 0) {
		if (so->so_onq != &head->so_q0)
			return (0);
		head->so_q0len--;
	} else {
		if (so->so_onq != &head->so_q)
			return (0);
		head->so_qlen--;
	}
	TAILQ_REMOVE(so->so_onq, so, so_qe);
	so->so_onq = NULL;
	so->so_head = NULL;
	return (1);
}

/*
 * Socantsendmore indicates that no more data will be sent on the
 * socket; it would normally be applied to a socket when the user
 * informs the system that no more data is to be sent, by the protocol
 * code (in case PRU_SHUTDOWN).  Socantrcvmore indicates that no more data
 * will be received, and will normally be applied to the socket by a
 * protocol when it detects that the peer will send no more data.
 * Data queued for reading in the socket may yet be read.
 */

void
socantsendmore(struct socket *so)
{

	so->so_state |= SS_CANTSENDMORE;
	sowwakeup(so);
}

void
socantrcvmore(struct socket *so)
{

	so->so_state |= SS_CANTRCVMORE;
	sorwakeup(so);
}

/*
 * Wait for data to arrive at/drain from a socket buffer.
 * Returns the tsleep() result (0, or an error such as EINTR when the
 * sleep is interruptible, i.e. SB_NOINTR is not set).
 */
int
sbwait(struct sockbuf *sb)
{

	sb->sb_flags |= SB_WAIT;
	return (tsleep((caddr_t)&sb->sb_cc,
	    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, netio,
	    sb->sb_timeo));
}

/*
 * Lock a sockbuf already known to be locked;
 * return any error returned from sleep (EINTR).
 * Sleeps while another holder has SB_LOCK set, then takes the lock.
 */
int
sb_lock(struct sockbuf *sb)
{
	int error;

	while (sb->sb_flags & SB_LOCK) {
		sb->sb_flags |= SB_WANT;
		error = tsleep((caddr_t)&sb->sb_flags,
		    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK|PCATCH,
		    netlck, 0);
		if (error)
			return (error);
	}
	sb->sb_flags |= SB_LOCK;
	return (0);
}

/*
 * Wakeup processes waiting on a socket buffer.
 * Do asynchronous notification via SIGIO
 * if the socket buffer has the SB_ASYNC flag set.
 */
void
sowakeup(struct socket *so, struct sockbuf *sb, int code)
{
	struct proc *p;

	selnotify(&sb->sb_sel, 0);
	sb->sb_flags &= ~SB_SEL;
	/* Wake any thread sleeping in sbwait(). */
	if (sb->sb_flags & SB_WAIT) {
		sb->sb_flags &= ~SB_WAIT;
		wakeup((caddr_t)&sb->sb_cc);
	}
	if (sb->sb_flags & SB_ASYNC) {
		ksiginfo_t ksi;
		memset(&ksi, 0, sizeof(ksi));
		ksi.ksi_signo = SIGIO;
		ksi.ksi_code = code;
		/* Select the siginfo poll band matching the I/O direction. */
		if (code == POLL_IN) {
			if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
				ksi.ksi_band = (POLLPRI | POLLRDBAND);
			else
				ksi.ksi_band = (POLLIN | POLLRDNORM);
		} else {
			if (so->so_oobmark)
				ksi.ksi_band = (POLLPRI | POLLWRBAND);
			else
				ksi.ksi_band = (POLLOUT | POLLWRNORM);
		}
		/* so_pgid < 0 names a process group, > 0 a single process. */
		if (so->so_pgid < 0)
			kgsignal(-so->so_pgid, &ksi, so);
		else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
			kpsignal(p, &ksi, so);
	}
	if (sb->sb_flags & SB_UPCALL)
		(*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
}

/*
 * Socket buffer (struct sockbuf) utility routines.
 *
 * Each socket contains two socket buffers: one for sending data and
 * one for receiving data.  Each buffer contains a queue of mbufs,
 * information about the number of mbufs and amount of data in the
 * queue, and other fields allowing poll() statements and notification
 * on data availability to be implemented.
 *
 * Data stored in a socket buffer is maintained as a list of records.
 * Each record is a list of mbufs chained together with the m_next
 * field.  Records are chained together with the m_nextpkt field. The upper
 * level routine soreceive() expects the following conventions to be
 * observed when placing information in the receive buffer:
 *
 * 1. If the protocol requires each message be preceded by the sender's
 *    name, then a record containing that name must be present before
 *    any associated data (mbuf's must be of type MT_SONAME).
 * 2.
 If the protocol supports the exchange of ``access rights'' (really
 *    just additional data associated with the message), and there are
 *    ``rights'' to be received, then a record containing this data
 *    should be present (mbuf's must be of type MT_CONTROL).
 * 3. If a name or rights record exists, then it must be followed by
 *    a data record, perhaps of zero length.
 *
 * Before using a new socket structure it is first necessary to reserve
 * buffer space to the socket, by calling sbreserve().  This should commit
 * some of the available buffer space in the system buffer pool for the
 * socket (currently, it does nothing but enforce limits).  The space
 * should be released by calling sbrelease() when the socket is destroyed.
 */

/*
 * Reserve send and receive buffer space for the socket and set up
 * default low-water marks.  Returns 0 on success, ENOBUFS if either
 * reservation fails.
 */
int
soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
{

	if (sbreserve(&so->so_snd, sndcc) == 0)
		goto bad;
	if (sbreserve(&so->so_rcv, rcvcc) == 0)
		goto bad2;
	if (so->so_rcv.sb_lowat == 0)
		so->so_rcv.sb_lowat = 1;
	if (so->so_snd.sb_lowat == 0)
		so->so_snd.sb_lowat = MCLBYTES;
	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
	return (0);
 bad2:
	sbrelease(&so->so_snd);
 bad:
	return (ENOBUFS);
}

/*
 * Allot mbufs to a sockbuf.
 * Attempt to scale mbmax so that mbcnt doesn't become limiting
 * if buffering efficiency is near the normal case.
 * Returns 1 on success, 0 if cc is zero or exceeds the system limit.
 */
int
sbreserve(struct sockbuf *sb, u_long cc)
{

	if (cc == 0 ||
	    (u_quad_t) cc > (u_quad_t) sb_max * MCLBYTES / (MSIZE + MCLBYTES))
		return (0);
	sb->sb_hiwat = cc;
	sb->sb_mbmax = min(cc * 2, sb_max);
	if (sb->sb_lowat > sb->sb_hiwat)
		sb->sb_lowat = sb->sb_hiwat;
	return (1);
}

/*
 * Free mbufs held by a socket, and reserved mbuf space.
 */
void
sbrelease(struct sockbuf *sb)
{

	sbflush(sb);
	sb->sb_hiwat = sb->sb_mbmax = 0;
}

/*
 * Routines to add and remove
 * data from an mbuf queue.
 *
 * The routines sbappend() or sbappendrecord() are normally called to
 * append new mbufs to a socket buffer, after checking that adequate
 * space is available, comparing the function sbspace() with the amount
 * of data to be added.  sbappendrecord() differs from sbappend() in
 * that data supplied is treated as the beginning of a new record.
 * To place a sender's address, optional access rights, and data in a
 * socket receive buffer, sbappendaddr() should be used.  To place
 * access rights and data in a socket receive buffer, sbappendrights()
 * should be used.  In either case, the new data begins a new record.
 * Note that unlike sbappend() and sbappendrecord(), these routines check
 * for the caller that there will be enough space to store the data.
 * Each fails if there is not enough space, or if it cannot find mbufs
 * to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data
 * awaiting acknowledgement.  Data is normally copied from a socket
 * send buffer in a protocol with m_copy for output to a peer,
 * and then removing the data from the socket buffer with sbdrop()
 * or sbdroprecord() when the data is acknowledged by the peer.
 */

#ifdef SOCKBUF_DEBUG
/*
 * Debug check: verify that sb_lastrecord really points at the last
 * record on the sb_mb record chain; panic with a dump otherwise.
 */
void
sblastrecordchk(struct sockbuf *sb, const char *where)
{
	struct mbuf *m = sb->sb_mb;

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	if (m != sb->sb_lastrecord) {
		printf("sblastrecordchk: sb_mb %p sb_lastrecord %p last %p\n",
		    sb->sb_mb, sb->sb_lastrecord, m);
		printf("packet chain:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
			printf("\t%p\n", m);
		panic("sblastrecordchk from %s", where);
	}
}

/*
 * Debug check: verify that sb_mbtail points at the last mbuf of the
 * last record; panic with a dump otherwise.
 */
void
sblastmbufchk(struct sockbuf *sb, const char *where)
{
	struct mbuf *m = sb->sb_mb;
	struct mbuf *n;

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	while (m && m->m_next)
		m = m->m_next;

	if (m != sb->sb_mbtail) {
		printf("sblastmbufchk: sb_mb %p sb_mbtail %p last %p\n",
		    sb->sb_mb, sb->sb_mbtail, m);
		printf("packet tree:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
			printf("\t");
			for (n = m; n != NULL; n = n->m_next)
				printf("%p ", n);
			printf("\n");
		}
		panic("sblastmbufchk from %s", where);
	}
}
#endif /* SOCKBUF_DEBUG */

/*
 * Link record m0 onto the end of sb's record chain and update
 * sb_lastrecord.
 */
#define	SBLINKRECORD(sb, m0)						\
do {									\
	if ((sb)->sb_lastrecord != NULL)				\
		(sb)->sb_lastrecord->m_nextpkt = (m0);			\
	else								\
		(sb)->sb_mb = (m0);					\
	(sb)->sb_lastrecord = (m0);					\
} while (/*CONSTCOND*/0)

/*
 * Append mbuf chain m to the last record in the
 * socket buffer sb.  The additional space associated
 * the mbuf chain is recorded in sb.  Empty mbufs are
 * discarded and mbufs are compacted where possible.
 */
void
sbappend(struct sockbuf *sb, struct mbuf *m)
{
	struct mbuf *n;

	if (m == 0)
		return;

#ifdef MBUFTRACE
	m_claim(m, sb->sb_mowner);
#endif

	SBLASTRECORDCHK(sb, "sbappend 1");

	if ((n = sb->sb_lastrecord) != NULL) {
		/*
		 * XXX Would like to simply use sb_mbtail here, but
		 * XXX I need to verify that I won't miss an EOR that
		 * XXX way.
		 */
		do {
			/* A marked end-of-record forces a new record. */
			if (n->m_flags & M_EOR) {
				sbappendrecord(sb, m); /* XXXXXX!!!! */
				return;
			}
		} while (n->m_next && (n = n->m_next));
	} else {
		/*
		 * If this is the first record in the socket buffer, it's
		 * also the last record.
		 */
		sb->sb_lastrecord = m;
	}
	sbcompress(sb, m, n);
	SBLASTRECORDCHK(sb, "sbappend 2");
}

/*
 * This version of sbappend() should only be used when the caller
 * absolutely knows that there will never be more than one record
 * in the socket buffer, that is, a stream protocol (such as TCP).
 */
void
sbappendstream(struct sockbuf *sb, struct mbuf *m)
{

	KDASSERT(m->m_nextpkt == NULL);
	KASSERT(sb->sb_mb == sb->sb_lastrecord);

	SBLASTMBUFCHK(sb, __func__);

#ifdef MBUFTRACE
	m_claim(m, sb->sb_mowner);
#endif

	sbcompress(sb, m, sb->sb_mbtail);

	sb->sb_lastrecord = sb->sb_mb;
	SBLASTRECORDCHK(sb, __func__);
}

#ifdef SOCKBUF_DEBUG
/*
 * Debug check: recompute the byte and mbuf totals for sb and panic
 * if they disagree with the cached sb_cc/sb_mbcnt values.
 */
void
sbcheck(struct sockbuf *sb)
{
	struct mbuf *m;
	u_long len, mbcnt;

	len = 0;
	mbcnt = 0;
	for (m = sb->sb_mb; m; m = m->m_next) {
		len += m->m_len;
		mbcnt += MSIZE;
		if (m->m_flags & M_EXT)
			mbcnt += m->m_ext.ext_size;
		if (m->m_nextpkt)
			panic("sbcheck nextpkt");
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		printf("cc %lu != %lu || mbcnt %lu != %lu\n", len, sb->sb_cc,
		    mbcnt, sb->sb_mbcnt);
		panic("sbcheck");
	}
}
#endif

/*
 * As above, except the mbuf chain
 * begins a new record.
 */
void
sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf *m;

	if (m0 == 0)
		return;

#ifdef MBUFTRACE
	m_claim(m0, sb->sb_mowner);
#endif
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	SBLASTRECORDCHK(sb, "sbappendrecord 1");
	SBLINKRECORD(sb, m0);
	m = m0->m_next;
	m0->m_next = 0;
	/* If M_EOR is set on the record head, move it to the chain tail. */
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
	SBLASTRECORDCHK(sb, "sbappendrecord 2");
}

/*
 * As above except that OOB data
 * is inserted at the beginning of the sockbuf,
 * but after any other OOB data.
 */
void
sbinsertoob(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf *m, **mp;

	if (m0 == 0)
		return;

	SBLASTRECORDCHK(sb, "sbinsertoob 1");

	/* Skip past existing OOB records (and any leading control data). */
	for (mp = &sb->sb_mb; (m = *mp) != NULL; mp = &((*mp)->m_nextpkt)) {
	    again:
		switch (m->m_type) {

		case MT_OOBDATA:
			continue;		/* WANT next train */

		case MT_CONTROL:
			if ((m = m->m_next) != NULL)
				goto again;	/* inspect THIS train further */
		}
		break;
	}
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	m0->m_nextpkt = *mp;
	if (*mp == NULL) {
		/* m0 is actually the new tail */
		sb->sb_lastrecord = m0;
	}
	*mp = m0;
	m = m0->m_next;
	m0->m_next = 0;
	/* If M_EOR is set on the record head, move it to the chain tail. */
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
	SBLASTRECORDCHK(sb, "sbinsertoob 2");
}

/*
 * Append address and data, and optionally, control (ancillary) data
 * to the receive queue of a socket.  If present,
 * m0 must include a packet header with total length.
 * Returns 0 if no space in sockbuf or insufficient mbufs.
 */
int
sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0,
	struct mbuf *control)
{
	struct mbuf *m, *n, *nlast;
	int space, len;

	space = asa->sa_len;

	if (m0 != NULL) {
		if ((m0->m_flags & M_PKTHDR) == 0)
			panic("sbappendaddr");
		space += m0->m_pkthdr.len;
#ifdef MBUFTRACE
		m_claim(m0, sb->sb_mowner);
#endif
	}
	for (n = control; n; n = n->m_next) {
		space += n->m_len;
		MCLAIM(n, sb->sb_mowner);
		if (n->m_next == 0)	/* keep pointer to last control buf */
			break;
	}
	if (space > sbspace(sb))
		return (0);
	MGET(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	MCLAIM(m, sb->sb_mowner);
	/*
	 * XXX avoid 'comparison always true' warning which isn't easily
	 * avoided.
	 */
	len = asa->sa_len;
	if (len > MLEN) {
		/* Address does not fit in a plain mbuf; use external storage. */
		MEXTMALLOC(m, asa->sa_len, M_NOWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return (0);
		}
	}
	m->m_len = asa->sa_len;
	memcpy(mtod(m, caddr_t), (caddr_t)asa, asa->sa_len);
	if (n)
		n->m_next = m0;		/* concatenate data to control */
	else
		control = m0;
	m->m_next = control;

	SBLASTRECORDCHK(sb, "sbappendaddr 1");

	/* Account for every mbuf in the new record: name, control, data. */
	for (n = m; n->m_next != NULL; n = n->m_next)
		sballoc(sb, n);
	sballoc(sb, n);
	nlast = n;
	SBLINKRECORD(sb, m);

	sb->sb_mbtail = nlast;
	SBLASTMBUFCHK(sb, "sbappendaddr");

	SBLASTRECORDCHK(sb, "sbappendaddr 2");

	return (1);
}

/*
 * Append control (ancillary) data and optional data m0 to the receive
 * queue of a socket as a single new record.
 * Returns 0 if there is no space in the sockbuf, 1 on success.
 */
int
sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
{
	struct mbuf *m, *mlast, *n;
	int space;

	space = 0;
	if (control == 0)
		panic("sbappendcontrol");
	for (m = control; ; m = m->m_next) {
		space += m->m_len;
		MCLAIM(m, sb->sb_mowner);
		if (m->m_next == 0)
			break;
	}
	n = m;			/* save pointer to last control buffer */
	for (m = m0; m; m = m->m_next) {
		MCLAIM(m, sb->sb_mowner);
		space += m->m_len;
	}

	if (space > sbspace(sb))
		return (0);
	n->m_next = m0;			/* concatenate data to control */

	SBLASTRECORDCHK(sb, "sbappendcontrol 1");

	/* Account for every mbuf in the new record. */
	for (m = control; m->m_next != NULL; m = m->m_next)
		sballoc(sb, m);
	sballoc(sb, m);
	mlast = m;
	SBLINKRECORD(sb, control);

	sb->sb_mbtail = mlast;
	SBLASTMBUFCHK(sb, "sbappendcontrol");

	SBLASTRECORDCHK(sb, "sbappendcontrol 2");

	return (1);
}

/*
 * Compress mbuf chain m into the socket
 * buffer sb following mbuf n.  If n
 * is null, the buffer is presumed empty.
 */
void
sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
{
	int eor;
	struct mbuf *o;

	eor = 0;
	while (m) {
		eor |= m->m_flags & M_EOR;
		/*
		 * Discard an empty mbuf unless it carries an EOR mark that
		 * has no same-type neighbor to migrate to.
		 */
		if (m->m_len == 0 &&
		    (eor == 0 ||
		     (((o = m->m_next) || (o = n)) &&
		      o->m_type == m->m_type))) {
			if (sb->sb_lastrecord == m)
				sb->sb_lastrecord = m->m_next;
			m = m_free(m);
			continue;
		}
		/* Coalesce a small mbuf into the previous one when possible. */
		if (n && (n->m_flags & M_EOR) == 0 &&
		    /* M_TRAILINGSPACE() checks buffer writeability */
		    m->m_len <= MCLBYTES / 4 && /* XXX Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			memcpy(mtod(n, caddr_t) + n->m_len, mtod(m, caddr_t),
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			m = m_free(m);
			continue;
		}
		/* Link the mbuf into the chain and account for it. */
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sb->sb_mbtail = m;
		sballoc(sb, m);
		n = m;
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	/* Re-apply any EOR mark collected from freed mbufs to the tail. */
	if (eor) {
		if (n)
			n->m_flags |= eor;
		else
			printf("semi-panic: sbcompress\n");
	}
	SBLASTMBUFCHK(sb, __func__);
}

/*
 * Free all mbufs in a sockbuf.
 * Check that all resources are reclaimed.
 */
void
sbflush(struct sockbuf *sb)
{

	KASSERT((sb->sb_flags & SB_LOCK) == 0);

	while (sb->sb_mbcnt)
		sbdrop(sb, (int)sb->sb_cc);

	/* The buffer must be completely empty and unlinked afterwards. */
	KASSERT(sb->sb_cc == 0);
	KASSERT(sb->sb_mb == NULL);
	KASSERT(sb->sb_mbtail == NULL);
	KASSERT(sb->sb_lastrecord == NULL);
}

/*
 * Drop data from (the front of) a sockbuf.
 */
void
sbdrop(struct sockbuf *sb, int len)
{
	struct mbuf *m, *mn, *next;

	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	while (len > 0) {
		if (m == 0) {
			if (next == 0)
				panic("sbdrop");
			/* Current record exhausted; move to the next one. */
			m = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			/* Partial drop: trim the front of this mbuf. */
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			break;
		}
		len -= m->m_len;
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	/* Free any now-empty mbufs left at the front of the record. */
	while (m && m->m_len == 0) {
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;
	/*
	 * First part is an inline SB_EMPTY_FIXUP().  Second part
	 * makes sure sb_lastrecord is up-to-date if we dropped
	 * part of the last record.
	 */
	m = sb->sb_mb;
	if (m == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (m->m_nextpkt == NULL)
		sb->sb_lastrecord = m;
}

/*
 * Drop a record off the front of a sockbuf
 * and move the next record to the front.
 */
void
sbdroprecord(struct sockbuf *sb)
{
	struct mbuf *m, *mn;

	m = sb->sb_mb;
	if (m) {
		sb->sb_mb = m->m_nextpkt;
		/* Free every mbuf belonging to the dropped record. */
		do {
			sbfree(sb, m);
			MFREE(m, mn);
		} while ((m = mn) != NULL);
	}
	SB_EMPTY_FIXUP(sb);
}

/*
 * Create a "control" mbuf containing the specified data
 * with the specified type for presentation on a socket buffer.
 */
struct mbuf *
sbcreatecontrol(caddr_t p, int size, int type, int level)
{
	struct cmsghdr *cp;
	struct mbuf *m;

	/* Refuse messages that would not fit even in a cluster. */
	if (CMSG_SPACE(size) > MCLBYTES) {
		printf("sbcreatecontrol: message too large %d\n", size);
		return NULL;
	}

	if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
		return ((struct mbuf *) NULL);
	if (CMSG_SPACE(size) > MLEN) {
		/* Message does not fit in a plain mbuf; attach a cluster. */
		MCLGET(m, M_DONTWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return NULL;
		}
	}
	/* Fill in the cmsghdr followed by the payload. */
	cp = mtod(m, struct cmsghdr *);
	memcpy(CMSG_DATA(cp), p, size);
	m->m_len = CMSG_SPACE(size);
	cp->cmsg_len = CMSG_LEN(size);
	cp->cmsg_level = level;
	cp->cmsg_type = type;
	return (m);
}