#include "opt_mac.h"
#include "opt_param.h"

#include <sys/param.h>
#include <sys/aio.h>			/* for aio_swake proto */
#include <sys/domain.h>
#include <sys/event.h>
#include <sys/file.h>			/* for maxfiles */
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mac.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

/* System-wide limit on the number of sockets. */
int	maxsockets;

/*
 * Hook filled in by the AIO subsystem; called from sowakeup() when a
 * sockbuf has the SB_AIO flag set.
 */
void (*aio_swake)(struct socket *, struct sockbuf *);

/*
 * Primitive routines for operating on sockets and socket buffers
 */

/* Maximum socket buffer size; adjustable at runtime via sysctl. */
u_long	sb_max = SB_MAX;
static	u_long sb_max_adj =
    SB_MAX * MCLBYTES / (MSIZE + MCLBYTES); /* adjusted sb_max */

static	u_long sb_efficiency = 8;	/* parameter for sbreserve() */

#ifdef REGRESSION
static int regression_sonewconn_earlytest = 1;
SYSCTL_INT(_regression, OID_AUTO, sonewconn_earlytest, CTLFLAG_RW,
    &regression_sonewconn_earlytest, 0, "Perform early sonewconn limit test");
#endif

/*
 * Procedures to manipulate state flags of socket
 * and do appropriate wakeups.  Normal sequence from the
 * active (originating) side is that soisconnecting() is
 * called during processing of connect() call,
 * resulting in an eventual call to soisconnected() if/when the
 * connection is established.  When the connection is torn down
 * soisdisconnecting() is called during processing of disconnect() call,
 * and soisdisconnected() is called when the connection to the peer
 * is totally severed.  The semantics of these routines are such that
 * connectionless protocols can call soisconnected() and soisdisconnected()
 * only, bypassing the in-progress calls when setting up a ``connection''
 * takes no time.
 *
 * From the passive side, a socket is created with
 * two queues of sockets: so_incomp for connections in progress
 * and so_comp for connections already made and awaiting user acceptance.
 * As a protocol is preparing incoming connections, it creates a socket
 * structure queued on so_incomp by calling sonewconn().  When the connection
 * is established, soisconnected() is called, and transfers the
 * socket structure to so_comp, making it available to accept().
 *
 * If a socket is closed with sockets on either
 * so_incomp or so_comp, these sockets are dropped.
 *
 * If higher level protocols are implemented in
 * the kernel, the wakeups done here will sometimes
 * cause software-interrupt process scheduling.
 */

void
soisconnecting(so)
	register struct socket *so;
{

	SOCK_LOCK(so);
	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTING;
	SOCK_UNLOCK(so);
}

void
soisconnected(so)
	struct socket *so;
{
	struct socket *head;

	ACCEPT_LOCK();
	SOCK_LOCK(so);
	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
	so->so_state |= SS_ISCONNECTED;
	head = so->so_head;
	if (head != NULL && (so->so_qstate & SQ_INCOMP)) {
		if ((so->so_options & SO_ACCEPTFILTER) == 0) {
			/*
			 * Promote the socket from the incomplete (so_incomp)
			 * to the complete (so_comp) queue and wake up any
			 * thread sleeping in accept().
			 */
			SOCK_UNLOCK(so);
			TAILQ_REMOVE(&head->so_incomp, so, so_list);
			head->so_incqlen--;
			so->so_qstate &= ~SQ_INCOMP;
			TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
			head->so_qlen++;
			so->so_qstate |= SQ_COMP;
			ACCEPT_UNLOCK();
			sorwakeup(head);
			wakeup_one(&head->so_timeo);
		} else {
			/*
			 * An accept filter is installed on the listen
			 * socket: arm the filter's upcall on this socket's
			 * receive buffer instead of completing the
			 * connection now, then invoke it once.
			 */
			ACCEPT_UNLOCK();
			so->so_upcall =
			    head->so_accf->so_accept_filter->accf_callback;
			so->so_upcallarg = head->so_accf->so_accept_filter_arg;
			so->so_rcv.sb_flags |= SB_UPCALL;
			so->so_options &= ~SO_ACCEPTFILTER;
			SOCK_UNLOCK(so);
			so->so_upcall(so, so->so_upcallarg, M_DONTWAIT);
		}
		return;
	}
	SOCK_UNLOCK(so);
	ACCEPT_UNLOCK();
	wakeup(&so->so_timeo);
	sorwakeup(so);
	sowwakeup(so);
}

void
soisdisconnecting(so)
	register struct socket *so;
{

	/*
	 * XXXRW: This code assumes that SOCK_LOCK(so) and
	 * SOCKBUF_LOCK(&so->so_rcv) are the same.
	 */
	SOCKBUF_LOCK(&so->so_rcv);
	so->so_state &= ~SS_ISCONNECTING;
	so->so_state |= SS_ISDISCONNECTING;
	so->so_rcv.sb_state |= SBS_CANTRCVMORE;
	sorwakeup_locked(so);
	SOCKBUF_LOCK(&so->so_snd);
	so->so_snd.sb_state |= SBS_CANTSENDMORE;
	sowwakeup_locked(so);
	wakeup(&so->so_timeo);
}

void
soisdisconnected(so)
	register struct socket *so;
{

	/*
	 * XXXRW: This code assumes that SOCK_LOCK(so) and
	 * SOCKBUF_LOCK(&so->so_rcv) are the same.
	 */
	SOCKBUF_LOCK(&so->so_rcv);
	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISDISCONNECTED;
	so->so_rcv.sb_state |= SBS_CANTRCVMORE;
	sorwakeup_locked(so);
	SOCKBUF_LOCK(&so->so_snd);
	so->so_snd.sb_state |= SBS_CANTSENDMORE;
	/* Any unsent data can never be delivered now; discard it. */
	sbdrop_locked(&so->so_snd, so->so_snd.sb_cc);
	sowwakeup_locked(so);
	wakeup(&so->so_timeo);
}

/*
 * When an attempt at a new connection is noted on a socket
 * which accepts connections, sonewconn is called.  If the
 * connection is possible (subject to space constraints, etc.)
 * then we allocate a new structure, properly linked into the
 * data structure of the original socket, and return this.
 * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED.
 *
 * note: the ref count on the socket is 0 on return
 */
struct socket *
sonewconn(head, connstatus)
	register struct socket *head;
	int connstatus;
{
	register struct socket *so;
	int over;

	/* Reject early if the completed-connection queue is over limit. */
	ACCEPT_LOCK();
	over = (head->so_qlen > 3 * head->so_qlimit / 2);
	ACCEPT_UNLOCK();
#ifdef REGRESSION
	if (regression_sonewconn_earlytest && over)
#else
	if (over)
#endif
		return (NULL);
	so = soalloc(M_NOWAIT);
	if (so == NULL)
		return (NULL);
	/*
	 * With an accept filter installed, the connection is always queued
	 * as incomplete (connstatus forced to 0); soisconnected() arms the
	 * filter upcall later.
	 */
	if ((head->so_options & SO_ACCEPTFILTER) != 0)
		connstatus = 0;
	/* Inherit settings from the listen socket. */
	so->so_head = head;
	so->so_type = head->so_type;
	so->so_options = head->so_options &~ SO_ACCEPTCONN;
	so->so_linger = head->so_linger;
	so->so_state = head->so_state | SS_NOFDREF;
	so->so_proto = head->so_proto;
	so->so_timeo = head->so_timeo;
	so->so_cred = crhold(head->so_cred);
#ifdef MAC
	SOCK_LOCK(head);
	mac_create_socket_from_socket(head, so);
	SOCK_UNLOCK(head);
#endif
	knlist_init(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv),
	    NULL, NULL, NULL);
	knlist_init(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd),
	    NULL, NULL, NULL);
	if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat) ||
	    (*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) {
		sodealloc(so);
		return (NULL);
	}
	so->so_state |= connstatus;
	ACCEPT_LOCK();
	if (connstatus) {
		TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
		so->so_qstate |= SQ_COMP;
		head->so_qlen++;
	} else {
		/*
		 * Keep removing sockets from the head until there's room for
		 * us to insert on the tail.  In pre-locking revisions, this
		 * was a simple if(), but as we could be racing with other
		 * threads and soabort() requires dropping locks, we must
		 * loop waiting for the condition to be true.
		 */
		while (head->so_incqlen > head->so_qlimit) {
			struct socket *sp;
			sp = TAILQ_FIRST(&head->so_incomp);
			TAILQ_REMOVE(&head->so_incomp, sp, so_list);
			head->so_incqlen--;
			sp->so_qstate &= ~SQ_INCOMP;
			sp->so_head = NULL;
			ACCEPT_UNLOCK();
			soabort(sp);
			ACCEPT_LOCK();
		}
		TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list);
		so->so_qstate |= SQ_INCOMP;
		head->so_incqlen++;
	}
	ACCEPT_UNLOCK();
	if (connstatus) {
		/* Notify accept()ers that a completed connection arrived. */
		sorwakeup(head);
		wakeup_one(&head->so_timeo);
	}
	return (so);
}

/*
 * Socantsendmore indicates that no more data will be sent on the
 * socket; it would normally be applied to a socket when the user
 * informs the system that no more data is to be sent, by the protocol
 * code (in case PRU_SHUTDOWN).  Socantrcvmore indicates that no more data
 * will be received, and will normally be applied to the socket by a
 * protocol when it detects that the peer will send no more data.
 * Data queued for reading in the socket may yet be read.
 */
void
socantsendmore_locked(so)
	struct socket *so;
{

	SOCKBUF_LOCK_ASSERT(&so->so_snd);

	so->so_snd.sb_state |= SBS_CANTSENDMORE;
	/* sowwakeup_locked() drops the sockbuf lock. */
	sowwakeup_locked(so);
	mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
}

/* Unlocked wrapper: acquires the send sockbuf lock for the caller. */
void
socantsendmore(so)
	struct socket *so;
{

	SOCKBUF_LOCK(&so->so_snd);
	socantsendmore_locked(so);
	mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
}

void
socantrcvmore_locked(so)
	struct socket *so;
{

	SOCKBUF_LOCK_ASSERT(&so->so_rcv);

	so->so_rcv.sb_state |= SBS_CANTRCVMORE;
	/* sorwakeup_locked() drops the sockbuf lock. */
	sorwakeup_locked(so);
	mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
}

/* Unlocked wrapper: acquires the receive sockbuf lock for the caller. */
void
socantrcvmore(so)
	struct socket *so;
{

	SOCKBUF_LOCK(&so->so_rcv);
	socantrcvmore_locked(so);
	mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
}

/*
 * Wait for data to arrive at/drain from a socket buffer.
 */
int
sbwait(sb)
	struct sockbuf *sb;
{

	SOCKBUF_LOCK_ASSERT(sb);

	sb->sb_flags |= SB_WAIT;
	/* Sleep interruptibly unless SB_NOINTR is set; bounded by sb_timeo. */
	return (msleep(&sb->sb_cc, &sb->sb_mtx,
	    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
	    sb->sb_timeo));
}

/*
 * Lock a sockbuf already known to be locked;
 * return any error returned from sleep (EINTR).
 */
int
sb_lock(sb)
	register struct sockbuf *sb;
{
	int error;

	SOCKBUF_LOCK_ASSERT(sb);

	while (sb->sb_flags & SB_LOCK) {
		sb->sb_flags |= SB_WANT;
		error = msleep(&sb->sb_flags, &sb->sb_mtx,
		    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK|PCATCH,
		    "sblock", 0);
		if (error)
			return (error);
	}
	sb->sb_flags |= SB_LOCK;
	return (0);
}

/*
 * Wakeup processes waiting on a socket buffer.  Do asynchronous
 * notification via SIGIO if the socket has the SS_ASYNC flag set.
 *
 * Called with the socket buffer lock held; will release the lock by the end
 * of the function.  This allows the caller to acquire the socket buffer lock
 * while testing for the need for various sorts of wakeup and hold it through
 * to the point where it's no longer required.  We currently hold the lock
 * through calls out to other subsystems (with the exception of kqueue), and
 * then release it to avoid lock order issues.  It's not clear that's
 * correct.
 */
void
sowakeup(so, sb)
	register struct socket *so;
	register struct sockbuf *sb;
{

	SOCKBUF_LOCK_ASSERT(sb);

	selwakeuppri(&sb->sb_sel, PSOCK);
	sb->sb_flags &= ~SB_SEL;
	if (sb->sb_flags & SB_WAIT) {
		sb->sb_flags &= ~SB_WAIT;
		wakeup(&sb->sb_cc);
	}
	KNOTE_LOCKED(&sb->sb_sel.si_note, 0);
	SOCKBUF_UNLOCK(sb);
	/*
	 * NOTE(review): sb_flags is re-read below after SOCKBUF_UNLOCK(),
	 * so the SB_UPCALL/SB_AIO tests are unlocked — consistent with the
	 * "not clear that's correct" caveat above; confirm before relying
	 * on it.
	 */
	if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL)
		pgsigio(&so->so_sigio, SIGIO, 0);
	if (sb->sb_flags & SB_UPCALL)
		(*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
	if (sb->sb_flags & SB_AIO)
		aio_swake(so, sb);
	mtx_assert(SOCKBUF_MTX(sb), MA_NOTOWNED);
}

/*
 * Socket buffer (struct sockbuf) utility routines.
 *
 * Each socket contains two socket buffers: one for sending data and
 * one for receiving data.  Each buffer contains a queue of mbufs,
 * information about the number of mbufs and amount of data in the
 * queue, and other fields allowing select() statements and notification
 * on data availability to be implemented.
 *
 * Data stored in a socket buffer is maintained as a list of records.
 * Each record is a list of mbufs chained together with the m_next
 * field.  Records are chained together with the m_nextpkt field.  The upper
 * level routine soreceive() expects the following conventions to be
 * observed when placing information in the receive buffer:
 *
 * 1. If the protocol requires each message be preceded by the sender's
 *    name, then a record containing that name must be present before
 *    any associated data (mbuf's must be of type MT_SONAME).
 * 2. If the protocol supports the exchange of ``access rights'' (really
 *    just additional data associated with the message), and there are
 *    ``rights'' to be received, then a record containing this data
 *    should be present (mbuf's must be of type MT_RIGHTS).
 * 3.
 *    If a name or rights record exists, then it must be followed by
 *    a data record, perhaps of zero length.
 *
 * Before using a new socket structure it is first necessary to reserve
 * buffer space to the socket, by calling sbreserve().  This should commit
 * some of the available buffer space in the system buffer pool for the
 * socket (currently, it does nothing but enforce limits).  The space
 * should be released by calling sbrelease() when the socket is destroyed.
 */

/*
 * Reserve space for both sockbufs of a socket and establish default
 * low-water marks.  Returns 0 on success, ENOBUFS on failure.
 */
int
soreserve(so, sndcc, rcvcc)
	register struct socket *so;
	u_long sndcc, rcvcc;
{
	struct thread *td = curthread;

	SOCKBUF_LOCK(&so->so_snd);
	SOCKBUF_LOCK(&so->so_rcv);
	if (sbreserve_locked(&so->so_snd, sndcc, so, td) == 0)
		goto bad;
	if (sbreserve_locked(&so->so_rcv, rcvcc, so, td) == 0)
		goto bad2;
	if (so->so_rcv.sb_lowat == 0)
		so->so_rcv.sb_lowat = 1;
	if (so->so_snd.sb_lowat == 0)
		so->so_snd.sb_lowat = MCLBYTES;
	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
	SOCKBUF_UNLOCK(&so->so_rcv);
	SOCKBUF_UNLOCK(&so->so_snd);
	return (0);
bad2:
	sbrelease_locked(&so->so_snd, so);
bad:
	SOCKBUF_UNLOCK(&so->so_rcv);
	SOCKBUF_UNLOCK(&so->so_snd);
	return (ENOBUFS);
}

/*
 * Sysctl handler for sb_max: export the current value, validate a new one
 * (must be at least MSIZE + MCLBYTES) and recompute sb_max_adj.
 * Assumes arg1 points at sb_max — the global is read back after SYSCTL_IN.
 */
static int
sysctl_handle_sb_max(SYSCTL_HANDLER_ARGS)
{
	int error = 0;
	u_long old_sb_max = sb_max;

	error = SYSCTL_OUT(req, arg1, sizeof(u_long));
	if (error || !req->newptr)
		return (error);
	error = SYSCTL_IN(req, arg1, sizeof(u_long));
	if (error)
		return (error);
	if (sb_max < MSIZE + MCLBYTES) {
		/* Reject and restore the previous value. */
		sb_max = old_sb_max;
		return (EINVAL);
	}
	sb_max_adj = (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES);
	return (0);
}

/*
 * Allot mbufs to a sockbuf.
 * Attempt to scale mbmax so that mbcnt doesn't become limiting
 * if buffering efficiency is near the normal case.
 */
int
sbreserve_locked(sb, cc, so, td)
	struct sockbuf *sb;
	u_long cc;
	struct socket *so;
	struct thread *td;
{
	rlim_t sbsize_limit;

	SOCKBUF_LOCK_ASSERT(sb);

	/*
	 * td will only be NULL when we're in an interrupt
	 * (e.g. in tcp_input())
	 */
	if (cc > sb_max_adj)
		return (0);
	if (td != NULL) {
		PROC_LOCK(td->td_proc);
		sbsize_limit = lim_cur(td->td_proc, RLIMIT_SBSIZE);
		PROC_UNLOCK(td->td_proc);
	} else
		sbsize_limit = RLIM_INFINITY;
	/* Charge the owner's per-uid sbsize accounting; fail if over limit. */
	if (!chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc,
	    sbsize_limit))
		return (0);
	sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
	if (sb->sb_lowat > sb->sb_hiwat)
		sb->sb_lowat = sb->sb_hiwat;
	return (1);
}

/* Unlocked wrapper around sbreserve_locked(). */
int
sbreserve(sb, cc, so, td)
	struct sockbuf *sb;
	u_long cc;
	struct socket *so;
	struct thread *td;
{
	int error;

	SOCKBUF_LOCK(sb);
	error = sbreserve_locked(sb, cc, so, td);
	SOCKBUF_UNLOCK(sb);
	return (error);
}

/*
 * Free mbufs held by a socket, and reserved mbuf space.
 */
void
sbrelease_locked(sb, so)
	struct sockbuf *sb;
	struct socket *so;
{

	SOCKBUF_LOCK_ASSERT(sb);

	sbflush_locked(sb);
	/* Return the reservation to the owner's sbsize accounting. */
	(void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0,
	    RLIM_INFINITY);
	sb->sb_mbmax = 0;
}

/* Unlocked wrapper around sbrelease_locked(). */
void
sbrelease(sb, so)
	struct sockbuf *sb;
	struct socket *so;
{

	SOCKBUF_LOCK(sb);
	sbrelease_locked(sb, so);
	SOCKBUF_UNLOCK(sb);
}

/*
 * Routines to add and remove
 * data from an mbuf queue.
 *
 * The routines sbappend() or sbappendrecord() are normally called to
 * append new mbufs to a socket buffer, after checking that adequate
 * space is available, comparing the function sbspace() with the amount
 * of data to be added.  sbappendrecord() differs from sbappend() in
 * that data supplied is treated as the beginning of a new record.
 * To place a sender's address, optional access rights, and data in a
 * socket receive buffer, sbappendaddr() should be used.  To place
 * access rights and data in a socket receive buffer, sbappendrights()
 * should be used.  In either case, the new data begins a new record.
 * Note that unlike sbappend() and sbappendrecord(), these routines check
 * for the caller that there will be enough space to store the data.
 * Each fails if there is not enough space, or if it cannot find mbufs
 * to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data
 * awaiting acknowledgement.  Data is normally copied from a socket
 * send buffer in a protocol with m_copy for output to a peer,
 * and then removing the data from the socket buffer with sbdrop()
 * or sbdroprecord() when the data is acknowledged by the peer.
 */

#ifdef SOCKBUF_DEBUG
/* Verify that sb_lastrecord really is the last record in the buffer. */
void
sblastrecordchk(struct sockbuf *sb, const char *file, int line)
{
	struct mbuf *m = sb->sb_mb;

	SOCKBUF_LOCK_ASSERT(sb);

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	if (m != sb->sb_lastrecord) {
		printf("%s: sb_mb %p sb_lastrecord %p last %p\n",
		    __func__, sb->sb_mb, sb->sb_lastrecord, m);
		printf("packet chain:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
			printf("\t%p\n", m);
		panic("%s from %s:%u", __func__, file, line);
	}
}

/* Verify that sb_mbtail really is the last mbuf of the last record. */
void
sblastmbufchk(struct sockbuf *sb, const char *file, int line)
{
	struct mbuf *m = sb->sb_mb;
	struct mbuf *n;

	SOCKBUF_LOCK_ASSERT(sb);

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	while (m && m->m_next)
		m = m->m_next;

	if (m != sb->sb_mbtail) {
		printf("%s: sb_mb %p sb_mbtail %p last %p\n",
		    __func__, sb->sb_mb, sb->sb_mbtail, m);
		printf("packet tree:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
			printf("\t");
			for (n = m; n != NULL; n = n->m_next)
				printf("%p ", n);
			printf("\n");
		}
		panic("%s from %s:%u", __func__, file, line);
	}
}
#endif /* SOCKBUF_DEBUG */

/*
 * Link record m0 onto the tail of the record chain, updating both
 * sb_mb (if the buffer was empty) and sb_lastrecord.
 */
#define SBLINKRECORD(sb, m0) do {					\
	SOCKBUF_LOCK_ASSERT(sb);					\
	if ((sb)->sb_lastrecord != NULL)				\
		(sb)->sb_lastrecord->m_nextpkt = (m0);			\
	else								\
		(sb)->sb_mb = (m0);					\
	(sb)->sb_lastrecord = (m0);					\
} while (/*CONSTCOND*/0)

/*
 * Append mbuf chain m to the last record in the
 * socket buffer sb.  The additional space associated
 * the mbuf chain is recorded in sb.  Empty mbufs are
 * discarded and mbufs are compacted where possible.
 */
void
sbappend_locked(sb, m)
	struct sockbuf *sb;
	struct mbuf *m;
{
	register struct mbuf *n;

	SOCKBUF_LOCK_ASSERT(sb);

	if (m == 0)
		return;

	SBLASTRECORDCHK(sb);
	n = sb->sb_mb;
	if (n) {
		/* Find the last record, then scan it for an M_EOR mark. */
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		do {
			if (n->m_flags & M_EOR) {
				/* A finished record: start a new one. */
				sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
				return;
			}
		} while (n->m_next && (n = n->m_next));
	} else {
		/*
		 * XXX Would like to simply use sb_mbtail here, but
		 * XXX I need to verify that I won't miss an EOR that
		 * XXX way.
		 */
		if ((n = sb->sb_lastrecord) != NULL) {
			do {
				if (n->m_flags & M_EOR) {
					sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
					return;
				}
			} while (n->m_next && (n = n->m_next));
		} else {
			/*
			 * If this is the first record in the socket buffer,
			 * it's also the last record.
			 */
			sb->sb_lastrecord = m;
		}
	}
	sbcompress(sb, m, n);
	SBLASTRECORDCHK(sb);
}

/*
 * Append mbuf chain m to the last record in the
 * socket buffer sb.  The additional space associated
 * the mbuf chain is recorded in sb.  Empty mbufs are
 * discarded and mbufs are compacted where possible.
 */
void
sbappend(sb, m)
	struct sockbuf *sb;
	struct mbuf *m;
{

	SOCKBUF_LOCK(sb);
	sbappend_locked(sb, m);
	SOCKBUF_UNLOCK(sb);
}

/*
 * This version of sbappend() should only be used when the caller
 * absolutely knows that there will never be more than one record
 * in the socket buffer, that is, a stream protocol (such as TCP).
 */
void
sbappendstream_locked(struct sockbuf *sb, struct mbuf *m)
{
	SOCKBUF_LOCK_ASSERT(sb);

	KASSERT(m->m_nextpkt == NULL,("sbappendstream 0"));
	KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1"));

	SBLASTMBUFCHK(sb);

	/* Tack m onto the tail mbuf directly; there is only one record. */
	sbcompress(sb, m, sb->sb_mbtail);

	sb->sb_lastrecord = sb->sb_mb;
	SBLASTRECORDCHK(sb);
}

/*
 * This version of sbappend() should only be used when the caller
 * absolutely knows that there will never be more than one record
 * in the socket buffer, that is, a stream protocol (such as TCP).
 */
void
sbappendstream(struct sockbuf *sb, struct mbuf *m)
{

	SOCKBUF_LOCK(sb);
	sbappendstream_locked(sb, m);
	SOCKBUF_UNLOCK(sb);
}

#ifdef SOCKBUF_DEBUG
/* Recount sb_cc/sb_mbcnt from the mbuf chains and panic on mismatch. */
void
sbcheck(sb)
	struct sockbuf *sb;
{
	struct mbuf *m;
	struct mbuf *n = 0;
	u_long len = 0, mbcnt = 0;

	SOCKBUF_LOCK_ASSERT(sb);

	for (m = sb->sb_mb; m; m = n) {
		n = m->m_nextpkt;
		for (; m; m = m->m_next) {
			len += m->m_len;
			mbcnt += MSIZE;
			if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
				mbcnt += m->m_ext.ext_size;
		}
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		printf("cc %ld != %u || mbcnt %ld != %u\n", len, sb->sb_cc,
		    mbcnt, sb->sb_mbcnt);
		panic("sbcheck");
	}
}
#endif

/*
 * As above, except the mbuf chain
 * begins a new record.
 */
void
sbappendrecord_locked(sb, m0)
	register struct sockbuf *sb;
	register struct mbuf *m0;
{
	register struct mbuf *m;

	SOCKBUF_LOCK_ASSERT(sb);

	if (m0 == 0)
		return;
	m = sb->sb_mb;
	if (m)
		while (m->m_nextpkt)
			m = m->m_nextpkt;
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	SBLASTRECORDCHK(sb);
	SBLINKRECORD(sb, m0);
	if (m)
		m->m_nextpkt = m0;
	else
		sb->sb_mb = m0;
	m = m0->m_next;
	m0->m_next = 0;
	/* If the record head carried M_EOR, move it to the remainder. */
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
}

/*
 * As above, except the mbuf chain
 * begins a new record.
 */
void
sbappendrecord(sb, m0)
	register struct sockbuf *sb;
	register struct mbuf *m0;
{

	SOCKBUF_LOCK(sb);
	sbappendrecord_locked(sb, m0);
	SOCKBUF_UNLOCK(sb);
}

/*
 * As above except that OOB data
 * is inserted at the beginning of the sockbuf,
 * but after any other OOB data.
 */
void
sbinsertoob_locked(sb, m0)
	register struct sockbuf *sb;
	register struct mbuf *m0;
{
	register struct mbuf *m;
	register struct mbuf **mp;

	SOCKBUF_LOCK_ASSERT(sb);

	if (m0 == 0)
		return;
	/* Skip past existing OOB records (and their control prefixes). */
	for (mp = &sb->sb_mb; *mp ; mp = &((*mp)->m_nextpkt)) {
		m = *mp;
	    again:
		switch (m->m_type) {

		case MT_OOBDATA:
			continue;		/* WANT next train */

		case MT_CONTROL:
			m = m->m_next;
			if (m)
				goto again;	/* inspect THIS train further */
		}
		break;
	}
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	m0->m_nextpkt = *mp;
	*mp = m0;
	m = m0->m_next;
	m0->m_next = 0;
	/* If the record head carried M_EOR, move it to the remainder. */
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
}

/*
 * As above except that OOB data
 * is inserted at the beginning of the sockbuf,
 * but after any other OOB data.
 */
void
sbinsertoob(sb, m0)
	register struct sockbuf *sb;
	register struct mbuf *m0;
{

	SOCKBUF_LOCK(sb);
	sbinsertoob_locked(sb, m0);
	SOCKBUF_UNLOCK(sb);
}

/*
 * Append address and data, and optionally, control (ancillary) data
 * to the receive queue of a socket.  If present,
 * m0 must include a packet header with total length.
 * Returns 0 if no space in sockbuf or insufficient mbufs.
 */
int
sbappendaddr_locked(sb, asa, m0, control)
	struct sockbuf *sb;
	const struct sockaddr *asa;
	struct mbuf *m0, *control;
{
	struct mbuf *m, *n, *nlast;
	int space = asa->sa_len;

	SOCKBUF_LOCK_ASSERT(sb);

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddr_locked");
	if (m0)
		space += m0->m_pkthdr.len;
	/* m_length() also returns the last mbuf of control in n. */
	space += m_length(control, &n);

	if (space > sbspace(sb))
		return (0);
#if MSIZE <= 256
	if (asa->sa_len > MLEN)
		return (0);
#endif
	MGET(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	m->m_len = asa->sa_len;
	bcopy(asa, mtod(m, caddr_t), asa->sa_len);
	if (n)
		n->m_next = m0;		/* concatenate data to control */
	else
		control = m0;
	m->m_next = control;
	/* Record = name mbuf + control + data; account for every mbuf. */
	for (n = m; n->m_next != NULL; n = n->m_next)
		sballoc(sb, n);
	sballoc(sb, n);
	nlast = n;
	SBLINKRECORD(sb, m);

	sb->sb_mbtail = nlast;
	SBLASTMBUFCHK(sb);

	SBLASTRECORDCHK(sb);
	return (1);
}

/*
 * Append address and data, and optionally, control (ancillary) data
 * to the receive queue of a socket.  If present,
 * m0 must include a packet header with total length.
 * Returns 0 if no space in sockbuf or insufficient mbufs.
 */
int
sbappendaddr(sb, asa, m0, control)
	struct sockbuf *sb;
	const struct sockaddr *asa;
	struct mbuf *m0, *control;
{
	int retval;

	SOCKBUF_LOCK(sb);
	retval = sbappendaddr_locked(sb, asa, m0, control);
	SOCKBUF_UNLOCK(sb);
	return (retval);
}

/*
 * Append control (ancillary) data and optional data m0 as a new record.
 * Returns 0 if there is no space in the sockbuf, 1 on success.
 */
int
sbappendcontrol_locked(sb, m0, control)
	struct sockbuf *sb;
	struct mbuf *control, *m0;
{
	struct mbuf *m, *n, *mlast;
	int space;

	SOCKBUF_LOCK_ASSERT(sb);

	if (control == 0)
		panic("sbappendcontrol_locked");
	/* m_length() returns the last mbuf of control in n. */
	space = m_length(control, &n) + m_length(m0, NULL);

	if (space > sbspace(sb))
		return (0);
	n->m_next = m0;			/* concatenate data to control */

	SBLASTRECORDCHK(sb);

	for (m = control; m->m_next; m = m->m_next)
		sballoc(sb, m);
	sballoc(sb, m);
	mlast = m;
	SBLINKRECORD(sb, control);

	sb->sb_mbtail = mlast;
	SBLASTMBUFCHK(sb);

	SBLASTRECORDCHK(sb);
	return (1);
}

/* Unlocked wrapper around sbappendcontrol_locked(). */
int
sbappendcontrol(sb, m0, control)
	struct sockbuf *sb;
	struct mbuf *control, *m0;
{
	int retval;

	SOCKBUF_LOCK(sb);
	retval = sbappendcontrol_locked(sb, m0, control);
	SOCKBUF_UNLOCK(sb);
	return (retval);
}

/*
 * Append the data in mbuf chain (m) into the socket buffer sb following mbuf
 * (n).  If (n) is NULL, the buffer is presumed empty.
 *
 * When the data is compressed, mbufs in the chain may be handled in one of
 * three ways:
 *
 * (1) The mbuf may simply be dropped, if it contributes nothing (no data, no
 *     record boundary, and no change in data type).
 *
 * (2) The mbuf may be coalesced -- i.e., data in the mbuf may be copied into
 *     an mbuf already in the socket buffer.
 *     This can occur if an
 *     appropriate mbuf exists, there is room, and no merging of data types
 *     will occur.
 *
 * (3) The mbuf may be appended to the end of the existing mbuf chain.
 *
 * If any of the new mbufs is marked as M_EOR, mark the last mbuf appended as
 * end-of-record.
 */
void
sbcompress(sb, m, n)
	register struct sockbuf *sb;
	register struct mbuf *m, *n;
{
	register int eor = 0;
	register struct mbuf *o;

	SOCKBUF_LOCK_ASSERT(sb);

	while (m) {
		eor |= m->m_flags & M_EOR;
		/* Case (1): drop an empty mbuf that carries no boundary. */
		if (m->m_len == 0 &&
		    (eor == 0 ||
		     (((o = m->m_next) || (o = n)) &&
		      o->m_type == m->m_type))) {
			if (sb->sb_lastrecord == m)
				sb->sb_lastrecord = m->m_next;
			m = m_free(m);
			continue;
		}
		/* Case (2): coalesce small data into the preceding mbuf. */
		if (n && (n->m_flags & M_EOR) == 0 &&
		    M_WRITABLE(n) &&
		    m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
				/* XXX: Probably don't need.*/
				sb->sb_ctl += m->m_len;
			m = m_free(m);
			continue;
		}
		/* Case (3): link the mbuf onto the end of the chain. */
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sb->sb_mbtail = m;
		sballoc(sb, m);
		n = m;
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	if (eor) {
		KASSERT(n != NULL, ("sbcompress: eor && n == NULL"));
		/* Re-mark end-of-record on the last mbuf kept. */
		n->m_flags |= eor;
	}
	SBLASTMBUFCHK(sb);
}

/*
 * Free all mbufs in a sockbuf.
 * Check that all resources are reclaimed.
 */
void
sbflush_locked(sb)
	register struct sockbuf *sb;
{

	SOCKBUF_LOCK_ASSERT(sb);

	if (sb->sb_flags & SB_LOCK)
		panic("sbflush_locked: locked");
	while (sb->sb_mbcnt) {
		/*
		 * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
		 * we would loop forever.  Panic instead (the break falls
		 * through to the consistency check below).
		 */
		if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len))
			break;
		sbdrop_locked(sb, (int)sb->sb_cc);
	}
	if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt)
		panic("sbflush_locked: cc %u || mb %p || mbcnt %u",
		    sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt);
}

/* Unlocked wrapper around sbflush_locked(). */
void
sbflush(sb)
	register struct sockbuf *sb;
{

	SOCKBUF_LOCK(sb);
	sbflush_locked(sb);
	SOCKBUF_UNLOCK(sb);
}

/*
 * Drop data from (the front of) a sockbuf.
 */
void
sbdrop_locked(sb, len)
	register struct sockbuf *sb;
	register int len;
{
	register struct mbuf *m;
	struct mbuf *next;

	SOCKBUF_LOCK_ASSERT(sb);

	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	while (len > 0) {
		if (m == 0) {
			/* Current record exhausted; move to the next one. */
			if (next == 0)
				panic("sbdrop");
			m = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			/* Partial drop: trim the front of this mbuf. */
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
				sb->sb_ctl -= len;
			break;
		}
		len -= m->m_len;
		sbfree(sb, m);
		m = m_free(m);
	}
	/* Discard any now-empty mbufs left at the record head. */
	while (m && m->m_len == 0) {
		sbfree(sb, m);
		m = m_free(m);
	}
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;
	/*
	 * First part is an inline SB_EMPTY_FIXUP().  Second part
	 * makes sure sb_lastrecord is up-to-date if we dropped
	 * part of the last record.
	 */
	m = sb->sb_mb;
	if (m == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (m->m_nextpkt == NULL) {
		sb->sb_lastrecord = m;
	}
}

/*
 * Drop data from (the front of) a sockbuf.
 */
void
sbdrop(sb, len)
	register struct sockbuf *sb;
	register int len;
{

	SOCKBUF_LOCK(sb);
	sbdrop_locked(sb, len);
	SOCKBUF_UNLOCK(sb);
}

/*
 * Drop a record off the front of a sockbuf
 * and move the next record to the front.
 */
void
sbdroprecord_locked(sb)
	register struct sockbuf *sb;
{
	register struct mbuf *m;

	SOCKBUF_LOCK_ASSERT(sb);

	m = sb->sb_mb;
	if (m) {
		sb->sb_mb = m->m_nextpkt;
		do {
			sbfree(sb, m);
			m = m_free(m);
		} while (m);
	}
	/* Reset sb_mbtail/sb_lastrecord if the buffer went empty. */
	SB_EMPTY_FIXUP(sb);
}

/*
 * Drop a record off the front of a sockbuf
 * and move the next record to the front.
 */
void
sbdroprecord(sb)
	register struct sockbuf *sb;
{

	SOCKBUF_LOCK(sb);
	sbdroprecord_locked(sb);
	SOCKBUF_UNLOCK(sb);
}

/*
 * Create a "control" mbuf containing the specified data
 * with the specified type for presentation on a socket buffer.
1260 */ 1261struct mbuf * 1262sbcreatecontrol(p, size, type, level) 1263 caddr_t p; 1264 register int size; 1265 int type, level; 1266{ 1267 register struct cmsghdr *cp; 1268 struct mbuf *m; 1269 1270 if (CMSG_SPACE((u_int)size) > MCLBYTES) 1271 return ((struct mbuf *) NULL); 1272 if (CMSG_SPACE((u_int)size) > MLEN) 1273 m = m_getcl(M_DONTWAIT, MT_CONTROL, 0); 1274 else 1275 m = m_get(M_DONTWAIT, MT_CONTROL); 1276 if (m == NULL) 1277 return ((struct mbuf *) NULL); 1278 cp = mtod(m, struct cmsghdr *); 1279 m->m_len = 0; 1280 KASSERT(CMSG_SPACE((u_int)size) <= M_TRAILINGSPACE(m), 1281 ("sbcreatecontrol: short mbuf")); 1282 if (p != NULL) 1283 (void)memcpy(CMSG_DATA(cp), p, size); 1284 m->m_len = CMSG_SPACE(size); 1285 cp->cmsg_len = CMSG_LEN(size); 1286 cp->cmsg_level = level; 1287 cp->cmsg_type = type; 1288 return (m); 1289} 1290 1291/* 1292 * Some routines that return EOPNOTSUPP for entry points that are not 1293 * supported by a protocol. Fill in as needed. 1294 */
| 34 35#include "opt_mac.h" 36#include "opt_param.h" 37 38#include <sys/param.h> 39#include <sys/aio.h> /* for aio_swake proto */ 40#include <sys/domain.h> 41#include <sys/event.h> 42#include <sys/file.h> /* for maxfiles */ 43#include <sys/kernel.h> 44#include <sys/lock.h> 45#include <sys/mac.h> 46#include <sys/malloc.h> 47#include <sys/mbuf.h> 48#include <sys/mutex.h> 49#include <sys/proc.h> 50#include <sys/protosw.h> 51#include <sys/resourcevar.h> 52#include <sys/signalvar.h> 53#include <sys/socket.h> 54#include <sys/socketvar.h> 55#include <sys/stat.h> 56#include <sys/sysctl.h> 57#include <sys/systm.h> 58 59int maxsockets; 60 61void (*aio_swake)(struct socket *, struct sockbuf *); 62 63/* 64 * Primitive routines for operating on sockets and socket buffers 65 */ 66 67u_long sb_max = SB_MAX; 68static u_long sb_max_adj = 69 SB_MAX * MCLBYTES / (MSIZE + MCLBYTES); /* adjusted sb_max */ 70 71static u_long sb_efficiency = 8; /* parameter for sbreserve() */ 72 73#ifdef REGRESSION 74static int regression_sonewconn_earlytest = 1; 75SYSCTL_INT(_regression, OID_AUTO, sonewconn_earlytest, CTLFLAG_RW, 76 ®ression_sonewconn_earlytest, 0, "Perform early sonewconn limit test"); 77#endif 78 79/* 80 * Procedures to manipulate state flags of socket 81 * and do appropriate wakeups. Normal sequence from the 82 * active (originating) side is that soisconnecting() is 83 * called during processing of connect() call, 84 * resulting in an eventual call to soisconnected() if/when the 85 * connection is established. When the connection is torn down 86 * soisdisconnecting() is called during processing of disconnect() call, 87 * and soisdisconnected() is called when the connection to the peer 88 * is totally severed. The semantics of these routines are such that 89 * connectionless protocols can call soisconnected() and soisdisconnected() 90 * only, bypassing the in-progress calls when setting up a ``connection'' 91 * takes no time. 
92 * 93 * From the passive side, a socket is created with 94 * two queues of sockets: so_incomp for connections in progress 95 * and so_comp for connections already made and awaiting user acceptance. 96 * As a protocol is preparing incoming connections, it creates a socket 97 * structure queued on so_incomp by calling sonewconn(). When the connection 98 * is established, soisconnected() is called, and transfers the 99 * socket structure to so_comp, making it available to accept(). 100 * 101 * If a socket is closed with sockets on either 102 * so_incomp or so_comp, these sockets are dropped. 103 * 104 * If higher level protocols are implemented in 105 * the kernel, the wakeups done here will sometimes 106 * cause software-interrupt process scheduling. 107 */ 108 109void 110soisconnecting(so) 111 register struct socket *so; 112{ 113 114 SOCK_LOCK(so); 115 so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); 116 so->so_state |= SS_ISCONNECTING; 117 SOCK_UNLOCK(so); 118} 119 120void 121soisconnected(so) 122 struct socket *so; 123{ 124 struct socket *head; 125 126 ACCEPT_LOCK(); 127 SOCK_LOCK(so); 128 so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING); 129 so->so_state |= SS_ISCONNECTED; 130 head = so->so_head; 131 if (head != NULL && (so->so_qstate & SQ_INCOMP)) { 132 if ((so->so_options & SO_ACCEPTFILTER) == 0) { 133 SOCK_UNLOCK(so); 134 TAILQ_REMOVE(&head->so_incomp, so, so_list); 135 head->so_incqlen--; 136 so->so_qstate &= ~SQ_INCOMP; 137 TAILQ_INSERT_TAIL(&head->so_comp, so, so_list); 138 head->so_qlen++; 139 so->so_qstate |= SQ_COMP; 140 ACCEPT_UNLOCK(); 141 sorwakeup(head); 142 wakeup_one(&head->so_timeo); 143 } else { 144 ACCEPT_UNLOCK(); 145 so->so_upcall = 146 head->so_accf->so_accept_filter->accf_callback; 147 so->so_upcallarg = head->so_accf->so_accept_filter_arg; 148 so->so_rcv.sb_flags |= SB_UPCALL; 149 so->so_options &= ~SO_ACCEPTFILTER; 150 SOCK_UNLOCK(so); 151 so->so_upcall(so, so->so_upcallarg, M_DONTWAIT); 152 } 153 return; 154 } 
155 SOCK_UNLOCK(so); 156 ACCEPT_UNLOCK(); 157 wakeup(&so->so_timeo); 158 sorwakeup(so); 159 sowwakeup(so); 160} 161 162void 163soisdisconnecting(so) 164 register struct socket *so; 165{ 166 167 /* 168 * XXXRW: This code assumes that SOCK_LOCK(so) and 169 * SOCKBUF_LOCK(&so->so_rcv) are the same. 170 */ 171 SOCKBUF_LOCK(&so->so_rcv); 172 so->so_state &= ~SS_ISCONNECTING; 173 so->so_state |= SS_ISDISCONNECTING; 174 so->so_rcv.sb_state |= SBS_CANTRCVMORE; 175 sorwakeup_locked(so); 176 SOCKBUF_LOCK(&so->so_snd); 177 so->so_snd.sb_state |= SBS_CANTSENDMORE; 178 sowwakeup_locked(so); 179 wakeup(&so->so_timeo); 180} 181 182void 183soisdisconnected(so) 184 register struct socket *so; 185{ 186 187 /* 188 * XXXRW: This code assumes that SOCK_LOCK(so) and 189 * SOCKBUF_LOCK(&so->so_rcv) are the same. 190 */ 191 SOCKBUF_LOCK(&so->so_rcv); 192 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); 193 so->so_state |= SS_ISDISCONNECTED; 194 so->so_rcv.sb_state |= SBS_CANTRCVMORE; 195 sorwakeup_locked(so); 196 SOCKBUF_LOCK(&so->so_snd); 197 so->so_snd.sb_state |= SBS_CANTSENDMORE; 198 sbdrop_locked(&so->so_snd, so->so_snd.sb_cc); 199 sowwakeup_locked(so); 200 wakeup(&so->so_timeo); 201} 202 203/* 204 * When an attempt at a new connection is noted on a socket 205 * which accepts connections, sonewconn is called. If the 206 * connection is possible (subject to space constraints, etc.) 207 * then we allocate a new structure, propoerly linked into the 208 * data structure of the original socket, and return this. 209 * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED. 
 *
 * note: the ref count on the socket is 0 on return
 */
struct socket *
sonewconn(head, connstatus)
	register struct socket *head;
	int connstatus;
{
	register struct socket *so;
	int over;

	/* Early limit test: refuse if the completed queue is overfull. */
	ACCEPT_LOCK();
	over = (head->so_qlen > 3 * head->so_qlimit / 2);
	ACCEPT_UNLOCK();
#ifdef REGRESSION
	if (regression_sonewconn_earlytest && over)
#else
	if (over)
#endif
		return (NULL);
	so = soalloc(M_NOWAIT);
	if (so == NULL)
		return (NULL);
	/* Accept-filtered listeners keep the child incomplete for now. */
	if ((head->so_options & SO_ACCEPTFILTER) != 0)
		connstatus = 0;
	/* Inherit attributes of the listening socket. */
	so->so_head = head;
	so->so_type = head->so_type;
	so->so_options = head->so_options &~ SO_ACCEPTCONN;
	so->so_linger = head->so_linger;
	so->so_state = head->so_state | SS_NOFDREF;
	so->so_proto = head->so_proto;
	so->so_timeo = head->so_timeo;
	so->so_cred = crhold(head->so_cred);
#ifdef MAC
	SOCK_LOCK(head);
	mac_create_socket_from_socket(head, so);
	SOCK_UNLOCK(head);
#endif
	knlist_init(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv),
	    NULL, NULL, NULL);
	knlist_init(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd),
	    NULL, NULL, NULL);
	if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat) ||
	    (*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) {
		sodealloc(so);
		return (NULL);
	}
	so->so_state |= connstatus;
	ACCEPT_LOCK();
	if (connstatus) {
		/* Already connected: straight onto the completed queue. */
		TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
		so->so_qstate |= SQ_COMP;
		head->so_qlen++;
	} else {
		/*
		 * Keep removing sockets from the head until there's room for
		 * us to insert on the tail.  In pre-locking revisions, this
		 * was a simple if(), but as we could be racing with other
		 * threads and soabort() requires dropping locks, we must
		 * loop waiting for the condition to be true.
		 */
		while (head->so_incqlen > head->so_qlimit) {
			struct socket *sp;
			sp = TAILQ_FIRST(&head->so_incomp);
			TAILQ_REMOVE(&head->so_incomp, sp, so_list);
			head->so_incqlen--;
			sp->so_qstate &= ~SQ_INCOMP;
			sp->so_head = NULL;
			/* soabort() may sleep; drop the accept lock for it. */
			ACCEPT_UNLOCK();
			soabort(sp);
			ACCEPT_LOCK();
		}
		TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list);
		so->so_qstate |= SQ_INCOMP;
		head->so_incqlen++;
	}
	ACCEPT_UNLOCK();
	if (connstatus) {
		sorwakeup(head);
		wakeup_one(&head->so_timeo);
	}
	return (so);
}

/*
 * Socantsendmore indicates that no more data will be sent on the
 * socket; it would normally be applied to a socket when the user
 * informs the system that no more data is to be sent, by the protocol
 * code (in case PRU_SHUTDOWN).  Socantrcvmore indicates that no more data
 * will be received, and will normally be applied to the socket by a
 * protocol when it detects that the peer will send no more data.
 * Data queued for reading in the socket may yet be read.
 */
void
socantsendmore_locked(so)
	struct socket *so;
{

	SOCKBUF_LOCK_ASSERT(&so->so_snd);

	so->so_snd.sb_state |= SBS_CANTSENDMORE;
	/* sowwakeup_locked() drops the sockbuf lock. */
	sowwakeup_locked(so);
	mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
}

void
socantsendmore(so)
	struct socket *so;
{

	SOCKBUF_LOCK(&so->so_snd);
	socantsendmore_locked(so);
	mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
}

void
socantrcvmore_locked(so)
	struct socket *so;
{

	SOCKBUF_LOCK_ASSERT(&so->so_rcv);

	so->so_rcv.sb_state |= SBS_CANTRCVMORE;
	/* sorwakeup_locked() drops the sockbuf lock. */
	sorwakeup_locked(so);
	mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
}

void
socantrcvmore(so)
	struct socket *so;
{

	SOCKBUF_LOCK(&so->so_rcv);
	socantrcvmore_locked(so);
	mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
}

/*
 * Wait for data to arrive at/drain from a socket buffer.
 */
int
sbwait(sb)
	struct sockbuf *sb;
{

	SOCKBUF_LOCK_ASSERT(sb);

	sb->sb_flags |= SB_WAIT;
	/* Sleep on sb_cc; interruptible unless SB_NOINTR is set. */
	return (msleep(&sb->sb_cc, &sb->sb_mtx,
	    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
	    sb->sb_timeo));
}

/*
 * Lock a sockbuf already known to be locked;
 * return any error returned from sleep (EINTR).
 */
int
sb_lock(sb)
	register struct sockbuf *sb;
{
	int error;

	SOCKBUF_LOCK_ASSERT(sb);

	/* Sleep until the previous holder clears SB_LOCK. */
	while (sb->sb_flags & SB_LOCK) {
		sb->sb_flags |= SB_WANT;
		error = msleep(&sb->sb_flags, &sb->sb_mtx,
		    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK|PCATCH,
		    "sblock", 0);
		if (error)
			return (error);
	}
	sb->sb_flags |= SB_LOCK;
	return (0);
}

/*
 * Wakeup processes waiting on a socket buffer.  Do asynchronous
 * notification via SIGIO if the socket has the SS_ASYNC flag set.
 *
 * Called with the socket buffer lock held; will release the lock by the end
 * of the function.  This allows the caller to acquire the socket buffer lock
 * while testing for the need for various sorts of wakeup and hold it through
 * to the point where it's no longer required.  We currently hold the lock
 * through calls out to other subsystems (with the exception of kqueue), and
 * then release it to avoid lock order issues.  It's not clear that's
 * correct.
 */
void
sowakeup(so, sb)
	register struct socket *so;
	register struct sockbuf *sb;
{

	SOCKBUF_LOCK_ASSERT(sb);

	selwakeuppri(&sb->sb_sel, PSOCK);
	sb->sb_flags &= ~SB_SEL;
	if (sb->sb_flags & SB_WAIT) {
		sb->sb_flags &= ~SB_WAIT;
		wakeup(&sb->sb_cc);
	}
	KNOTE_LOCKED(&sb->sb_sel.si_note, 0);
	SOCKBUF_UNLOCK(sb);
	/*
	 * NOTE(review): sb_flags is re-read below after the sockbuf lock has
	 * been dropped; presumably SB_UPCALL/SB_AIO are stable at this point,
	 * but confirm against the sockbuf locking protocol.
	 */
	if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL)
		pgsigio(&so->so_sigio, SIGIO, 0);
	if (sb->sb_flags & SB_UPCALL)
		(*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
	if (sb->sb_flags & SB_AIO)
		aio_swake(so, sb);
	mtx_assert(SOCKBUF_MTX(sb), MA_NOTOWNED);
}

/*
 * Socket buffer (struct sockbuf) utility routines.
 *
 * Each socket contains two socket buffers: one for sending data and
 * one for receiving data.  Each buffer contains a queue of mbufs,
 * information about the number of mbufs and amount of data in the
 * queue, and other fields allowing select() statements and notification
 * on data availability to be implemented.
 *
 * Data stored in a socket buffer is maintained as a list of records.
 * Each record is a list of mbufs chained together with the m_next
 * field.  Records are chained together with the m_nextpkt field. The upper
 * level routine soreceive() expects the following conventions to be
 * observed when placing information in the receive buffer:
 *
 * 1. If the protocol requires each message be preceded by the sender's
 *    name, then a record containing that name must be present before
 *    any associated data (mbuf's must be of type MT_SONAME).
 * 2. If the protocol supports the exchange of ``access rights'' (really
 *    just additional data associated with the message), and there are
 *    ``rights'' to be received, then a record containing this data
 *    should be present (mbuf's must be of type MT_RIGHTS).
 * 3.
If a name or rights record exists, then it must be followed by 447 * a data record, perhaps of zero length. 448 * 449 * Before using a new socket structure it is first necessary to reserve 450 * buffer space to the socket, by calling sbreserve(). This should commit 451 * some of the available buffer space in the system buffer pool for the 452 * socket (currently, it does nothing but enforce limits). The space 453 * should be released by calling sbrelease() when the socket is destroyed. 454 */ 455 456int 457soreserve(so, sndcc, rcvcc) 458 register struct socket *so; 459 u_long sndcc, rcvcc; 460{ 461 struct thread *td = curthread; 462 463 SOCKBUF_LOCK(&so->so_snd); 464 SOCKBUF_LOCK(&so->so_rcv); 465 if (sbreserve_locked(&so->so_snd, sndcc, so, td) == 0) 466 goto bad; 467 if (sbreserve_locked(&so->so_rcv, rcvcc, so, td) == 0) 468 goto bad2; 469 if (so->so_rcv.sb_lowat == 0) 470 so->so_rcv.sb_lowat = 1; 471 if (so->so_snd.sb_lowat == 0) 472 so->so_snd.sb_lowat = MCLBYTES; 473 if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat) 474 so->so_snd.sb_lowat = so->so_snd.sb_hiwat; 475 SOCKBUF_UNLOCK(&so->so_rcv); 476 SOCKBUF_UNLOCK(&so->so_snd); 477 return (0); 478bad2: 479 sbrelease_locked(&so->so_snd, so); 480bad: 481 SOCKBUF_UNLOCK(&so->so_rcv); 482 SOCKBUF_UNLOCK(&so->so_snd); 483 return (ENOBUFS); 484} 485 486static int 487sysctl_handle_sb_max(SYSCTL_HANDLER_ARGS) 488{ 489 int error = 0; 490 u_long old_sb_max = sb_max; 491 492 error = SYSCTL_OUT(req, arg1, sizeof(u_long)); 493 if (error || !req->newptr) 494 return (error); 495 error = SYSCTL_IN(req, arg1, sizeof(u_long)); 496 if (error) 497 return (error); 498 if (sb_max < MSIZE + MCLBYTES) { 499 sb_max = old_sb_max; 500 return (EINVAL); 501 } 502 sb_max_adj = (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES); 503 return (0); 504} 505 506/* 507 * Allot mbufs to a sockbuf. 508 * Attempt to scale mbmax so that mbcnt doesn't become limiting 509 * if buffering efficiency is near the normal case. 
510 */ 511int 512sbreserve_locked(sb, cc, so, td) 513 struct sockbuf *sb; 514 u_long cc; 515 struct socket *so; 516 struct thread *td; 517{ 518 rlim_t sbsize_limit; 519 520 SOCKBUF_LOCK_ASSERT(sb); 521 522 /* 523 * td will only be NULL when we're in an interrupt 524 * (e.g. in tcp_input()) 525 */ 526 if (cc > sb_max_adj) 527 return (0); 528 if (td != NULL) { 529 PROC_LOCK(td->td_proc); 530 sbsize_limit = lim_cur(td->td_proc, RLIMIT_SBSIZE); 531 PROC_UNLOCK(td->td_proc); 532 } else 533 sbsize_limit = RLIM_INFINITY; 534 if (!chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc, 535 sbsize_limit)) 536 return (0); 537 sb->sb_mbmax = min(cc * sb_efficiency, sb_max); 538 if (sb->sb_lowat > sb->sb_hiwat) 539 sb->sb_lowat = sb->sb_hiwat; 540 return (1); 541} 542 543int 544sbreserve(sb, cc, so, td) 545 struct sockbuf *sb; 546 u_long cc; 547 struct socket *so; 548 struct thread *td; 549{ 550 int error; 551 552 SOCKBUF_LOCK(sb); 553 error = sbreserve_locked(sb, cc, so, td); 554 SOCKBUF_UNLOCK(sb); 555 return (error); 556} 557 558/* 559 * Free mbufs held by a socket, and reserved mbuf space. 560 */ 561void 562sbrelease_locked(sb, so) 563 struct sockbuf *sb; 564 struct socket *so; 565{ 566 567 SOCKBUF_LOCK_ASSERT(sb); 568 569 sbflush_locked(sb); 570 (void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0, 571 RLIM_INFINITY); 572 sb->sb_mbmax = 0; 573} 574 575void 576sbrelease(sb, so) 577 struct sockbuf *sb; 578 struct socket *so; 579{ 580 581 SOCKBUF_LOCK(sb); 582 sbrelease_locked(sb, so); 583 SOCKBUF_UNLOCK(sb); 584} 585/* 586 * Routines to add and remove 587 * data from an mbuf queue. 588 * 589 * The routines sbappend() or sbappendrecord() are normally called to 590 * append new mbufs to a socket buffer, after checking that adequate 591 * space is available, comparing the function sbspace() with the amount 592 * of data to be added. sbappendrecord() differs from sbappend() in 593 * that data supplied is treated as the beginning of a new record. 
 * To place a sender's address, optional access rights, and data in a
 * socket receive buffer, sbappendaddr() should be used.  To place
 * access rights and data in a socket receive buffer, sbappendrights()
 * should be used.  In either case, the new data begins a new record.
 * Note that unlike sbappend() and sbappendrecord(), these routines check
 * for the caller that there will be enough space to store the data.
 * Each fails if there is not enough space, or if it cannot find mbufs
 * to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data
 * awaiting acknowledgement.  Data is normally copied from a socket
 * send buffer in a protocol with m_copy for output to a peer,
 * and then removing the data from the socket buffer with sbdrop()
 * or sbdroprecord() when the data is acknowledged by the peer.
 */

#ifdef SOCKBUF_DEBUG
/*
 * Debug check: verify that sb_lastrecord really points at the last
 * record in the sockbuf, panicking with a chain dump otherwise.
 */
void
sblastrecordchk(struct sockbuf *sb, const char *file, int line)
{
	struct mbuf *m = sb->sb_mb;

	SOCKBUF_LOCK_ASSERT(sb);

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	if (m != sb->sb_lastrecord) {
		printf("%s: sb_mb %p sb_lastrecord %p last %p\n",
			__func__, sb->sb_mb, sb->sb_lastrecord, m);
		printf("packet chain:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
			printf("\t%p\n", m);
		panic("%s from %s:%u", __func__, file, line);
	}
}

/*
 * Debug check: verify that sb_mbtail really points at the last mbuf of
 * the last record, panicking with a full chain dump otherwise.
 */
void
sblastmbufchk(struct sockbuf *sb, const char *file, int line)
{
	struct mbuf *m = sb->sb_mb;
	struct mbuf *n;

	SOCKBUF_LOCK_ASSERT(sb);

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	while (m && m->m_next)
		m = m->m_next;

	if (m != sb->sb_mbtail) {
		printf("%s: sb_mb %p sb_mbtail %p last %p\n",
			__func__, sb->sb_mb, sb->sb_mbtail, m);
		printf("packet tree:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
			printf("\t");
			for (n = m; n != NULL; n = n->m_next)
				printf("%p ", n);
			printf("\n");
		}
		panic("%s from %s:%u", __func__, file, line);
	}
}
#endif /* SOCKBUF_DEBUG */

/*
 * Link record m0 onto the end of the record chain, updating
 * sb_lastrecord.  Caller holds the sockbuf lock.
 */
#define SBLINKRECORD(sb, m0) do {					\
	SOCKBUF_LOCK_ASSERT(sb);					\
	if ((sb)->sb_lastrecord != NULL)				\
		(sb)->sb_lastrecord->m_nextpkt = (m0);			\
	else								\
		(sb)->sb_mb = (m0);					\
	(sb)->sb_lastrecord = (m0);					\
} while (/*CONSTCOND*/0)

/*
 * Append mbuf chain m to the last record in the
 * socket buffer sb.  The additional space associated
 * the mbuf chain is recorded in sb.  Empty mbufs are
 * discarded and mbufs are compacted where possible.
 */
void
sbappend_locked(sb, m)
	struct sockbuf *sb;
	struct mbuf *m;
{
	register struct mbuf *n;

	SOCKBUF_LOCK_ASSERT(sb);

	if (m == 0)
		return;

	SBLASTRECORDCHK(sb);
	n = sb->sb_mb;
	if (n) {
		/* Walk to the last record, then scan it for M_EOR. */
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		do {
			if (n->m_flags & M_EOR) {
				/* Record complete: start a new one. */
				sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
				return;
			}
		} while (n->m_next && (n = n->m_next));
	} else {
		/*
		 * XXX Would like to simply use sb_mbtail here, but
		 * XXX I need to verify that I won't miss an EOR that
		 * XXX way.
		 */
		if ((n = sb->sb_lastrecord) != NULL) {
			do {
				if (n->m_flags & M_EOR) {
					sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
					return;
				}
			} while (n->m_next && (n = n->m_next));
		} else {
			/*
			 * If this is the first record in the socket buffer,
			 * it's also the last record.
			 */
			sb->sb_lastrecord = m;
		}
	}
	sbcompress(sb, m, n);
	SBLASTRECORDCHK(sb);
}

/*
 * Append mbuf chain m to the last record in the
 * socket buffer sb.  The additional space associated
 * the mbuf chain is recorded in sb.  Empty mbufs are
 * discarded and mbufs are compacted where possible.
 */
void
sbappend(sb, m)
	struct sockbuf *sb;
	struct mbuf *m;
{

	SOCKBUF_LOCK(sb);
	sbappend_locked(sb, m);
	SOCKBUF_UNLOCK(sb);
}

/*
 * This version of sbappend() should only be used when the caller
 * absolutely knows that there will never be more than one record
 * in the socket buffer, that is, a stream protocol (such as TCP).
 */
void
sbappendstream_locked(struct sockbuf *sb, struct mbuf *m)
{
	SOCKBUF_LOCK_ASSERT(sb);

	KASSERT(m->m_nextpkt == NULL,("sbappendstream 0"));
	KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1"));

	SBLASTMBUFCHK(sb);

	/* Single-record buffer: append straight after the current tail. */
	sbcompress(sb, m, sb->sb_mbtail);

	sb->sb_lastrecord = sb->sb_mb;
	SBLASTRECORDCHK(sb);
}

/*
 * This version of sbappend() should only be used when the caller
 * absolutely knows that there will never be more than one record
 * in the socket buffer, that is, a stream protocol (such as TCP).
 */
void
sbappendstream(struct sockbuf *sb, struct mbuf *m)
{

	SOCKBUF_LOCK(sb);
	sbappendstream_locked(sb, m);
	SOCKBUF_UNLOCK(sb);
}

#ifdef SOCKBUF_DEBUG
/*
 * Debug check: recompute sb_cc and sb_mbcnt by walking the chains and
 * panic if they disagree with the cached counters.
 */
void
sbcheck(sb)
	struct sockbuf *sb;
{
	struct mbuf *m;
	struct mbuf *n = 0;
	u_long len = 0, mbcnt = 0;

	SOCKBUF_LOCK_ASSERT(sb);

	for (m = sb->sb_mb; m; m = n) {
	    n = m->m_nextpkt;
	    for (; m; m = m->m_next) {
		len += m->m_len;
		mbcnt += MSIZE;
		if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
			mbcnt += m->m_ext.ext_size;
	    }
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		printf("cc %ld != %u || mbcnt %ld != %u\n", len, sb->sb_cc,
		    mbcnt, sb->sb_mbcnt);
		panic("sbcheck");
	}
}
#endif

/*
 * As above, except the mbuf chain
 * begins a new record.
 */
void
sbappendrecord_locked(sb, m0)
	register struct sockbuf *sb;
	register struct mbuf *m0;
{
	register struct mbuf *m;

	SOCKBUF_LOCK_ASSERT(sb);

	if (m0 == 0)
		return;
	m = sb->sb_mb;
	if (m)
		while (m->m_nextpkt)
			m = m->m_nextpkt;
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	SBLASTRECORDCHK(sb);
	SBLINKRECORD(sb, m0);
	if (m)
		m->m_nextpkt = m0;
	else
		sb->sb_mb = m0;
	m = m0->m_next;
	m0->m_next = 0;
	/* If M_EOR was on the head, move it to the end of the chain. */
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
}

/*
 * As above, except the mbuf chain
 * begins a new record.
 */
void
sbappendrecord(sb, m0)
	register struct sockbuf *sb;
	register struct mbuf *m0;
{

	SOCKBUF_LOCK(sb);
	sbappendrecord_locked(sb, m0);
	SOCKBUF_UNLOCK(sb);
}

/*
 * As above except that OOB data
 * is inserted at the beginning of the sockbuf,
 * but after any other OOB data.
 */
void
sbinsertoob_locked(sb, m0)
	register struct sockbuf *sb;
	register struct mbuf *m0;
{
	register struct mbuf *m;
	register struct mbuf **mp;

	SOCKBUF_LOCK_ASSERT(sb);

	if (m0 == 0)
		return;
	/* Skip past existing OOB records (and their control mbufs). */
	for (mp = &sb->sb_mb; *mp ; mp = &((*mp)->m_nextpkt)) {
	    m = *mp;
	    again:
		switch (m->m_type) {

		case MT_OOBDATA:
			continue;		/* WANT next train */

		case MT_CONTROL:
			m = m->m_next;
			if (m)
				goto again;	/* inspect THIS train further */
		}
		break;
	}
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	m0->m_nextpkt = *mp;
	*mp = m0;
	m = m0->m_next;
	m0->m_next = 0;
	/* If M_EOR was on the head, move it to the end of the chain. */
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
}

/*
 * As above except that OOB data
 * is inserted at the beginning of the sockbuf,
 * but after any other OOB data.
 */
void
sbinsertoob(sb, m0)
	register struct sockbuf *sb;
	register struct mbuf *m0;
{

	SOCKBUF_LOCK(sb);
	sbinsertoob_locked(sb, m0);
	SOCKBUF_UNLOCK(sb);
}

/*
 * Append address and data, and optionally, control (ancillary) data
 * to the receive queue of a socket.  If present,
 * m0 must include a packet header with total length.
 * Returns 0 if no space in sockbuf or insufficient mbufs.
 */
int
sbappendaddr_locked(sb, asa, m0, control)
	struct sockbuf *sb;
	const struct sockaddr *asa;
	struct mbuf *m0, *control;
{
	struct mbuf *m, *n, *nlast;
	int space = asa->sa_len;

	SOCKBUF_LOCK_ASSERT(sb);

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddr_locked");
	if (m0)
		space += m0->m_pkthdr.len;
	space += m_length(control, &n);

	if (space > sbspace(sb))
		return (0);
#if MSIZE <= 256
	if (asa->sa_len > MLEN)
		return (0);
#endif
	MGET(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	m->m_len = asa->sa_len;
	bcopy(asa, mtod(m, caddr_t), asa->sa_len);
	if (n)
		n->m_next = m0;		/* concatenate data to control */
	else
		control = m0;
	m->m_next = control;
	/* Account for every mbuf in the new record: name, control, data. */
	for (n = m; n->m_next != NULL; n = n->m_next)
		sballoc(sb, n);
	sballoc(sb, n);
	nlast = n;
	SBLINKRECORD(sb, m);

	sb->sb_mbtail = nlast;
	SBLASTMBUFCHK(sb);

	SBLASTRECORDCHK(sb);
	return (1);
}

/*
 * Append address and data, and optionally, control (ancillary) data
 * to the receive queue of a socket.
  If present,
 * m0 must include a packet header with total length.
 * Returns 0 if no space in sockbuf or insufficient mbufs.
 */
int
sbappendaddr(sb, asa, m0, control)
	struct sockbuf *sb;
	const struct sockaddr *asa;
	struct mbuf *m0, *control;
{
	int retval;

	SOCKBUF_LOCK(sb);
	retval = sbappendaddr_locked(sb, asa, m0, control);
	SOCKBUF_UNLOCK(sb);
	return (retval);
}

/*
 * Append control (ancillary) data and optional data as a new record.
 * Returns 0 if there is no space in the sockbuf; panics if control is
 * NULL.
 */
int
sbappendcontrol_locked(sb, m0, control)
	struct sockbuf *sb;
	struct mbuf *control, *m0;
{
	struct mbuf *m, *n, *mlast;
	int space;

	SOCKBUF_LOCK_ASSERT(sb);

	if (control == 0)
		panic("sbappendcontrol_locked");
	space = m_length(control, &n) + m_length(m0, NULL);

	if (space > sbspace(sb))
		return (0);
	n->m_next = m0;			/* concatenate data to control */

	SBLASTRECORDCHK(sb);

	/* Charge the sockbuf for every mbuf in the new record. */
	for (m = control; m->m_next; m = m->m_next)
		sballoc(sb, m);
	sballoc(sb, m);
	mlast = m;
	SBLINKRECORD(sb, control);

	sb->sb_mbtail = mlast;
	SBLASTMBUFCHK(sb);

	SBLASTRECORDCHK(sb);
	return (1);
}

int
sbappendcontrol(sb, m0, control)
	struct sockbuf *sb;
	struct mbuf *control, *m0;
{
	int retval;

	SOCKBUF_LOCK(sb);
	retval = sbappendcontrol_locked(sb, m0, control);
	SOCKBUF_UNLOCK(sb);
	return (retval);
}

/*
 * Append the data in mbuf chain (m) into the socket buffer sb following mbuf
 * (n).  If (n) is NULL, the buffer is presumed empty.
 *
 * When the data is compressed, mbufs in the chain may be handled in one of
 * three ways:
 *
 * (1) The mbuf may simply be dropped, if it contributes nothing (no data, no
 *     record boundary, and no change in data type).
 *
 * (2) The mbuf may be coalesced -- i.e., data in the mbuf may be copied into
 *     an mbuf already in the socket buffer.  This can occur if an
 *     appropriate mbuf exists, there is room, and no merging of data types
 *     will occur.
 *
 * (3) The mbuf may be appended to the end of the existing mbuf chain.
 *
 * If any of the new mbufs is marked as M_EOR, mark the last mbuf appended as
 * end-of-record.
 */
void
sbcompress(sb, m, n)
	register struct sockbuf *sb;
	register struct mbuf *m, *n;
{
	register int eor = 0;
	register struct mbuf *o;

	SOCKBUF_LOCK_ASSERT(sb);

	while (m) {
		eor |= m->m_flags & M_EOR;
		/*
		 * Case (1): drop an empty mbuf, as long as doing so cannot
		 * lose an EOR mark (a same-type successor will carry it).
		 */
		if (m->m_len == 0 &&
		    (eor == 0 ||
		     (((o = m->m_next) || (o = n)) &&
		      o->m_type == m->m_type))) {
			if (sb->sb_lastrecord == m)
				sb->sb_lastrecord = m->m_next;
			m = m_free(m);
			continue;
		}
		/*
		 * Case (2): coalesce small data into the trailing space of
		 * the previous writable mbuf of the same type.
		 */
		if (n && (n->m_flags & M_EOR) == 0 &&
		    M_WRITABLE(n) &&
		    m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
				/* XXX: Probably don't need.*/
				sb->sb_ctl += m->m_len;
			m = m_free(m);
			continue;
		}
		/* Case (3): link the mbuf onto the end of the chain. */
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sb->sb_mbtail = m;
		sballoc(sb, m);
		n = m;
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	/* Re-apply a deferred EOR mark to the last mbuf appended. */
	if (eor) {
		KASSERT(n != NULL, ("sbcompress: eor && n == NULL"));
		n->m_flags |= eor;
	}
	SBLASTMBUFCHK(sb);
}

/*
 * Free all mbufs in a sockbuf.
 * Check that all resources are reclaimed.
 */
void
sbflush_locked(sb)
	register struct sockbuf *sb;
{

	SOCKBUF_LOCK_ASSERT(sb);

	/* Flushing a buffer that is still I/O-locked (SB_LOCK) is a bug. */
	if (sb->sb_flags & SB_LOCK)
		panic("sbflush_locked: locked");
	while (sb->sb_mbcnt) {
		/*
		 * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
		 * we would loop forever. Panic instead.
		 */
		if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len))
			break;
		sbdrop_locked(sb, (int)sb->sb_cc);
	}
	/* Byte, chain, and mbuf accounting must all be zero now. */
	if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt)
		panic("sbflush_locked: cc %u || mb %p || mbcnt %u", sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt);
}

/*
 * Free all mbufs in a sockbuf, acquiring the sockbuf lock around the work.
 */
void
sbflush(sb)
	register struct sockbuf *sb;
{

	SOCKBUF_LOCK(sb);
	sbflush_locked(sb);
	SOCKBUF_UNLOCK(sb);
}

/*
 * Drop data from (the front of) a sockbuf.
 */
void
sbdrop_locked(sb, len)
	register struct sockbuf *sb;
	register int len;
{
	register struct mbuf *m;
	struct mbuf *next;

	SOCKBUF_LOCK_ASSERT(sb);

	/* 'next' tracks the head of the record following the current one. */
	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	while (len > 0) {
		if (m == 0) {
			/* Current record exhausted; advance to the next. */
			if (next == 0)
				panic("sbdrop");
			m = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			/* Partial drop: trim the front of this mbuf. */
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
				sb->sb_ctl -= len;
			break;
		}
		len -= m->m_len;
		sbfree(sb, m);
		m = m_free(m);
	}
	/* Discard any now-empty mbufs left at the head of the record. */
	while (m && m->m_len == 0) {
		sbfree(sb, m);
		m = m_free(m);
	}
	/* Re-link what remains of the current record, if anything. */
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;
	/*
	 * First part is an inline SB_EMPTY_FIXUP(). Second part
	 * makes sure sb_lastrecord is up-to-date if we dropped
	 * part of the last record.
	 */
	m = sb->sb_mb;
	if (m == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (m->m_nextpkt == NULL) {
		sb->sb_lastrecord = m;
	}
}

/*
 * Drop data from (the front of) a sockbuf,
 * acquiring the sockbuf lock around the work.
 */
void
sbdrop(sb, len)
	register struct sockbuf *sb;
	register int len;
{

	SOCKBUF_LOCK(sb);
	sbdrop_locked(sb, len);
	SOCKBUF_UNLOCK(sb);
}

/*
 * Drop a record off the front of a sockbuf
 * and move the next record to the front.
 */
void
sbdroprecord_locked(sb)
	register struct sockbuf *sb;
{
	register struct mbuf *m;

	SOCKBUF_LOCK_ASSERT(sb);

	m = sb->sb_mb;
	if (m) {
		/* Unlink the first record, then free its entire chain. */
		sb->sb_mb = m->m_nextpkt;
		do {
			sbfree(sb, m);
			m = m_free(m);
		} while (m);
	}
	SB_EMPTY_FIXUP(sb);
}

/*
 * Drop a record off the front of a sockbuf
 * and move the next record to the front,
 * acquiring the sockbuf lock around the work.
 */
void
sbdroprecord(sb)
	register struct sockbuf *sb;
{

	SOCKBUF_LOCK(sb);
	sbdroprecord_locked(sb);
	SOCKBUF_UNLOCK(sb);
}

/*
 * Create a "control" mbuf containing the specified data
 * with the specified type for presentation on a socket buffer.
 */
struct mbuf *
sbcreatecontrol(p, size, type, level)
	caddr_t p;
	register int size;
	int type, level;
{
	register struct cmsghdr *cp;
	struct mbuf *m;

	/* Refuse payloads that would not fit even in a cluster mbuf. */
	if (CMSG_SPACE((u_int)size) > MCLBYTES)
		return ((struct mbuf *) NULL);
	/* Use a cluster when the aligned cmsg exceeds a plain mbuf. */
	if (CMSG_SPACE((u_int)size) > MLEN)
		m = m_getcl(M_DONTWAIT, MT_CONTROL, 0);
	else
		m = m_get(M_DONTWAIT, MT_CONTROL);
	if (m == NULL)
		return ((struct mbuf *) NULL);
	cp = mtod(m, struct cmsghdr *);
	m->m_len = 0;
	KASSERT(CMSG_SPACE((u_int)size) <= M_TRAILINGSPACE(m),
	    ("sbcreatecontrol: short mbuf"));
	/* p == NULL means the caller will fill the data region itself. */
	if (p != NULL)
		(void)memcpy(CMSG_DATA(cp), p, size);
	m->m_len = CMSG_SPACE(size);
	cp->cmsg_len = CMSG_LEN(size);
	cp->cmsg_level = level;
	cp->cmsg_type = type;
	return (m);
}

/*
 * Some routines that return EOPNOTSUPP for entry points that are not
 * supported by a protocol. Fill in as needed.
 */
|
1299} 1300 1301int 1302pru_accept_notsupp(struct socket *so, struct sockaddr **nam) 1303{ 1304 return EOPNOTSUPP; 1305} 1306 1307int 1308pru_attach_notsupp(struct socket *so, int proto, struct thread *td) 1309{ 1310 return EOPNOTSUPP; 1311} 1312 1313int 1314pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td) 1315{ 1316 return EOPNOTSUPP; 1317} 1318 1319int 1320pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td) 1321{ 1322 return EOPNOTSUPP; 1323} 1324 1325int 1326pru_connect2_notsupp(struct socket *so1, struct socket *so2) 1327{ 1328 return EOPNOTSUPP; 1329} 1330 1331int 1332pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data, 1333 struct ifnet *ifp, struct thread *td) 1334{ 1335 return EOPNOTSUPP; 1336} 1337 1338int 1339pru_detach_notsupp(struct socket *so) 1340{ 1341 return EOPNOTSUPP; 1342} 1343 1344int 1345pru_disconnect_notsupp(struct socket *so) 1346{ 1347 return EOPNOTSUPP; 1348} 1349 1350int 1351pru_listen_notsupp(struct socket *so, int backlog, struct thread *td) 1352{ 1353 return EOPNOTSUPP; 1354} 1355 1356int 1357pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam) 1358{ 1359 return EOPNOTSUPP; 1360} 1361 1362int 1363pru_rcvd_notsupp(struct socket *so, int flags) 1364{ 1365 return EOPNOTSUPP; 1366} 1367 1368int 1369pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags) 1370{ 1371 return EOPNOTSUPP; 1372} 1373 1374int 1375pru_send_notsupp(struct socket *so, int flags, struct mbuf *m, 1376 struct sockaddr *addr, struct mbuf *control, struct thread *td) 1377{ 1378 return EOPNOTSUPP; 1379} 1380 1381/* 1382 * This isn't really a ``null'' operation, but it's the default one 1383 * and doesn't do anything destructive. 
1384 */ 1385int 1386pru_sense_null(struct socket *so, struct stat *sb) 1387{ 1388 sb->st_blksize = so->so_snd.sb_hiwat; 1389 return 0; 1390} 1391 1392int 1393pru_shutdown_notsupp(struct socket *so) 1394{ 1395 return EOPNOTSUPP; 1396} 1397 1398int 1399pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam) 1400{ 1401 return EOPNOTSUPP; 1402} 1403 1404int 1405pru_sosend_notsupp(struct socket *so, struct sockaddr *addr, struct uio *uio, 1406 struct mbuf *top, struct mbuf *control, int flags, struct thread *td) 1407{ 1408 return EOPNOTSUPP; 1409} 1410 1411int 1412pru_soreceive_notsupp(struct socket *so, struct sockaddr **paddr, 1413 struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, 1414 int *flagsp) 1415{ 1416 return EOPNOTSUPP; 1417} 1418 1419int 1420pru_sopoll_notsupp(struct socket *so, int events, struct ucred *cred, 1421 struct thread *td) 1422{ 1423 return EOPNOTSUPP; 1424} 1425 1426/* 1427 * For protocol types that don't keep cached copies of labels in their 1428 * pcbs, provide a null sosetlabel that does a NOOP. 1429 */ 1430void 1431pru_sosetlabel_null(struct socket *so) 1432{ 1433 1434} 1435 1436/* 1437 * Make a copy of a sockaddr in a malloced buffer of type M_SONAME. 1438 */ 1439struct sockaddr * 1440sodupsockaddr(const struct sockaddr *sa, int mflags) 1441{ 1442 struct sockaddr *sa2; 1443 1444 sa2 = malloc(sa->sa_len, M_SONAME, mflags); 1445 if (sa2) 1446 bcopy(sa, sa2, sa->sa_len); 1447 return sa2; 1448} 1449 1450/* 1451 * Create an external-format (``xsocket'') structure using the information 1452 * in the kernel-format socket structure pointed to by so. This is done 1453 * to reduce the spew of irrelevant information over this interface, 1454 * to isolate user code from changes in the kernel structure, and 1455 * potentially to provide information-hiding if we decide that 1456 * some of this information should be hidden from users. 
 */
void
sotoxsocket(struct socket *so, struct xsocket *xso)
{
	xso->xso_len = sizeof *xso;
	xso->xso_so = so;
	xso->so_type = so->so_type;
	xso->so_options = so->so_options;
	xso->so_linger = so->so_linger;
	xso->so_state = so->so_state;
	xso->so_pcb = so->so_pcb;
	xso->xso_protocol = so->so_proto->pr_protocol;
	xso->xso_family = so->so_proto->pr_domain->dom_family;
	xso->so_qlen = so->so_qlen;
	xso->so_incqlen = so->so_incqlen;
	xso->so_qlimit = so->so_qlimit;
	xso->so_timeo = so->so_timeo;
	xso->so_error = so->so_error;
	/* 0 when no SIGIO recipient is registered on the socket. */
	xso->so_pgid = so->so_sigio ? so->so_sigio->sio_pgid : 0;
	xso->so_oobmark = so->so_oobmark;
	sbtoxsockbuf(&so->so_snd, &xso->so_snd);
	sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
	xso->so_uid = so->so_cred->cr_uid;
}

/*
 * This does the same for sockbufs. Note that the xsockbuf structure,
 * since it is always embedded in a socket, does not include a self
 * pointer nor a length. We make this entry point public in case
 * some other mechanism needs it.
 */
void
sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
{
	xsb->sb_cc = sb->sb_cc;
	xsb->sb_hiwat = sb->sb_hiwat;
	xsb->sb_mbcnt = sb->sb_mbcnt;
	xsb->sb_mbmax = sb->sb_mbmax;
	xsb->sb_lowat = sb->sb_lowat;
	xsb->sb_flags = sb->sb_flags;
	xsb->sb_timeo = sb->sb_timeo;
}

/*
 * Here is the definition of some of the basic objects in the kern.ipc
 * branch of the MIB.
 */
SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC");

/* This takes the place of kern.maxsockbuf, which moved to kern.ipc.
*/ 1507static int dummy; 1508SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, ""); 1509SYSCTL_OID(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLTYPE_ULONG|CTLFLAG_RW, 1510 &sb_max, 0, sysctl_handle_sb_max, "LU", "Maximum socket buffer size"); 1511SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets, CTLFLAG_RDTUN, 1512 &maxsockets, 0, "Maximum number of sockets avaliable"); 1513SYSCTL_ULONG(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW, 1514 &sb_efficiency, 0, ""); 1515 1516/* 1517 * Initialise maxsockets 1518 */ 1519static void init_maxsockets(void *ignored) 1520{ 1521 TUNABLE_INT_FETCH("kern.ipc.maxsockets", &maxsockets); 1522 maxsockets = imax(maxsockets, imax(maxfiles, nmbclusters)); 1523} 1524SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_maxsockets, NULL);
| 1299} 1300 1301int 1302pru_accept_notsupp(struct socket *so, struct sockaddr **nam) 1303{ 1304 return EOPNOTSUPP; 1305} 1306 1307int 1308pru_attach_notsupp(struct socket *so, int proto, struct thread *td) 1309{ 1310 return EOPNOTSUPP; 1311} 1312 1313int 1314pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td) 1315{ 1316 return EOPNOTSUPP; 1317} 1318 1319int 1320pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td) 1321{ 1322 return EOPNOTSUPP; 1323} 1324 1325int 1326pru_connect2_notsupp(struct socket *so1, struct socket *so2) 1327{ 1328 return EOPNOTSUPP; 1329} 1330 1331int 1332pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data, 1333 struct ifnet *ifp, struct thread *td) 1334{ 1335 return EOPNOTSUPP; 1336} 1337 1338int 1339pru_detach_notsupp(struct socket *so) 1340{ 1341 return EOPNOTSUPP; 1342} 1343 1344int 1345pru_disconnect_notsupp(struct socket *so) 1346{ 1347 return EOPNOTSUPP; 1348} 1349 1350int 1351pru_listen_notsupp(struct socket *so, int backlog, struct thread *td) 1352{ 1353 return EOPNOTSUPP; 1354} 1355 1356int 1357pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam) 1358{ 1359 return EOPNOTSUPP; 1360} 1361 1362int 1363pru_rcvd_notsupp(struct socket *so, int flags) 1364{ 1365 return EOPNOTSUPP; 1366} 1367 1368int 1369pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags) 1370{ 1371 return EOPNOTSUPP; 1372} 1373 1374int 1375pru_send_notsupp(struct socket *so, int flags, struct mbuf *m, 1376 struct sockaddr *addr, struct mbuf *control, struct thread *td) 1377{ 1378 return EOPNOTSUPP; 1379} 1380 1381/* 1382 * This isn't really a ``null'' operation, but it's the default one 1383 * and doesn't do anything destructive. 
1384 */ 1385int 1386pru_sense_null(struct socket *so, struct stat *sb) 1387{ 1388 sb->st_blksize = so->so_snd.sb_hiwat; 1389 return 0; 1390} 1391 1392int 1393pru_shutdown_notsupp(struct socket *so) 1394{ 1395 return EOPNOTSUPP; 1396} 1397 1398int 1399pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam) 1400{ 1401 return EOPNOTSUPP; 1402} 1403 1404int 1405pru_sosend_notsupp(struct socket *so, struct sockaddr *addr, struct uio *uio, 1406 struct mbuf *top, struct mbuf *control, int flags, struct thread *td) 1407{ 1408 return EOPNOTSUPP; 1409} 1410 1411int 1412pru_soreceive_notsupp(struct socket *so, struct sockaddr **paddr, 1413 struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, 1414 int *flagsp) 1415{ 1416 return EOPNOTSUPP; 1417} 1418 1419int 1420pru_sopoll_notsupp(struct socket *so, int events, struct ucred *cred, 1421 struct thread *td) 1422{ 1423 return EOPNOTSUPP; 1424} 1425 1426/* 1427 * For protocol types that don't keep cached copies of labels in their 1428 * pcbs, provide a null sosetlabel that does a NOOP. 1429 */ 1430void 1431pru_sosetlabel_null(struct socket *so) 1432{ 1433 1434} 1435 1436/* 1437 * Make a copy of a sockaddr in a malloced buffer of type M_SONAME. 1438 */ 1439struct sockaddr * 1440sodupsockaddr(const struct sockaddr *sa, int mflags) 1441{ 1442 struct sockaddr *sa2; 1443 1444 sa2 = malloc(sa->sa_len, M_SONAME, mflags); 1445 if (sa2) 1446 bcopy(sa, sa2, sa->sa_len); 1447 return sa2; 1448} 1449 1450/* 1451 * Create an external-format (``xsocket'') structure using the information 1452 * in the kernel-format socket structure pointed to by so. This is done 1453 * to reduce the spew of irrelevant information over this interface, 1454 * to isolate user code from changes in the kernel structure, and 1455 * potentially to provide information-hiding if we decide that 1456 * some of this information should be hidden from users. 
 */
void
sotoxsocket(struct socket *so, struct xsocket *xso)
{
	xso->xso_len = sizeof *xso;
	xso->xso_so = so;
	xso->so_type = so->so_type;
	xso->so_options = so->so_options;
	xso->so_linger = so->so_linger;
	xso->so_state = so->so_state;
	xso->so_pcb = so->so_pcb;
	xso->xso_protocol = so->so_proto->pr_protocol;
	xso->xso_family = so->so_proto->pr_domain->dom_family;
	xso->so_qlen = so->so_qlen;
	xso->so_incqlen = so->so_incqlen;
	xso->so_qlimit = so->so_qlimit;
	xso->so_timeo = so->so_timeo;
	xso->so_error = so->so_error;
	/* 0 when no SIGIO recipient is registered on the socket. */
	xso->so_pgid = so->so_sigio ? so->so_sigio->sio_pgid : 0;
	xso->so_oobmark = so->so_oobmark;
	sbtoxsockbuf(&so->so_snd, &xso->so_snd);
	sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
	xso->so_uid = so->so_cred->cr_uid;
}

/*
 * This does the same for sockbufs. Note that the xsockbuf structure,
 * since it is always embedded in a socket, does not include a self
 * pointer nor a length. We make this entry point public in case
 * some other mechanism needs it.
 */
void
sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
{
	xsb->sb_cc = sb->sb_cc;
	xsb->sb_hiwat = sb->sb_hiwat;
	xsb->sb_mbcnt = sb->sb_mbcnt;
	xsb->sb_mbmax = sb->sb_mbmax;
	xsb->sb_lowat = sb->sb_lowat;
	xsb->sb_flags = sb->sb_flags;
	xsb->sb_timeo = sb->sb_timeo;
}

/*
 * Here is the definition of some of the basic objects in the kern.ipc
 * branch of the MIB.
 */
SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC");

/* This takes the place of kern.maxsockbuf, which moved to kern.ipc.
*/ 1507static int dummy; 1508SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, ""); 1509SYSCTL_OID(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLTYPE_ULONG|CTLFLAG_RW, 1510 &sb_max, 0, sysctl_handle_sb_max, "LU", "Maximum socket buffer size"); 1511SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets, CTLFLAG_RDTUN, 1512 &maxsockets, 0, "Maximum number of sockets avaliable"); 1513SYSCTL_ULONG(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW, 1514 &sb_efficiency, 0, ""); 1515 1516/* 1517 * Initialise maxsockets 1518 */ 1519static void init_maxsockets(void *ignored) 1520{ 1521 TUNABLE_INT_FETCH("kern.ipc.maxsockets", &maxsockets); 1522 maxsockets = imax(maxsockets, imax(maxfiles, nmbclusters)); 1523} 1524SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_maxsockets, NULL);
|