/* uipc_domain.c revision 195699 */
1168404Spjd/*- 2168404Spjd * Copyright (c) 1982, 1986, 1993 3177674Sjb * The Regents of the University of California. All rights reserved. 4168404Spjd * 5168404Spjd * Redistribution and use in source and binary forms, with or without 6177674Sjb * modification, are permitted provided that the following conditions 7168404Spjd * are met: 8177674Sjb * 1. Redistributions of source code must retain the above copyright 9168404Spjd * notice, this list of conditions and the following disclaimer. 10177674Sjb * 2. Redistributions in binary form must reproduce the above copyright 11168404Spjd * notice, this list of conditions and the following disclaimer in the 12177674Sjb * documentation and/or other materials provided with the distribution. 13168404Spjd * 4. Neither the name of the University nor the names of its contributors 14170431Spjd * may be used to endorse or promote products derived from this software 15177674Sjb * without specific prior written permission. 16191933Skmacy * 17170431Spjd * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18177674Sjb * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19170431Spjd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20170431Spjd * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21185029Spjd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22185029Spjd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23168404Spjd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24168792Sru * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25168404Spjd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26168792Sru * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27168792Sru * SUCH DAMAGE. 
28168792Sru * 29168792Sru * @(#)uipc_domain.c 8.2 (Berkeley) 10/18/93 30185029Spjd */ 31168404Spjd 32168792Sru#include <sys/cdefs.h> 33185029Spjd__FBSDID("$FreeBSD: head/sys/kern/uipc_domain.c 195699 2009-07-14 22:48:30Z rwatson $"); 34185029Spjd 35168404Spjd#include <sys/param.h> 36204597Suqs#include <sys/socket.h> 37177674Sjb#include <sys/protosw.h> 38177674Sjb#include <sys/domain.h> 39177674Sjb#include <sys/eventhandler.h> 40177674Sjb#include <sys/mbuf.h> 41177674Sjb#include <sys/kernel.h> 42177674Sjb#include <sys/lock.h> 43177674Sjb#include <sys/mutex.h> 44177674Sjb#include <sys/socketvar.h> 45177674Sjb#include <sys/systm.h> 46168792Sru#include <sys/vimage.h> 47177674Sjb#include <vm/uma.h> 48185029Spjd 49185029Spjd/* 50185029Spjd * System initialization 51185029Spjd * 52185029Spjd * Note: domain initialization takes place on a per domain basis 53185029Spjd * as a result of traversing a SYSINIT linker set. Most likely, 54185029Spjd * each domain would want to call DOMAIN_SET(9) itself, which 55168404Spjd * would cause the domain to be added just after domaininit() 56168818Spjd * is called during startup. 57168818Spjd * 58168404Spjd * See DOMAIN_SET(9) for details on its use. 
59168404Spjd */ 60168404Spjd 61168404Spjdstatic void domaininit(void *); 62189801SrdivackySYSINIT(domain, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST, domaininit, NULL); 63189801Srdivacky 64168404Spjdstatic void domainfinalize(void *); 65SYSINIT(domainfin, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST, domainfinalize, 66 NULL); 67 68static vnet_attach_fn net_init_domain; 69#ifdef VIMAGE 70static vnet_detach_fn net_detach_domain; 71#endif 72 73static struct callout pffast_callout; 74static struct callout pfslow_callout; 75 76static void pffasttimo(void *); 77static void pfslowtimo(void *); 78 79struct domain *domains; /* registered protocol domains */ 80int domain_init_status = 0; 81static struct mtx dom_mtx; /* domain list lock */ 82MTX_SYSINIT(domain, &dom_mtx, "domain list", MTX_DEF); 83 84/* 85 * Dummy protocol specific user requests function pointer array. 86 * All functions return EOPNOTSUPP. 87 */ 88struct pr_usrreqs nousrreqs = { 89 .pru_accept = pru_accept_notsupp, 90 .pru_attach = pru_attach_notsupp, 91 .pru_bind = pru_bind_notsupp, 92 .pru_connect = pru_connect_notsupp, 93 .pru_connect2 = pru_connect2_notsupp, 94 .pru_control = pru_control_notsupp, 95 .pru_disconnect = pru_disconnect_notsupp, 96 .pru_listen = pru_listen_notsupp, 97 .pru_peeraddr = pru_peeraddr_notsupp, 98 .pru_rcvd = pru_rcvd_notsupp, 99 .pru_rcvoob = pru_rcvoob_notsupp, 100 .pru_send = pru_send_notsupp, 101 .pru_sense = pru_sense_null, 102 .pru_shutdown = pru_shutdown_notsupp, 103 .pru_sockaddr = pru_sockaddr_notsupp, 104 .pru_sosend = pru_sosend_notsupp, 105 .pru_soreceive = pru_soreceive_notsupp, 106 .pru_sopoll = pru_sopoll_notsupp, 107}; 108 109#ifdef VIMAGE 110vnet_modinfo_t vnet_domain_modinfo = { 111 .vmi_id = VNET_MOD_DOMAIN, 112 .vmi_name = "domain", 113 .vmi_iattach = net_init_domain, 114 .vmi_idetach = net_detach_domain, 115}; 116#endif 117 118static void 119protosw_init(struct protosw *pr) 120{ 121 struct pr_usrreqs *pu; 122 123 pu = pr->pr_usrreqs; 124 KASSERT(pu != NULL, 
("protosw_init: %ssw[%d] has no usrreqs!", 125 pr->pr_domain->dom_name, 126 (int)(pr - pr->pr_domain->dom_protosw))); 127 128 /* 129 * Protocol switch methods fall into three categories: mandatory, 130 * mandatory but protosw_init() provides a default, and optional. 131 * 132 * For true protocols (i.e., pru_attach != NULL), KASSERT truly 133 * mandatory methods with no defaults, and initialize defaults for 134 * other mandatory methods if the protocol hasn't defined an 135 * implementation (NULL function pointer). 136 */ 137#if 0 138 if (pu->pru_attach != NULL) { 139 KASSERT(pu->pru_abort != NULL, 140 ("protosw_init: %ssw[%d] pru_abort NULL", 141 pr->pr_domain->dom_name, 142 (int)(pr - pr->pr_domain->dom_protosw))); 143 KASSERT(pu->pru_send != NULL, 144 ("protosw_init: %ssw[%d] pru_send NULL", 145 pr->pr_domain->dom_name, 146 (int)(pr - pr->pr_domain->dom_protosw))); 147 } 148#endif 149 150#define DEFAULT(foo, bar) if ((foo) == NULL) (foo) = (bar) 151 DEFAULT(pu->pru_accept, pru_accept_notsupp); 152 DEFAULT(pu->pru_bind, pru_bind_notsupp); 153 DEFAULT(pu->pru_connect, pru_connect_notsupp); 154 DEFAULT(pu->pru_connect2, pru_connect2_notsupp); 155 DEFAULT(pu->pru_control, pru_control_notsupp); 156 DEFAULT(pu->pru_disconnect, pru_disconnect_notsupp); 157 DEFAULT(pu->pru_listen, pru_listen_notsupp); 158 DEFAULT(pu->pru_peeraddr, pru_peeraddr_notsupp); 159 DEFAULT(pu->pru_rcvd, pru_rcvd_notsupp); 160 DEFAULT(pu->pru_rcvoob, pru_rcvoob_notsupp); 161 DEFAULT(pu->pru_sense, pru_sense_null); 162 DEFAULT(pu->pru_shutdown, pru_shutdown_notsupp); 163 DEFAULT(pu->pru_sockaddr, pru_sockaddr_notsupp); 164 DEFAULT(pu->pru_sosend, sosend_generic); 165 DEFAULT(pu->pru_soreceive, soreceive_generic); 166 DEFAULT(pu->pru_sopoll, sopoll_generic); 167#undef DEFAULT 168 if (pr->pr_init) 169 (*pr->pr_init)(); 170} 171 172/* 173 * Add a new protocol domain to the list of supported domains 174 * Note: you cant unload it again because a socket may be using it. 
175 * XXX can't fail at this time. 176 */ 177static int 178net_init_domain(const void *arg) 179{ 180 const struct domain *dp = arg; 181 struct protosw *pr; 182 183 if (dp->dom_init) 184 (*dp->dom_init)(); 185 for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) 186 protosw_init(pr); 187 /* 188 * update global information about maximums 189 */ 190 max_hdr = max_linkhdr + max_protohdr; 191 max_datalen = MHLEN - max_hdr; 192 if (max_datalen < 1) 193 panic("%s: max_datalen < 1", __func__); 194 return (0); 195} 196 197#ifdef VIMAGE 198/* 199 * Detach / free a domain instance. 200 */ 201static int 202net_detach_domain(const void *arg) 203{ 204 const struct domain *dp = arg; 205 struct protosw *pr; 206 207 for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) 208 if (pr->pr_destroy) 209 (*pr->pr_destroy)(); 210 if (dp->dom_destroy) 211 (*dp->dom_destroy)(); 212 213 return (0); 214} 215#endif 216 217/* 218 * Add a new protocol domain to the list of supported domains 219 * Note: you cant unload it again because a socket may be using it. 220 * XXX can't fail at this time. 
221 */ 222void 223net_add_domain(void *data) 224{ 225 struct domain *dp; 226 227 dp = (struct domain *)data; 228 mtx_lock(&dom_mtx); 229 dp->dom_next = domains; 230 domains = dp; 231 232 KASSERT(domain_init_status >= 1, 233 ("attempt to net_add_domain(%s) before domaininit()", 234 dp->dom_name)); 235#ifndef INVARIANTS 236 if (domain_init_status < 1) 237 printf("WARNING: attempt to net_add_domain(%s) before " 238 "domaininit()\n", dp->dom_name); 239#endif 240#ifdef notyet 241 KASSERT(domain_init_status < 2, 242 ("attempt to net_add_domain(%s) after domainfinalize()", 243 dp->dom_name)); 244#else 245 if (domain_init_status >= 2) 246 printf("WARNING: attempt to net_add_domain(%s) after " 247 "domainfinalize()\n", dp->dom_name); 248#endif 249 mtx_unlock(&dom_mtx); 250#ifdef VIMAGE 251 vnet_mod_register_multi(&vnet_domain_modinfo, dp, dp->dom_name); 252#else 253 net_init_domain(dp); 254#endif 255} 256 257static void 258socket_zone_change(void *tag) 259{ 260 261 uma_zone_set_max(socket_zone, maxsockets); 262} 263 264/* ARGSUSED*/ 265static void 266domaininit(void *dummy) 267{ 268 269 /* 270 * Before we do any setup, make sure to initialize the 271 * zone allocator we get struct sockets from. 
272 */ 273 socket_zone = uma_zcreate("socket", sizeof(struct socket), NULL, NULL, 274 NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 275 uma_zone_set_max(socket_zone, maxsockets); 276 EVENTHANDLER_REGISTER(maxsockets_change, socket_zone_change, NULL, 277 EVENTHANDLER_PRI_FIRST); 278 279 if (max_linkhdr < 16) /* XXX */ 280 max_linkhdr = 16; 281 282 callout_init(&pffast_callout, CALLOUT_MPSAFE); 283 callout_init(&pfslow_callout, CALLOUT_MPSAFE); 284 285 mtx_lock(&dom_mtx); 286 KASSERT(domain_init_status == 0, ("domaininit called too late!")); 287 domain_init_status = 1; 288 mtx_unlock(&dom_mtx); 289} 290 291/* ARGSUSED*/ 292static void 293domainfinalize(void *dummy) 294{ 295 296 mtx_lock(&dom_mtx); 297 KASSERT(domain_init_status == 1, ("domainfinalize called too late!")); 298 domain_init_status = 2; 299 mtx_unlock(&dom_mtx); 300 301 callout_reset(&pffast_callout, 1, pffasttimo, NULL); 302 callout_reset(&pfslow_callout, 1, pfslowtimo, NULL); 303} 304 305struct protosw * 306pffindtype(int family, int type) 307{ 308 struct domain *dp; 309 struct protosw *pr; 310 311 for (dp = domains; dp; dp = dp->dom_next) 312 if (dp->dom_family == family) 313 goto found; 314 return (0); 315found: 316 for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) 317 if (pr->pr_type && pr->pr_type == type) 318 return (pr); 319 return (0); 320} 321 322struct protosw * 323pffindproto(int family, int protocol, int type) 324{ 325 struct domain *dp; 326 struct protosw *pr; 327 struct protosw *maybe = 0; 328 329 if (family == 0) 330 return (0); 331 for (dp = domains; dp; dp = dp->dom_next) 332 if (dp->dom_family == family) 333 goto found; 334 return (0); 335found: 336 for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) { 337 if ((pr->pr_protocol == protocol) && (pr->pr_type == type)) 338 return (pr); 339 340 if (type == SOCK_RAW && pr->pr_type == SOCK_RAW && 341 pr->pr_protocol == 0 && maybe == (struct protosw *)0) 342 maybe = pr; 343 } 344 return (maybe); 345} 346 347/* 348 * The 
caller must make sure that the new protocol is fully set up and ready to 349 * accept requests before it is registered. 350 */ 351int 352pf_proto_register(int family, struct protosw *npr) 353{ 354 struct domain *dp; 355 struct protosw *pr, *fpr; 356 357 /* Sanity checks. */ 358 if (family == 0) 359 return (EPFNOSUPPORT); 360 if (npr->pr_type == 0) 361 return (EPROTOTYPE); 362 if (npr->pr_protocol == 0) 363 return (EPROTONOSUPPORT); 364 if (npr->pr_usrreqs == NULL) 365 return (ENXIO); 366 367 /* Try to find the specified domain based on the family. */ 368 for (dp = domains; dp; dp = dp->dom_next) 369 if (dp->dom_family == family) 370 goto found; 371 return (EPFNOSUPPORT); 372 373found: 374 /* Initialize backpointer to struct domain. */ 375 npr->pr_domain = dp; 376 fpr = NULL; 377 378 /* 379 * Protect us against races when two protocol registrations for 380 * the same protocol happen at the same time. 381 */ 382 mtx_lock(&dom_mtx); 383 384 /* The new protocol must not yet exist. */ 385 for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) { 386 if ((pr->pr_type == npr->pr_type) && 387 (pr->pr_protocol == npr->pr_protocol)) { 388 mtx_unlock(&dom_mtx); 389 return (EEXIST); /* XXX: Check only protocol? */ 390 } 391 /* While here, remember the first free spacer. */ 392 if ((fpr == NULL) && (pr->pr_protocol == PROTO_SPACER)) 393 fpr = pr; 394 } 395 396 /* If no free spacer is found we can't add the new protocol. */ 397 if (fpr == NULL) { 398 mtx_unlock(&dom_mtx); 399 return (ENOMEM); 400 } 401 402 /* Copy the new struct protosw over the spacer. */ 403 bcopy(npr, fpr, sizeof(*fpr)); 404 405 /* Job is done, no more protection required. */ 406 mtx_unlock(&dom_mtx); 407 408 /* Initialize and activate the protocol. */ 409 protosw_init(fpr); 410 411 return (0); 412} 413 414/* 415 * The caller must make sure the protocol and its functions correctly shut down 416 * all sockets and release all locks and memory references. 
417 */ 418int 419pf_proto_unregister(int family, int protocol, int type) 420{ 421 struct domain *dp; 422 struct protosw *pr, *dpr; 423 424 /* Sanity checks. */ 425 if (family == 0) 426 return (EPFNOSUPPORT); 427 if (protocol == 0) 428 return (EPROTONOSUPPORT); 429 if (type == 0) 430 return (EPROTOTYPE); 431 432 /* Try to find the specified domain based on the family type. */ 433 for (dp = domains; dp; dp = dp->dom_next) 434 if (dp->dom_family == family) 435 goto found; 436 return (EPFNOSUPPORT); 437 438found: 439 dpr = NULL; 440 441 /* Lock out everyone else while we are manipulating the protosw. */ 442 mtx_lock(&dom_mtx); 443 444 /* The protocol must exist and only once. */ 445 for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) { 446 if ((pr->pr_type == type) && (pr->pr_protocol == protocol)) { 447 if (dpr != NULL) { 448 mtx_unlock(&dom_mtx); 449 return (EMLINK); /* Should not happen! */ 450 } else 451 dpr = pr; 452 } 453 } 454 455 /* Protocol does not exist. */ 456 if (dpr == NULL) { 457 mtx_unlock(&dom_mtx); 458 return (EPROTONOSUPPORT); 459 } 460 461 /* De-orbit the protocol and make the slot available again. */ 462 dpr->pr_type = 0; 463 dpr->pr_domain = dp; 464 dpr->pr_protocol = PROTO_SPACER; 465 dpr->pr_flags = 0; 466 dpr->pr_input = NULL; 467 dpr->pr_output = NULL; 468 dpr->pr_ctlinput = NULL; 469 dpr->pr_ctloutput = NULL; 470 dpr->pr_init = NULL; 471 dpr->pr_fasttimo = NULL; 472 dpr->pr_slowtimo = NULL; 473 dpr->pr_drain = NULL; 474 dpr->pr_usrreqs = &nousrreqs; 475 476 /* Job is done, not more protection required. 
*/ 477 mtx_unlock(&dom_mtx); 478 479 return (0); 480} 481 482void 483pfctlinput(int cmd, struct sockaddr *sa) 484{ 485 struct domain *dp; 486 struct protosw *pr; 487 488 for (dp = domains; dp; dp = dp->dom_next) 489 for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) 490 if (pr->pr_ctlinput) 491 (*pr->pr_ctlinput)(cmd, sa, (void *)0); 492} 493 494void 495pfctlinput2(int cmd, struct sockaddr *sa, void *ctlparam) 496{ 497 struct domain *dp; 498 struct protosw *pr; 499 500 if (!sa) 501 return; 502 for (dp = domains; dp; dp = dp->dom_next) { 503 /* 504 * the check must be made by xx_ctlinput() anyways, to 505 * make sure we use data item pointed to by ctlparam in 506 * correct way. the following check is made just for safety. 507 */ 508 if (dp->dom_family != sa->sa_family) 509 continue; 510 511 for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) 512 if (pr->pr_ctlinput) 513 (*pr->pr_ctlinput)(cmd, sa, ctlparam); 514 } 515} 516 517static void 518pfslowtimo(void *arg) 519{ 520 struct domain *dp; 521 struct protosw *pr; 522 523 for (dp = domains; dp; dp = dp->dom_next) 524 for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) 525 if (pr->pr_slowtimo) 526 (*pr->pr_slowtimo)(); 527 callout_reset(&pfslow_callout, hz/2, pfslowtimo, NULL); 528} 529 530static void 531pffasttimo(void *arg) 532{ 533 struct domain *dp; 534 struct protosw *pr; 535 536 for (dp = domains; dp; dp = dp->dom_next) 537 for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) 538 if (pr->pr_fasttimo) 539 (*pr->pr_fasttimo)(); 540 callout_reset(&pffast_callout, hz/5, pffasttimo, NULL); 541} 542