uipc_domain.c revision 195699
1168404Spjd/*-
2168404Spjd * Copyright (c) 1982, 1986, 1993
3177674Sjb *	The Regents of the University of California.  All rights reserved.
4168404Spjd *
5168404Spjd * Redistribution and use in source and binary forms, with or without
6177674Sjb * modification, are permitted provided that the following conditions
7168404Spjd * are met:
8177674Sjb * 1. Redistributions of source code must retain the above copyright
9168404Spjd *    notice, this list of conditions and the following disclaimer.
10177674Sjb * 2. Redistributions in binary form must reproduce the above copyright
11168404Spjd *    notice, this list of conditions and the following disclaimer in the
12177674Sjb *    documentation and/or other materials provided with the distribution.
13168404Spjd * 4. Neither the name of the University nor the names of its contributors
14170431Spjd *    may be used to endorse or promote products derived from this software
15177674Sjb *    without specific prior written permission.
16191933Skmacy *
17170431Spjd * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18177674Sjb * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19170431Spjd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20170431Spjd * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21185029Spjd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22185029Spjd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23168404Spjd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24168792Sru * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25168404Spjd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26168792Sru * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27168792Sru * SUCH DAMAGE.
28168792Sru *
29168792Sru *	@(#)uipc_domain.c	8.2 (Berkeley) 10/18/93
30185029Spjd */
31168404Spjd
32168792Sru#include <sys/cdefs.h>
33185029Spjd__FBSDID("$FreeBSD: head/sys/kern/uipc_domain.c 195699 2009-07-14 22:48:30Z rwatson $");
34185029Spjd
35168404Spjd#include <sys/param.h>
36204597Suqs#include <sys/socket.h>
37177674Sjb#include <sys/protosw.h>
38177674Sjb#include <sys/domain.h>
39177674Sjb#include <sys/eventhandler.h>
40177674Sjb#include <sys/mbuf.h>
41177674Sjb#include <sys/kernel.h>
42177674Sjb#include <sys/lock.h>
43177674Sjb#include <sys/mutex.h>
44177674Sjb#include <sys/socketvar.h>
45177674Sjb#include <sys/systm.h>
46168792Sru#include <sys/vimage.h>
47177674Sjb#include <vm/uma.h>
48185029Spjd
49185029Spjd/*
50185029Spjd * System initialization
51185029Spjd *
52185029Spjd * Note: domain initialization takes place on a per domain basis
53185029Spjd * as a result of traversing a SYSINIT linker set.  Most likely,
54185029Spjd * each domain would want to call DOMAIN_SET(9) itself, which
55168404Spjd * would cause the domain to be added just after domaininit()
56168818Spjd * is called during startup.
57168818Spjd *
58168404Spjd * See DOMAIN_SET(9) for details on its use.
59168404Spjd */
60168404Spjd
61168404Spjdstatic void domaininit(void *);
62189801SrdivackySYSINIT(domain, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST, domaininit, NULL);
63189801Srdivacky
64168404Spjdstatic void domainfinalize(void *);
65SYSINIT(domainfin, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST, domainfinalize,
66    NULL);
67
68static vnet_attach_fn net_init_domain;
69#ifdef VIMAGE
70static vnet_detach_fn net_detach_domain;
71#endif
72
73static struct callout pffast_callout;
74static struct callout pfslow_callout;
75
76static void	pffasttimo(void *);
77static void	pfslowtimo(void *);
78
79struct domain *domains;		/* registered protocol domains */
80int domain_init_status = 0;
81static struct mtx dom_mtx;		/* domain list lock */
82MTX_SYSINIT(domain, &dom_mtx, "domain list", MTX_DEF);
83
84/*
85 * Dummy protocol specific user requests function pointer array.
86 * All functions return EOPNOTSUPP.
87 */
88struct pr_usrreqs nousrreqs = {
89	.pru_accept =		pru_accept_notsupp,
90	.pru_attach =		pru_attach_notsupp,
91	.pru_bind =		pru_bind_notsupp,
92	.pru_connect =		pru_connect_notsupp,
93	.pru_connect2 =		pru_connect2_notsupp,
94	.pru_control =		pru_control_notsupp,
95	.pru_disconnect	=	pru_disconnect_notsupp,
96	.pru_listen =		pru_listen_notsupp,
97	.pru_peeraddr =		pru_peeraddr_notsupp,
98	.pru_rcvd =		pru_rcvd_notsupp,
99	.pru_rcvoob =		pru_rcvoob_notsupp,
100	.pru_send =		pru_send_notsupp,
101	.pru_sense =		pru_sense_null,
102	.pru_shutdown =		pru_shutdown_notsupp,
103	.pru_sockaddr =		pru_sockaddr_notsupp,
104	.pru_sosend =		pru_sosend_notsupp,
105	.pru_soreceive =	pru_soreceive_notsupp,
106	.pru_sopoll =		pru_sopoll_notsupp,
107};
108
#ifdef VIMAGE
/*
 * Module descriptor handed to vnet_mod_register_multi() by net_add_domain();
 * it names net_init_domain() and net_detach_domain() as the attach and
 * detach handlers.
 */
vnet_modinfo_t vnet_domain_modinfo = {
	.vmi_id = VNET_MOD_DOMAIN,
	.vmi_name = "domain",
	.vmi_iattach = net_init_domain,
	.vmi_idetach = net_detach_domain,
};
#endif
117
118static void
119protosw_init(struct protosw *pr)
120{
121	struct pr_usrreqs *pu;
122
123	pu = pr->pr_usrreqs;
124	KASSERT(pu != NULL, ("protosw_init: %ssw[%d] has no usrreqs!",
125	    pr->pr_domain->dom_name,
126	    (int)(pr - pr->pr_domain->dom_protosw)));
127
128	/*
129	 * Protocol switch methods fall into three categories: mandatory,
130	 * mandatory but protosw_init() provides a default, and optional.
131	 *
132	 * For true protocols (i.e., pru_attach != NULL), KASSERT truly
133	 * mandatory methods with no defaults, and initialize defaults for
134	 * other mandatory methods if the protocol hasn't defined an
135	 * implementation (NULL function pointer).
136	 */
137#if 0
138	if (pu->pru_attach != NULL) {
139		KASSERT(pu->pru_abort != NULL,
140		    ("protosw_init: %ssw[%d] pru_abort NULL",
141		    pr->pr_domain->dom_name,
142		    (int)(pr - pr->pr_domain->dom_protosw)));
143		KASSERT(pu->pru_send != NULL,
144		    ("protosw_init: %ssw[%d] pru_send NULL",
145		    pr->pr_domain->dom_name,
146		    (int)(pr - pr->pr_domain->dom_protosw)));
147	}
148#endif
149
150#define DEFAULT(foo, bar)	if ((foo) == NULL)  (foo) = (bar)
151	DEFAULT(pu->pru_accept, pru_accept_notsupp);
152	DEFAULT(pu->pru_bind, pru_bind_notsupp);
153	DEFAULT(pu->pru_connect, pru_connect_notsupp);
154	DEFAULT(pu->pru_connect2, pru_connect2_notsupp);
155	DEFAULT(pu->pru_control, pru_control_notsupp);
156	DEFAULT(pu->pru_disconnect, pru_disconnect_notsupp);
157	DEFAULT(pu->pru_listen, pru_listen_notsupp);
158	DEFAULT(pu->pru_peeraddr, pru_peeraddr_notsupp);
159	DEFAULT(pu->pru_rcvd, pru_rcvd_notsupp);
160	DEFAULT(pu->pru_rcvoob, pru_rcvoob_notsupp);
161	DEFAULT(pu->pru_sense, pru_sense_null);
162	DEFAULT(pu->pru_shutdown, pru_shutdown_notsupp);
163	DEFAULT(pu->pru_sockaddr, pru_sockaddr_notsupp);
164	DEFAULT(pu->pru_sosend, sosend_generic);
165	DEFAULT(pu->pru_soreceive, soreceive_generic);
166	DEFAULT(pu->pru_sopoll, sopoll_generic);
167#undef DEFAULT
168	if (pr->pr_init)
169		(*pr->pr_init)();
170}
171
172/*
173 * Add a new protocol domain to the list of supported domains
174 * Note: you cant unload it again because a socket may be using it.
175 * XXX can't fail at this time.
176 */
177static int
178net_init_domain(const void *arg)
179{
180	const struct domain *dp = arg;
181	struct protosw *pr;
182
183	if (dp->dom_init)
184		(*dp->dom_init)();
185	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
186		protosw_init(pr);
187	/*
188	 * update global information about maximums
189	 */
190	max_hdr = max_linkhdr + max_protohdr;
191	max_datalen = MHLEN - max_hdr;
192	if (max_datalen < 1)
193		panic("%s: max_datalen < 1", __func__);
194	return (0);
195}
196
#ifdef VIMAGE
/*
 * Tear down one protocol domain instance.
 *
 * arg points to the struct domain.  The pr_destroy hook of every protocol
 * switch entry that has one is called first, followed by the domain's own
 * dom_destroy hook when set.
 *
 * Always returns 0.
 */
static int
net_detach_domain(const void *arg)
{
	const struct domain *dom;
	struct protosw *psw;

	dom = arg;

	for (psw = dom->dom_protosw; psw < dom->dom_protoswNPROTOSW; psw++) {
		if (psw->pr_destroy != NULL)
			psw->pr_destroy();
	}

	if (dom->dom_destroy != NULL)
		dom->dom_destroy();

	return (0);
}
#endif
216
217/*
218 * Add a new protocol domain to the list of supported domains
219 * Note: you cant unload it again because a socket may be using it.
220 * XXX can't fail at this time.
221 */
222void
223net_add_domain(void *data)
224{
225	struct domain *dp;
226
227	dp = (struct domain *)data;
228	mtx_lock(&dom_mtx);
229	dp->dom_next = domains;
230	domains = dp;
231
232	KASSERT(domain_init_status >= 1,
233	    ("attempt to net_add_domain(%s) before domaininit()",
234	    dp->dom_name));
235#ifndef INVARIANTS
236	if (domain_init_status < 1)
237		printf("WARNING: attempt to net_add_domain(%s) before "
238		    "domaininit()\n", dp->dom_name);
239#endif
240#ifdef notyet
241	KASSERT(domain_init_status < 2,
242	    ("attempt to net_add_domain(%s) after domainfinalize()",
243	    dp->dom_name));
244#else
245	if (domain_init_status >= 2)
246		printf("WARNING: attempt to net_add_domain(%s) after "
247		    "domainfinalize()\n", dp->dom_name);
248#endif
249	mtx_unlock(&dom_mtx);
250#ifdef VIMAGE
251	vnet_mod_register_multi(&vnet_domain_modinfo, dp, dp->dom_name);
252#else
253	net_init_domain(dp);
254#endif
255}
256
257static void
258socket_zone_change(void *tag)
259{
260
261	uma_zone_set_max(socket_zone, maxsockets);
262}
263
264/* ARGSUSED*/
265static void
266domaininit(void *dummy)
267{
268
269	/*
270	 * Before we do any setup, make sure to initialize the
271	 * zone allocator we get struct sockets from.
272	 */
273	socket_zone = uma_zcreate("socket", sizeof(struct socket), NULL, NULL,
274	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
275	uma_zone_set_max(socket_zone, maxsockets);
276	EVENTHANDLER_REGISTER(maxsockets_change, socket_zone_change, NULL,
277		EVENTHANDLER_PRI_FIRST);
278
279	if (max_linkhdr < 16)		/* XXX */
280		max_linkhdr = 16;
281
282	callout_init(&pffast_callout, CALLOUT_MPSAFE);
283	callout_init(&pfslow_callout, CALLOUT_MPSAFE);
284
285	mtx_lock(&dom_mtx);
286	KASSERT(domain_init_status == 0, ("domaininit called too late!"));
287	domain_init_status = 1;
288	mtx_unlock(&dom_mtx);
289}
290
291/* ARGSUSED*/
292static void
293domainfinalize(void *dummy)
294{
295
296	mtx_lock(&dom_mtx);
297	KASSERT(domain_init_status == 1, ("domainfinalize called too late!"));
298	domain_init_status = 2;
299	mtx_unlock(&dom_mtx);
300
301	callout_reset(&pffast_callout, 1, pffasttimo, NULL);
302	callout_reset(&pfslow_callout, 1, pfslowtimo, NULL);
303}
304
305struct protosw *
306pffindtype(int family, int type)
307{
308	struct domain *dp;
309	struct protosw *pr;
310
311	for (dp = domains; dp; dp = dp->dom_next)
312		if (dp->dom_family == family)
313			goto found;
314	return (0);
315found:
316	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
317		if (pr->pr_type && pr->pr_type == type)
318			return (pr);
319	return (0);
320}
321
322struct protosw *
323pffindproto(int family, int protocol, int type)
324{
325	struct domain *dp;
326	struct protosw *pr;
327	struct protosw *maybe = 0;
328
329	if (family == 0)
330		return (0);
331	for (dp = domains; dp; dp = dp->dom_next)
332		if (dp->dom_family == family)
333			goto found;
334	return (0);
335found:
336	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) {
337		if ((pr->pr_protocol == protocol) && (pr->pr_type == type))
338			return (pr);
339
340		if (type == SOCK_RAW && pr->pr_type == SOCK_RAW &&
341		    pr->pr_protocol == 0 && maybe == (struct protosw *)0)
342			maybe = pr;
343	}
344	return (maybe);
345}
346
347/*
348 * The caller must make sure that the new protocol is fully set up and ready to
349 * accept requests before it is registered.
350 */
351int
352pf_proto_register(int family, struct protosw *npr)
353{
354	struct domain *dp;
355	struct protosw *pr, *fpr;
356
357	/* Sanity checks. */
358	if (family == 0)
359		return (EPFNOSUPPORT);
360	if (npr->pr_type == 0)
361		return (EPROTOTYPE);
362	if (npr->pr_protocol == 0)
363		return (EPROTONOSUPPORT);
364	if (npr->pr_usrreqs == NULL)
365		return (ENXIO);
366
367	/* Try to find the specified domain based on the family. */
368	for (dp = domains; dp; dp = dp->dom_next)
369		if (dp->dom_family == family)
370			goto found;
371	return (EPFNOSUPPORT);
372
373found:
374	/* Initialize backpointer to struct domain. */
375	npr->pr_domain = dp;
376	fpr = NULL;
377
378	/*
379	 * Protect us against races when two protocol registrations for
380	 * the same protocol happen at the same time.
381	 */
382	mtx_lock(&dom_mtx);
383
384	/* The new protocol must not yet exist. */
385	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) {
386		if ((pr->pr_type == npr->pr_type) &&
387		    (pr->pr_protocol == npr->pr_protocol)) {
388			mtx_unlock(&dom_mtx);
389			return (EEXIST);	/* XXX: Check only protocol? */
390		}
391		/* While here, remember the first free spacer. */
392		if ((fpr == NULL) && (pr->pr_protocol == PROTO_SPACER))
393			fpr = pr;
394	}
395
396	/* If no free spacer is found we can't add the new protocol. */
397	if (fpr == NULL) {
398		mtx_unlock(&dom_mtx);
399		return (ENOMEM);
400	}
401
402	/* Copy the new struct protosw over the spacer. */
403	bcopy(npr, fpr, sizeof(*fpr));
404
405	/* Job is done, no more protection required. */
406	mtx_unlock(&dom_mtx);
407
408	/* Initialize and activate the protocol. */
409	protosw_init(fpr);
410
411	return (0);
412}
413
414/*
415 * The caller must make sure the protocol and its functions correctly shut down
416 * all sockets and release all locks and memory references.
417 */
418int
419pf_proto_unregister(int family, int protocol, int type)
420{
421	struct domain *dp;
422	struct protosw *pr, *dpr;
423
424	/* Sanity checks. */
425	if (family == 0)
426		return (EPFNOSUPPORT);
427	if (protocol == 0)
428		return (EPROTONOSUPPORT);
429	if (type == 0)
430		return (EPROTOTYPE);
431
432	/* Try to find the specified domain based on the family type. */
433	for (dp = domains; dp; dp = dp->dom_next)
434		if (dp->dom_family == family)
435			goto found;
436	return (EPFNOSUPPORT);
437
438found:
439	dpr = NULL;
440
441	/* Lock out everyone else while we are manipulating the protosw. */
442	mtx_lock(&dom_mtx);
443
444	/* The protocol must exist and only once. */
445	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) {
446		if ((pr->pr_type == type) && (pr->pr_protocol == protocol)) {
447			if (dpr != NULL) {
448				mtx_unlock(&dom_mtx);
449				return (EMLINK);   /* Should not happen! */
450			} else
451				dpr = pr;
452		}
453	}
454
455	/* Protocol does not exist. */
456	if (dpr == NULL) {
457		mtx_unlock(&dom_mtx);
458		return (EPROTONOSUPPORT);
459	}
460
461	/* De-orbit the protocol and make the slot available again. */
462	dpr->pr_type = 0;
463	dpr->pr_domain = dp;
464	dpr->pr_protocol = PROTO_SPACER;
465	dpr->pr_flags = 0;
466	dpr->pr_input = NULL;
467	dpr->pr_output = NULL;
468	dpr->pr_ctlinput = NULL;
469	dpr->pr_ctloutput = NULL;
470	dpr->pr_init = NULL;
471	dpr->pr_fasttimo = NULL;
472	dpr->pr_slowtimo = NULL;
473	dpr->pr_drain = NULL;
474	dpr->pr_usrreqs = &nousrreqs;
475
476	/* Job is done, not more protection required. */
477	mtx_unlock(&dom_mtx);
478
479	return (0);
480}
481
482void
483pfctlinput(int cmd, struct sockaddr *sa)
484{
485	struct domain *dp;
486	struct protosw *pr;
487
488	for (dp = domains; dp; dp = dp->dom_next)
489		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
490			if (pr->pr_ctlinput)
491				(*pr->pr_ctlinput)(cmd, sa, (void *)0);
492}
493
494void
495pfctlinput2(int cmd, struct sockaddr *sa, void *ctlparam)
496{
497	struct domain *dp;
498	struct protosw *pr;
499
500	if (!sa)
501		return;
502	for (dp = domains; dp; dp = dp->dom_next) {
503		/*
504		 * the check must be made by xx_ctlinput() anyways, to
505		 * make sure we use data item pointed to by ctlparam in
506		 * correct way.  the following check is made just for safety.
507		 */
508		if (dp->dom_family != sa->sa_family)
509			continue;
510
511		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
512			if (pr->pr_ctlinput)
513				(*pr->pr_ctlinput)(cmd, sa, ctlparam);
514	}
515}
516
517static void
518pfslowtimo(void *arg)
519{
520	struct domain *dp;
521	struct protosw *pr;
522
523	for (dp = domains; dp; dp = dp->dom_next)
524		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
525			if (pr->pr_slowtimo)
526				(*pr->pr_slowtimo)();
527	callout_reset(&pfslow_callout, hz/2, pfslowtimo, NULL);
528}
529
530static void
531pffasttimo(void *arg)
532{
533	struct domain *dp;
534	struct protosw *pr;
535
536	for (dp = domains; dp; dp = dp->dom_next)
537		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
538			if (pr->pr_fasttimo)
539				(*pr->pr_fasttimo)();
540	callout_reset(&pffast_callout, hz/5, pffasttimo, NULL);
541}
542