uipc_domain.c revision 232051
1/*-
2 * Copyright (c) 1982, 1986, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	@(#)uipc_domain.c	8.2 (Berkeley) 10/18/93
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: head/sys/kern/uipc_domain.c 232051 2012-02-23 17:47:19Z brueffer $");
34
35#include <sys/param.h>
36#include <sys/socket.h>
37#include <sys/protosw.h>
38#include <sys/domain.h>
39#include <sys/eventhandler.h>
40#include <sys/mbuf.h>
41#include <sys/kernel.h>
42#include <sys/lock.h>
43#include <sys/mutex.h>
44#include <sys/socketvar.h>
45#include <sys/systm.h>
46
47#include <net/vnet.h>
48
49#include <vm/uma.h>
50
/*
 * System initialization
 *
 * Note: domain initialization takes place on a per-domain basis
 * as a result of traversing a SYSINIT linker set.  Most likely,
 * each domain will want to call DOMAIN_SET(9) itself, which
 * causes the domain to be added just after domaininit()
 * is called during startup.
 *
 * See DOMAIN_SET(9) for details on its use.
 */
62
63static void domaininit(void *);
64SYSINIT(domain, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, domaininit, NULL);
65
66static void domainfinalize(void *);
67SYSINIT(domainfin, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST, domainfinalize,
68    NULL);
69
70static struct callout pffast_callout;
71static struct callout pfslow_callout;
72
73static void	pffasttimo(void *);
74static void	pfslowtimo(void *);
75
76struct domain *domains;		/* registered protocol domains */
77int domain_init_status = 0;
78static struct mtx dom_mtx;		/* domain list lock */
79MTX_SYSINIT(domain, &dom_mtx, "domain list", MTX_DEF);
80
81/*
82 * Dummy protocol specific user requests function pointer array.
83 * All functions return EOPNOTSUPP.
84 */
85struct pr_usrreqs nousrreqs = {
86	.pru_accept =		pru_accept_notsupp,
87	.pru_attach =		pru_attach_notsupp,
88	.pru_bind =		pru_bind_notsupp,
89	.pru_connect =		pru_connect_notsupp,
90	.pru_connect2 =		pru_connect2_notsupp,
91	.pru_control =		pru_control_notsupp,
92	.pru_disconnect	=	pru_disconnect_notsupp,
93	.pru_listen =		pru_listen_notsupp,
94	.pru_peeraddr =		pru_peeraddr_notsupp,
95	.pru_rcvd =		pru_rcvd_notsupp,
96	.pru_rcvoob =		pru_rcvoob_notsupp,
97	.pru_send =		pru_send_notsupp,
98	.pru_sense =		pru_sense_null,
99	.pru_shutdown =		pru_shutdown_notsupp,
100	.pru_sockaddr =		pru_sockaddr_notsupp,
101	.pru_sosend =		pru_sosend_notsupp,
102	.pru_soreceive =	pru_soreceive_notsupp,
103	.pru_sopoll =		pru_sopoll_notsupp,
104};
105
106static void
107protosw_init(struct protosw *pr)
108{
109	struct pr_usrreqs *pu;
110
111	pu = pr->pr_usrreqs;
112	KASSERT(pu != NULL, ("protosw_init: %ssw[%d] has no usrreqs!",
113	    pr->pr_domain->dom_name,
114	    (int)(pr - pr->pr_domain->dom_protosw)));
115
116	/*
117	 * Protocol switch methods fall into three categories: mandatory,
118	 * mandatory but protosw_init() provides a default, and optional.
119	 *
120	 * For true protocols (i.e., pru_attach != NULL), KASSERT truly
121	 * mandatory methods with no defaults, and initialize defaults for
122	 * other mandatory methods if the protocol hasn't defined an
123	 * implementation (NULL function pointer).
124	 */
125#if 0
126	if (pu->pru_attach != NULL) {
127		KASSERT(pu->pru_abort != NULL,
128		    ("protosw_init: %ssw[%d] pru_abort NULL",
129		    pr->pr_domain->dom_name,
130		    (int)(pr - pr->pr_domain->dom_protosw)));
131		KASSERT(pu->pru_send != NULL,
132		    ("protosw_init: %ssw[%d] pru_send NULL",
133		    pr->pr_domain->dom_name,
134		    (int)(pr - pr->pr_domain->dom_protosw)));
135	}
136#endif
137
138#define DEFAULT(foo, bar)	if ((foo) == NULL)  (foo) = (bar)
139	DEFAULT(pu->pru_accept, pru_accept_notsupp);
140	DEFAULT(pu->pru_bind, pru_bind_notsupp);
141	DEFAULT(pu->pru_connect, pru_connect_notsupp);
142	DEFAULT(pu->pru_connect2, pru_connect2_notsupp);
143	DEFAULT(pu->pru_control, pru_control_notsupp);
144	DEFAULT(pu->pru_disconnect, pru_disconnect_notsupp);
145	DEFAULT(pu->pru_listen, pru_listen_notsupp);
146	DEFAULT(pu->pru_peeraddr, pru_peeraddr_notsupp);
147	DEFAULT(pu->pru_rcvd, pru_rcvd_notsupp);
148	DEFAULT(pu->pru_rcvoob, pru_rcvoob_notsupp);
149	DEFAULT(pu->pru_sense, pru_sense_null);
150	DEFAULT(pu->pru_shutdown, pru_shutdown_notsupp);
151	DEFAULT(pu->pru_sockaddr, pru_sockaddr_notsupp);
152	DEFAULT(pu->pru_sosend, sosend_generic);
153	DEFAULT(pu->pru_soreceive, soreceive_generic);
154	DEFAULT(pu->pru_sopoll, sopoll_generic);
155#undef DEFAULT
156	if (pr->pr_init)
157		(*pr->pr_init)();
158}
159
160/*
161 * Add a new protocol domain to the list of supported domains
162 * Note: you cant unload it again because a socket may be using it.
163 * XXX can't fail at this time.
164 */
165void
166domain_init(void *arg)
167{
168	struct domain *dp = arg;
169	struct protosw *pr;
170
171	if (dp->dom_init)
172		(*dp->dom_init)();
173	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
174		protosw_init(pr);
175	/*
176	 * update global information about maximums
177	 */
178	max_hdr = max_linkhdr + max_protohdr;
179	max_datalen = MHLEN - max_hdr;
180	if (max_datalen < 1)
181		panic("%s: max_datalen < 1", __func__);
182}
183
184#ifdef VIMAGE
/*
 * Per-vnet domain setup.  The virtualized case needs nothing extra, so
 * "arg" is handed straight to domain_init().
 */
void
vnet_domain_init(void *arg)
{

	domain_init(arg);
}
192
193void
194vnet_domain_uninit(void *arg)
195{
196	struct domain *dp = arg;
197	struct protosw *pr;
198
199	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
200		if (pr->pr_destroy)
201			(*pr->pr_destroy)();
202	if (dp->dom_destroy)
203		(*dp->dom_destroy)();
204}
205#endif
206
207/*
208 * Add a new protocol domain to the list of supported domains
209 * Note: you cant unload it again because a socket may be using it.
210 * XXX can't fail at this time.
211 */
212void
213domain_add(void *data)
214{
215	struct domain *dp;
216
217	dp = (struct domain *)data;
218	mtx_lock(&dom_mtx);
219	dp->dom_next = domains;
220	domains = dp;
221
222	KASSERT(domain_init_status >= 1,
223	    ("attempt to domain_add(%s) before domaininit()",
224	    dp->dom_name));
225#ifndef INVARIANTS
226	if (domain_init_status < 1)
227		printf("WARNING: attempt to domain_add(%s) before "
228		    "domaininit()\n", dp->dom_name);
229#endif
230#ifdef notyet
231	KASSERT(domain_init_status < 2,
232	    ("attempt to domain_add(%s) after domainfinalize()",
233	    dp->dom_name));
234#else
235	if (domain_init_status >= 2)
236		printf("WARNING: attempt to domain_add(%s) after "
237		    "domainfinalize()\n", dp->dom_name);
238#endif
239	mtx_unlock(&dom_mtx);
240}
241
242static void
243socket_zone_change(void *tag)
244{
245
246	uma_zone_set_max(socket_zone, maxsockets);
247}
248
249/* ARGSUSED*/
250static void
251domaininit(void *dummy)
252{
253
254	/*
255	 * Before we do any setup, make sure to initialize the
256	 * zone allocator we get struct sockets from.
257	 */
258	socket_zone = uma_zcreate("socket", sizeof(struct socket), NULL, NULL,
259	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
260	uma_zone_set_max(socket_zone, maxsockets);
261	EVENTHANDLER_REGISTER(maxsockets_change, socket_zone_change, NULL,
262		EVENTHANDLER_PRI_FIRST);
263
264	if (max_linkhdr < 16)		/* XXX */
265		max_linkhdr = 16;
266
267	callout_init(&pffast_callout, CALLOUT_MPSAFE);
268	callout_init(&pfslow_callout, CALLOUT_MPSAFE);
269
270	mtx_lock(&dom_mtx);
271	KASSERT(domain_init_status == 0, ("domaininit called too late!"));
272	domain_init_status = 1;
273	mtx_unlock(&dom_mtx);
274}
275
276/* ARGSUSED*/
277static void
278domainfinalize(void *dummy)
279{
280
281	mtx_lock(&dom_mtx);
282	KASSERT(domain_init_status == 1, ("domainfinalize called too late!"));
283	domain_init_status = 2;
284	mtx_unlock(&dom_mtx);
285
286	callout_reset(&pffast_callout, 1, pffasttimo, NULL);
287	callout_reset(&pfslow_callout, 1, pfslowtimo, NULL);
288}
289
290struct protosw *
291pffindtype(int family, int type)
292{
293	struct domain *dp;
294	struct protosw *pr;
295
296	for (dp = domains; dp; dp = dp->dom_next)
297		if (dp->dom_family == family)
298			goto found;
299	return (0);
300found:
301	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
302		if (pr->pr_type && pr->pr_type == type)
303			return (pr);
304	return (0);
305}
306
307struct protosw *
308pffindproto(int family, int protocol, int type)
309{
310	struct domain *dp;
311	struct protosw *pr;
312	struct protosw *maybe = 0;
313
314	if (family == 0)
315		return (0);
316	for (dp = domains; dp; dp = dp->dom_next)
317		if (dp->dom_family == family)
318			goto found;
319	return (0);
320found:
321	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) {
322		if ((pr->pr_protocol == protocol) && (pr->pr_type == type))
323			return (pr);
324
325		if (type == SOCK_RAW && pr->pr_type == SOCK_RAW &&
326		    pr->pr_protocol == 0 && maybe == (struct protosw *)0)
327			maybe = pr;
328	}
329	return (maybe);
330}
331
332/*
333 * The caller must make sure that the new protocol is fully set up and ready to
334 * accept requests before it is registered.
335 */
336int
337pf_proto_register(int family, struct protosw *npr)
338{
339	VNET_ITERATOR_DECL(vnet_iter);
340	struct domain *dp;
341	struct protosw *pr, *fpr;
342
343	/* Sanity checks. */
344	if (family == 0)
345		return (EPFNOSUPPORT);
346	if (npr->pr_type == 0)
347		return (EPROTOTYPE);
348	if (npr->pr_protocol == 0)
349		return (EPROTONOSUPPORT);
350	if (npr->pr_usrreqs == NULL)
351		return (ENXIO);
352
353	/* Try to find the specified domain based on the family. */
354	for (dp = domains; dp; dp = dp->dom_next)
355		if (dp->dom_family == family)
356			goto found;
357	return (EPFNOSUPPORT);
358
359found:
360	/* Initialize backpointer to struct domain. */
361	npr->pr_domain = dp;
362	fpr = NULL;
363
364	/*
365	 * Protect us against races when two protocol registrations for
366	 * the same protocol happen at the same time.
367	 */
368	mtx_lock(&dom_mtx);
369
370	/* The new protocol must not yet exist. */
371	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) {
372		if ((pr->pr_type == npr->pr_type) &&
373		    (pr->pr_protocol == npr->pr_protocol)) {
374			mtx_unlock(&dom_mtx);
375			return (EEXIST);	/* XXX: Check only protocol? */
376		}
377		/* While here, remember the first free spacer. */
378		if ((fpr == NULL) && (pr->pr_protocol == PROTO_SPACER))
379			fpr = pr;
380	}
381
382	/* If no free spacer is found we can't add the new protocol. */
383	if (fpr == NULL) {
384		mtx_unlock(&dom_mtx);
385		return (ENOMEM);
386	}
387
388	/* Copy the new struct protosw over the spacer. */
389	bcopy(npr, fpr, sizeof(*fpr));
390
391	/* Job is done, no more protection required. */
392	mtx_unlock(&dom_mtx);
393
394	/* Initialize and activate the protocol. */
395	VNET_LIST_RLOCK();
396	VNET_FOREACH(vnet_iter) {
397		CURVNET_SET_QUIET(vnet_iter);
398		protosw_init(fpr);
399		CURVNET_RESTORE();
400	}
401	VNET_LIST_RUNLOCK();
402
403	return (0);
404}
405
406/*
407 * The caller must make sure the protocol and its functions correctly shut down
408 * all sockets and release all locks and memory references.
409 */
410int
411pf_proto_unregister(int family, int protocol, int type)
412{
413	struct domain *dp;
414	struct protosw *pr, *dpr;
415
416	/* Sanity checks. */
417	if (family == 0)
418		return (EPFNOSUPPORT);
419	if (protocol == 0)
420		return (EPROTONOSUPPORT);
421	if (type == 0)
422		return (EPROTOTYPE);
423
424	/* Try to find the specified domain based on the family type. */
425	for (dp = domains; dp; dp = dp->dom_next)
426		if (dp->dom_family == family)
427			goto found;
428	return (EPFNOSUPPORT);
429
430found:
431	dpr = NULL;
432
433	/* Lock out everyone else while we are manipulating the protosw. */
434	mtx_lock(&dom_mtx);
435
436	/* The protocol must exist and only once. */
437	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) {
438		if ((pr->pr_type == type) && (pr->pr_protocol == protocol)) {
439			if (dpr != NULL) {
440				mtx_unlock(&dom_mtx);
441				return (EMLINK);   /* Should not happen! */
442			} else
443				dpr = pr;
444		}
445	}
446
447	/* Protocol does not exist. */
448	if (dpr == NULL) {
449		mtx_unlock(&dom_mtx);
450		return (EPROTONOSUPPORT);
451	}
452
453	/* De-orbit the protocol and make the slot available again. */
454	dpr->pr_type = 0;
455	dpr->pr_domain = dp;
456	dpr->pr_protocol = PROTO_SPACER;
457	dpr->pr_flags = 0;
458	dpr->pr_input = NULL;
459	dpr->pr_output = NULL;
460	dpr->pr_ctlinput = NULL;
461	dpr->pr_ctloutput = NULL;
462	dpr->pr_init = NULL;
463	dpr->pr_fasttimo = NULL;
464	dpr->pr_slowtimo = NULL;
465	dpr->pr_drain = NULL;
466	dpr->pr_usrreqs = &nousrreqs;
467
468	/* Job is done, not more protection required. */
469	mtx_unlock(&dom_mtx);
470
471	return (0);
472}
473
474void
475pfctlinput(int cmd, struct sockaddr *sa)
476{
477	struct domain *dp;
478	struct protosw *pr;
479
480	for (dp = domains; dp; dp = dp->dom_next)
481		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
482			if (pr->pr_ctlinput)
483				(*pr->pr_ctlinput)(cmd, sa, (void *)0);
484}
485
486void
487pfctlinput2(int cmd, struct sockaddr *sa, void *ctlparam)
488{
489	struct domain *dp;
490	struct protosw *pr;
491
492	if (!sa)
493		return;
494	for (dp = domains; dp; dp = dp->dom_next) {
495		/*
496		 * the check must be made by xx_ctlinput() anyways, to
497		 * make sure we use data item pointed to by ctlparam in
498		 * correct way.  the following check is made just for safety.
499		 */
500		if (dp->dom_family != sa->sa_family)
501			continue;
502
503		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
504			if (pr->pr_ctlinput)
505				(*pr->pr_ctlinput)(cmd, sa, ctlparam);
506	}
507}
508
509static void
510pfslowtimo(void *arg)
511{
512	struct domain *dp;
513	struct protosw *pr;
514
515	for (dp = domains; dp; dp = dp->dom_next)
516		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
517			if (pr->pr_slowtimo)
518				(*pr->pr_slowtimo)();
519	callout_reset(&pfslow_callout, hz/2, pfslowtimo, NULL);
520}
521
522static void
523pffasttimo(void *arg)
524{
525	struct domain *dp;
526	struct protosw *pr;
527
528	for (dp = domains; dp; dp = dp->dom_next)
529		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
530			if (pr->pr_fasttimo)
531				(*pr->pr_fasttimo)();
532	callout_reset(&pffast_callout, hz/5, pffasttimo, NULL);
533}
534