uipc_domain.c revision 1.78
1/*	$NetBSD: uipc_domain.c,v 1.78 2009/03/11 05:55:22 mrg Exp $	*/
2
3/*
4 * Copyright (c) 1982, 1986, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 *	@(#)uipc_domain.c	8.3 (Berkeley) 2/14/95
32 */
33
34#include <sys/cdefs.h>
35__KERNEL_RCSID(0, "$NetBSD: uipc_domain.c,v 1.78 2009/03/11 05:55:22 mrg Exp $");
36
37#include <sys/param.h>
38#include <sys/socket.h>
39#include <sys/socketvar.h>
40#include <sys/protosw.h>
41#include <sys/domain.h>
42#include <sys/mbuf.h>
43#include <sys/time.h>
44#include <sys/kernel.h>
45#include <sys/systm.h>
46#include <sys/callout.h>
47#include <sys/queue.h>
48#include <sys/proc.h>
49#include <sys/sysctl.h>
50#include <sys/un.h>
51#include <sys/unpcb.h>
52#include <sys/file.h>
53#include <sys/filedesc.h>
54#include <sys/kauth.h>
55
56MALLOC_DECLARE(M_SOCKADDR);
57
58MALLOC_DEFINE(M_SOCKADDR, "sockaddr", "socket endpoints");
59
60void	pffasttimo(void *);
61void	pfslowtimo(void *);
62
63struct domainhead domains = STAILQ_HEAD_INITIALIZER(domains);
64static struct domain *domain_array[AF_MAX];
65
66callout_t pffasttimo_ch, pfslowtimo_ch;
67
68/*
69 * Current time values for fast and slow timeouts.  We can use u_int
70 * relatively safely.  The fast timer will roll over in 27 years and
71 * the slow timer in 68 years.
72 */
73u_int	pfslowtimo_now;
74u_int	pffasttimo_now;
75
76static struct sysctllog *domain_sysctllog;
77static void sysctl_net_setup(void);
78
79void
80domaininit(void)
81{
82	__link_set_decl(domains, struct domain);
83	struct domain * const * dpp;
84	struct domain *rt_domain = NULL;
85
86	sysctl_net_setup();
87
88	/*
89	 * Add all of the domains.  Make sure the PF_ROUTE
90	 * domain is added last.
91	 */
92	__link_set_foreach(dpp, domains) {
93		if ((*dpp)->dom_family == PF_ROUTE)
94			rt_domain = *dpp;
95		else
96			domain_attach(*dpp);
97	}
98	if (rt_domain)
99		domain_attach(rt_domain);
100
101	callout_init(&pffasttimo_ch, CALLOUT_MPSAFE);
102	callout_init(&pfslowtimo_ch, CALLOUT_MPSAFE);
103
104	callout_reset(&pffasttimo_ch, 1, pffasttimo, NULL);
105	callout_reset(&pfslowtimo_ch, 1, pfslowtimo, NULL);
106}
107
108void
109domain_attach(struct domain *dp)
110{
111	const struct protosw *pr;
112
113	STAILQ_INSERT_TAIL(&domains, dp, dom_link);
114	if (dp->dom_family < __arraycount(domain_array))
115		domain_array[dp->dom_family] = dp;
116
117	if (dp->dom_init)
118		(*dp->dom_init)();
119
120#ifdef MBUFTRACE
121	if (dp->dom_mowner.mo_name[0] == '\0') {
122		strncpy(dp->dom_mowner.mo_name, dp->dom_name,
123		    sizeof(dp->dom_mowner.mo_name));
124		MOWNER_ATTACH(&dp->dom_mowner);
125	}
126#endif
127	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) {
128		if (pr->pr_init)
129			(*pr->pr_init)();
130	}
131
132	if (max_linkhdr < 16)		/* XXX */
133		max_linkhdr = 16;
134	max_hdr = max_linkhdr + max_protohdr;
135	max_datalen = MHLEN - max_hdr;
136}
137
138struct domain *
139pffinddomain(int family)
140{
141	struct domain *dp;
142
143	if (family < __arraycount(domain_array) && domain_array[family] != NULL)
144		return domain_array[family];
145
146	DOMAIN_FOREACH(dp)
147		if (dp->dom_family == family)
148			return (dp);
149	return (NULL);
150}
151
152const struct protosw *
153pffindtype(int family, int type)
154{
155	struct domain *dp;
156	const struct protosw *pr;
157
158	dp = pffinddomain(family);
159	if (dp == NULL)
160		return (NULL);
161
162	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
163		if (pr->pr_type && pr->pr_type == type)
164			return (pr);
165
166	return (NULL);
167}
168
169const struct protosw *
170pffindproto(int family, int protocol, int type)
171{
172	struct domain *dp;
173	const struct protosw *pr;
174	const struct protosw *maybe = NULL;
175
176	if (family == 0)
177		return (NULL);
178
179	dp = pffinddomain(family);
180	if (dp == NULL)
181		return (NULL);
182
183	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) {
184		if ((pr->pr_protocol == protocol) && (pr->pr_type == type))
185			return (pr);
186
187		if (type == SOCK_RAW && pr->pr_type == SOCK_RAW &&
188		    pr->pr_protocol == 0 && maybe == NULL)
189			maybe = pr;
190	}
191	return (maybe);
192}
193
194void *
195sockaddr_addr(struct sockaddr *sa, socklen_t *slenp)
196{
197	const struct domain *dom;
198
199	if ((dom = pffinddomain(sa->sa_family)) == NULL ||
200	    dom->dom_sockaddr_addr == NULL)
201		return NULL;
202
203	return (*dom->dom_sockaddr_addr)(sa, slenp);
204}
205
206const void *
207sockaddr_const_addr(const struct sockaddr *sa, socklen_t *slenp)
208{
209	const struct domain *dom;
210
211	if ((dom = pffinddomain(sa->sa_family)) == NULL ||
212	    dom->dom_sockaddr_const_addr == NULL)
213		return NULL;
214
215	return (*dom->dom_sockaddr_const_addr)(sa, slenp);
216}
217
218const struct sockaddr *
219sockaddr_any(const struct sockaddr *sa)
220{
221	const struct domain *dom;
222
223	if ((dom = pffinddomain(sa->sa_family)) == NULL)
224		return NULL;
225
226	return dom->dom_sa_any;
227}
228
229const void *
230sockaddr_anyaddr(const struct sockaddr *sa, socklen_t *slenp)
231{
232	const struct sockaddr *any;
233
234	if ((any = sockaddr_any(sa)) == NULL)
235		return NULL;
236
237	return sockaddr_const_addr(any, slenp);
238}
239
240struct sockaddr *
241sockaddr_alloc(sa_family_t af, socklen_t socklen, int flags)
242{
243	struct sockaddr *sa;
244	socklen_t reallen = MAX(socklen, offsetof(struct sockaddr, sa_data[0]));
245
246	if ((sa = malloc(reallen, M_SOCKADDR, flags)) == NULL)
247		return NULL;
248
249	sa->sa_family = af;
250	sa->sa_len = reallen;
251	return sa;
252}
253
254struct sockaddr *
255sockaddr_copy(struct sockaddr *dst, socklen_t socklen,
256    const struct sockaddr *src)
257{
258	if (__predict_false(socklen < src->sa_len)) {
259		panic("%s: source too long, %d < %d bytes", __func__, socklen,
260		    src->sa_len);
261	}
262	return memcpy(dst, src, src->sa_len);
263}
264
265int
266sockaddr_cmp(const struct sockaddr *sa1, const struct sockaddr *sa2)
267{
268	int len, rc;
269	struct domain *dom;
270
271	if (sa1->sa_family != sa2->sa_family)
272		return sa1->sa_family - sa2->sa_family;
273
274	dom = pffinddomain(sa1->sa_family);
275
276	if (dom != NULL && dom->dom_sockaddr_cmp != NULL)
277		return (*dom->dom_sockaddr_cmp)(sa1, sa2);
278
279	len = MIN(sa1->sa_len, sa2->sa_len);
280
281	if (dom == NULL || dom->dom_sa_cmplen == 0) {
282		if ((rc = memcmp(sa1, sa2, len)) != 0)
283			return rc;
284		return sa1->sa_len - sa2->sa_len;
285	}
286
287	if ((rc = memcmp((const char *)sa1 + dom->dom_sa_cmpofs,
288		         (const char *)sa2 + dom->dom_sa_cmpofs,
289			 MIN(dom->dom_sa_cmplen,
290			     len - MIN(len, dom->dom_sa_cmpofs)))) != 0)
291		return rc;
292
293	return MIN(dom->dom_sa_cmplen + dom->dom_sa_cmpofs, sa1->sa_len) -
294	       MIN(dom->dom_sa_cmplen + dom->dom_sa_cmpofs, sa2->sa_len);
295}
296
297struct sockaddr *
298sockaddr_dup(const struct sockaddr *src, int flags)
299{
300	struct sockaddr *dst;
301
302	if ((dst = sockaddr_alloc(src->sa_family, src->sa_len, flags)) == NULL)
303		return NULL;
304
305	return sockaddr_copy(dst, dst->sa_len, src);
306}
307
308void
309sockaddr_free(struct sockaddr *sa)
310{
311	free(sa, M_SOCKADDR);
312}
313
314/*
315 * sysctl helper to stuff PF_LOCAL pcbs into sysctl structures
316 */
317static void
318sysctl_dounpcb(struct kinfo_pcb *pcb, const struct socket *so)
319{
320	struct unpcb *unp = sotounpcb(so);
321	struct sockaddr_un *un = unp->unp_addr;
322
323	memset(pcb, 0, sizeof(*pcb));
324
325	pcb->ki_family = so->so_proto->pr_domain->dom_family;
326	pcb->ki_type = so->so_proto->pr_type;
327	pcb->ki_protocol = so->so_proto->pr_protocol;
328	pcb->ki_pflags = unp->unp_flags;
329
330	pcb->ki_pcbaddr = PTRTOUINT64(unp);
331	/* pcb->ki_ppcbaddr = unp has no ppcb... */
332	pcb->ki_sockaddr = PTRTOUINT64(so);
333
334	pcb->ki_sostate = so->so_state;
335	/* pcb->ki_prstate = unp has no state... */
336
337	pcb->ki_rcvq = so->so_rcv.sb_cc;
338	pcb->ki_sndq = so->so_snd.sb_cc;
339
340	un = (struct sockaddr_un *)&pcb->ki_src;
341	/*
342	 * local domain sockets may bind without having a local
343	 * endpoint.  bleah!
344	 */
345	if (unp->unp_addr != NULL) {
346		un->sun_len = unp->unp_addr->sun_len;
347		un->sun_family = unp->unp_addr->sun_family;
348		strlcpy(un->sun_path, unp->unp_addr->sun_path,
349		    sizeof(pcb->ki_s));
350	}
351	else {
352		un->sun_len = offsetof(struct sockaddr_un, sun_path);
353		un->sun_family = pcb->ki_family;
354	}
355	if (unp->unp_conn != NULL) {
356		un = (struct sockaddr_un *)&pcb->ki_dst;
357		if (unp->unp_conn->unp_addr != NULL) {
358			un->sun_len = unp->unp_conn->unp_addr->sun_len;
359			un->sun_family = unp->unp_conn->unp_addr->sun_family;
360			un->sun_family = unp->unp_conn->unp_addr->sun_family;
361			strlcpy(un->sun_path, unp->unp_conn->unp_addr->sun_path,
362				sizeof(pcb->ki_d));
363		}
364		else {
365			un->sun_len = offsetof(struct sockaddr_un, sun_path);
366			un->sun_family = pcb->ki_family;
367		}
368	}
369
370	pcb->ki_inode = unp->unp_ino;
371	pcb->ki_vnode = PTRTOUINT64(unp->unp_vnode);
372	pcb->ki_conn = PTRTOUINT64(unp->unp_conn);
373	pcb->ki_refs = PTRTOUINT64(unp->unp_refs);
374	pcb->ki_nextref = PTRTOUINT64(unp->unp_nextref);
375}
376
377static int
378sysctl_unpcblist(SYSCTLFN_ARGS)
379{
380	struct file *fp, *dfp, *np;
381	struct socket *so;
382	struct kinfo_pcb pcb;
383	char *dp;
384	u_int op, arg;
385	size_t len, needed, elem_size, out_size;
386	int error, elem_count, pf, type, pf2;
387
388	if (namelen == 1 && name[0] == CTL_QUERY)
389		return (sysctl_query(SYSCTLFN_CALL(rnode)));
390
391	if (namelen != 4)
392		return (EINVAL);
393
394	if (oldp != NULL) {
395		len = *oldlenp;
396		elem_size = name[2];
397		elem_count = name[3];
398		if (elem_size != sizeof(pcb))
399			return EINVAL;
400	} else {
401		len = 0;
402		elem_size = sizeof(pcb);
403		elem_count = INT_MAX;
404	}
405	error = 0;
406	dp = oldp;
407	op = name[0];
408	arg = name[1];
409	out_size = elem_size;
410	needed = 0;
411
412	if (name - oname != 4)
413		return (EINVAL);
414
415	pf = oname[1];
416	type = oname[2];
417	pf2 = (oldp == NULL) ? 0 : pf;
418
419	/*
420	 * allocate dummy file descriptor to make position in list.
421	 */
422	sysctl_unlock();
423	if ((dfp = fgetdummy()) == NULL) {
424	 	sysctl_relock();
425		return ENOMEM;
426	}
427
428	/*
429	 * there's no "list" of local domain sockets, so we have
430	 * to walk the file list looking for them.  :-/
431	 */
432	mutex_enter(&filelist_lock);
433	LIST_FOREACH(fp, &filehead, f_list) {
434	    	np = LIST_NEXT(fp, f_list);
435		if (fp->f_count == 0 || fp->f_type != DTYPE_SOCKET ||
436		    fp->f_data == NULL)
437			continue;
438		if (kauth_authorize_generic(l->l_cred,
439		    KAUTH_GENERIC_CANSEE, fp->f_cred) != 0)
440			continue;
441		so = (struct socket *)fp->f_data;
442		if (so->so_type != type)
443			continue;
444		if (so->so_proto->pr_domain->dom_family != pf)
445			continue;
446		if (len >= elem_size && elem_count > 0) {
447			mutex_enter(&fp->f_lock);
448			fp->f_count++;
449			mutex_exit(&fp->f_lock);
450			LIST_INSERT_AFTER(fp, dfp, f_list);
451			mutex_exit(&filelist_lock);
452			sysctl_dounpcb(&pcb, so);
453			error = copyout(&pcb, dp, out_size);
454			closef(fp);
455			mutex_enter(&filelist_lock);
456			np = LIST_NEXT(dfp, f_list);
457			LIST_REMOVE(dfp, f_list);
458			if (error)
459				break;
460			dp += elem_size;
461			len -= elem_size;
462		}
463		needed += elem_size;
464		if (elem_count > 0 && elem_count != INT_MAX)
465			elem_count--;
466	}
467	mutex_exit(&filelist_lock);
468	fputdummy(dfp);
469 	*oldlenp = needed;
470	if (oldp == NULL)
471		*oldlenp += PCB_SLOP * sizeof(struct kinfo_pcb);
472 	sysctl_relock();
473
474	return (error);
475}
476
477static void
478sysctl_net_setup()
479{
480
481	KASSERT(domain_sysctllog == NULL);
482	sysctl_createv(&domain_sysctllog, 0, NULL, NULL,
483		       CTLFLAG_PERMANENT,
484		       CTLTYPE_NODE, "net", NULL,
485		       NULL, 0, NULL, 0,
486		       CTL_NET, CTL_EOL);
487	sysctl_createv(&domain_sysctllog, 0, NULL, NULL,
488		       CTLFLAG_PERMANENT,
489		       CTLTYPE_NODE, "local",
490		       SYSCTL_DESCR("PF_LOCAL related settings"),
491		       NULL, 0, NULL, 0,
492		       CTL_NET, PF_LOCAL, CTL_EOL);
493	sysctl_createv(&domain_sysctllog, 0, NULL, NULL,
494		       CTLFLAG_PERMANENT,
495		       CTLTYPE_NODE, "stream",
496		       SYSCTL_DESCR("SOCK_STREAM settings"),
497		       NULL, 0, NULL, 0,
498		       CTL_NET, PF_LOCAL, SOCK_STREAM, CTL_EOL);
499	sysctl_createv(&domain_sysctllog, 0, NULL, NULL,
500		       CTLFLAG_PERMANENT,
501		       CTLTYPE_NODE, "dgram",
502		       SYSCTL_DESCR("SOCK_DGRAM settings"),
503		       NULL, 0, NULL, 0,
504		       CTL_NET, PF_LOCAL, SOCK_DGRAM, CTL_EOL);
505
506	sysctl_createv(&domain_sysctllog, 0, NULL, NULL,
507		       CTLFLAG_PERMANENT,
508		       CTLTYPE_STRUCT, "pcblist",
509		       SYSCTL_DESCR("SOCK_STREAM protocol control block list"),
510		       sysctl_unpcblist, 0, NULL, 0,
511		       CTL_NET, PF_LOCAL, SOCK_STREAM, CTL_CREATE, CTL_EOL);
512	sysctl_createv(&domain_sysctllog, 0, NULL, NULL,
513		       CTLFLAG_PERMANENT,
514		       CTLTYPE_STRUCT, "pcblist",
515		       SYSCTL_DESCR("SOCK_DGRAM protocol control block list"),
516		       sysctl_unpcblist, 0, NULL, 0,
517		       CTL_NET, PF_LOCAL, SOCK_DGRAM, CTL_CREATE, CTL_EOL);
518}
519
520void
521pfctlinput(int cmd, const struct sockaddr *sa)
522{
523	struct domain *dp;
524	const struct protosw *pr;
525
526	DOMAIN_FOREACH(dp) {
527		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) {
528			if (pr->pr_ctlinput != NULL)
529				(*pr->pr_ctlinput)(cmd, sa, NULL);
530		}
531	}
532}
533
534void
535pfctlinput2(int cmd, const struct sockaddr *sa, void *ctlparam)
536{
537	struct domain *dp;
538	const struct protosw *pr;
539
540	if (sa == NULL)
541		return;
542
543	DOMAIN_FOREACH(dp) {
544		/*
545		 * the check must be made by xx_ctlinput() anyways, to
546		 * make sure we use data item pointed to by ctlparam in
547		 * correct way.  the following check is made just for safety.
548		 */
549		if (dp->dom_family != sa->sa_family)
550			continue;
551
552		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) {
553			if (pr->pr_ctlinput != NULL)
554				(*pr->pr_ctlinput)(cmd, sa, ctlparam);
555		}
556	}
557}
558
559void
560pfslowtimo(void *arg)
561{
562	struct domain *dp;
563	const struct protosw *pr;
564
565	pfslowtimo_now++;
566
567	DOMAIN_FOREACH(dp) {
568		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
569			if (pr->pr_slowtimo)
570				(*pr->pr_slowtimo)();
571	}
572	callout_schedule(&pfslowtimo_ch, hz / 2);
573}
574
575void
576pffasttimo(void *arg)
577{
578	struct domain *dp;
579	const struct protosw *pr;
580
581	pffasttimo_now++;
582
583	DOMAIN_FOREACH(dp) {
584		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
585			if (pr->pr_fasttimo)
586				(*pr->pr_fasttimo)();
587	}
588	callout_schedule(&pffasttimo_ch, hz / 5);
589}
590