kern_conf.c revision 126077
1/*-
2 * Copyright (c) 1999-2002 Poul-Henning Kamp
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/kern/kern_conf.c 126077 2004-02-21 20:29:52Z phk $");
29
30#include <sys/param.h>
31#include <sys/kernel.h>
32#include <sys/systm.h>
33#include <sys/bio.h>
34#include <sys/lock.h>
35#include <sys/mutex.h>
36#include <sys/sysctl.h>
37#include <sys/module.h>
38#include <sys/malloc.h>
39#include <sys/conf.h>
40#include <sys/vnode.h>
41#include <sys/queue.h>
42#include <sys/poll.h>
43#include <sys/ctype.h>
44#include <machine/stdarg.h>
45
46static MALLOC_DEFINE(M_DEVT, "dev_t", "dev_t storage");
47
48/* Built at compile time from sys/conf/majors */
49extern unsigned char reserved_majors[256];
50
/*
 * This is the number of hash-buckets.  Experiments with 'real-life'
 * udev_t's show that a prime halfway between two powers of two works
 * best.
 */
56#define DEVT_HASH 83
57
/* The number of dev_t's we can create before malloc(9) kicks in.  */
59#define DEVT_STASH 50
60
61static struct cdev devt_stash[DEVT_STASH];
62
63static LIST_HEAD(, cdev) dev_hash[DEVT_HASH];
64
65static LIST_HEAD(, cdev) dev_free;
66
67static int free_devt;
68SYSCTL_INT(_debug, OID_AUTO, free_devt, CTLFLAG_RW, &free_devt, 0, "");
69
/*
 * Generic do-nothing entry point: takes no arguments and always
 * reports success (0).
 */
int
nullop(void)
{
	return (0);
}
76
/*
 * Generic entry point that always fails with EOPNOTSUPP.
 */
int
eopnotsupp(void)
{
	return (EOPNOTSUPP);
}
83
/*
 * Always fails with ENXIO; cast to the various method types used by
 * dead_cdevsw.
 */
static int
enxio(void)
{

	return (ENXIO);
}
89
/*
 * Always fails with ENODEV; cast to the various method types used for
 * the dead and default cdevsw methods.
 */
static int
enodev(void)
{

	return (ENODEV);
}
95
96/* Define a dead_cdevsw for use when devices leave unexpectedly. */
97
98#define dead_open	(d_open_t *)enxio
99#define dead_close	(d_close_t *)enxio
100#define dead_read	(d_read_t *)enxio
101#define dead_write	(d_write_t *)enxio
102#define dead_ioctl	(d_ioctl_t *)enxio
103#define dead_poll	(d_poll_t *)enodev
104#define dead_mmap	(d_mmap_t *)enodev
105
106static void
107dead_strategy(struct bio *bp)
108{
109
110	biofinish(bp, NULL, ENXIO);
111}
112
113#define dead_dump	(dumper_t *)enxio
114#define dead_kqfilter	(d_kqfilter_t *)enxio
115
116static struct cdevsw dead_cdevsw = {
117	.d_open =	dead_open,
118	.d_close =	dead_close,
119	.d_read =	dead_read,
120	.d_write =	dead_write,
121	.d_ioctl =	dead_ioctl,
122	.d_poll =	dead_poll,
123	.d_mmap =	dead_mmap,
124	.d_strategy =	dead_strategy,
125	.d_name =	"dead",
126	.d_maj =	255,
127	.d_dump =	dead_dump,
128	.d_kqfilter =	dead_kqfilter
129};
130
/*
 * Default methods if driver does not specify method.
 *
 * open/close succeed via nullop(); read, write, ioctl and mmap fail with
 * ENODEV.  The expansions are fully parenthesized so that each macro
 * remains a single expression wherever it is used.
 */

#define null_open	((d_open_t *)nullop)
#define null_close	((d_close_t *)nullop)
#define no_read		((d_read_t *)enodev)
#define no_write	((d_write_t *)enodev)
#define no_ioctl	((d_ioctl_t *)enodev)
#define no_mmap		((d_mmap_t *)enodev)
139
140static int
141no_kqfilter(dev_t dev __unused, struct knote *kn __unused)
142{
143
144	return (1);
145}
146
147static void
148no_strategy(struct bio *bp)
149{
150
151	biofinish(bp, NULL, ENODEV);
152}
153
154static int
155no_poll(dev_t dev __unused, int events, struct thread *td __unused)
156{
157	/*
158	 * Return true for read/write.  If the user asked for something
159	 * special, return POLLNVAL, so that clients have a way of
160	 * determining reliably whether or not the extended
161	 * functionality is present without hard-coding knowledge
162	 * of specific filesystem implementations.
163	 * Stay in sync with vop_nopoll().
164	 */
165	if (events & ~POLLSTANDARD)
166		return (POLLNVAL);
167
168	return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
169}
170
/* Default dump method: fail with ENODEV (parenthesized to stay one expression). */
#define no_dump		((dumper_t *)enodev)
172
173struct cdevsw *
174devsw(dev_t dev)
175{
176	if (dev->si_devsw)
177		return (dev->si_devsw);
178	return (&dead_cdevsw);
179}
180
/*
 * dev_t and udev_t primitives
 */
184
185int
186major(dev_t x)
187{
188	if (x == NODEV)
189		return NOUDEV;
190	return((x->si_udev >> 8) & 0xff);
191}
192
193int
194minor(dev_t x)
195{
196	if (x == NODEV)
197		return NOUDEV;
198	return(x->si_udev & 0xffff00ff);
199}
200
201int
202dev2unit(dev_t x)
203{
204	int i;
205
206	if (x == NODEV)
207		return NOUDEV;
208	i = minor(x);
209	return ((i & 0xff) | (i >> 8));
210}
211
/*
 * Inverse of dev2unit(): spread a unit number (at most 0xffffff) over
 * the minor bits, keeping the low byte in place and moving the rest
 * above the major byte.
 */
int
unit2minor(int unit)
{
	int low, high;

	KASSERT(unit <= 0xffffff, ("Invalid unit (%d) in unit2minor", unit));
	low = unit & 0xff;
	high = (unit << 8) & ~0xffff;
	return (low | high);
}
219
220static dev_t
221allocdev(void)
222{
223	static int stashed;
224	struct cdev *si;
225
226	if (LIST_FIRST(&dev_free)) {
227		si = LIST_FIRST(&dev_free);
228		LIST_REMOVE(si, si_hash);
229	} else if (stashed >= DEVT_STASH) {
230		MALLOC(si, struct cdev *, sizeof(*si), M_DEVT,
231		    M_USE_RESERVE | M_ZERO | M_WAITOK);
232	} else {
233		si = devt_stash + stashed++;
234		bzero(si, sizeof *si);
235		si->si_flags |= SI_STASHED;
236	}
237	si->__si_namebuf[0] = '\0';
238	si->si_name = si->__si_namebuf;
239	LIST_INIT(&si->si_children);
240	TAILQ_INIT(&si->si_snapshots);
241	return (si);
242}
243
244dev_t
245makedev(int x, int y)
246{
247	struct cdev *si;
248	udev_t	udev;
249	int hash;
250
251	if (x == umajor(NOUDEV) && y == uminor(NOUDEV))
252		panic("makedev of NOUDEV");
253	udev = (x << 8) | y;
254	hash = udev % DEVT_HASH;
255	LIST_FOREACH(si, &dev_hash[hash], si_hash) {
256		if (si->si_udev == udev)
257			return (si);
258	}
259	si = allocdev();
260	si->si_udev = udev;
261	LIST_INSERT_HEAD(&dev_hash[hash], si, si_hash);
262	return (si);
263}
264
265void
266freedev(dev_t dev)
267{
268
269	if (!free_devt)
270		return;
271	if (SLIST_FIRST(&dev->si_hlist))
272		return;
273	if (dev->si_devsw || dev->si_drv1 || dev->si_drv2)
274		return;
275	LIST_REMOVE(dev, si_hash);
276	if (dev->si_flags & SI_STASHED) {
277		bzero(dev, sizeof(*dev));
278		dev->si_flags |= SI_STASHED;
279		LIST_INSERT_HEAD(&dev_free, dev, si_hash);
280	} else {
281		FREE(dev, M_DEVT);
282	}
283}
284
285udev_t
286dev2udev(dev_t x)
287{
288	if (x == NODEV)
289		return (NOUDEV);
290	return (x->si_udev);
291}
292
293dev_t
294udev2dev(udev_t x, int b)
295{
296
297	if (x == NOUDEV)
298		return (NODEV);
299	switch (b) {
300	case 0:
301		return (makedev(umajor(x), uminor(x)));
302	default:
303		Debugger("udev2dev(...,X)");
304		return (NODEV);
305	}
306}
307
308int
309uminor(udev_t dev)
310{
311	return (dev & 0xffff00ff);
312}
313
314int
315umajor(udev_t dev)
316{
317	return ((dev & 0xff00) >> 8);
318}
319
320udev_t
321makeudev(int x, int y)
322{
323	return ((x << 8) | y);
324}
325
326static void
327find_major(struct cdevsw *devsw)
328{
329	int i;
330
331	for (i = NUMCDEVSW - 1; i > 0; i--)
332		if (reserved_majors[i] != i)
333			break;
334	KASSERT(i > 0, ("Out of major numbers (%s)", devsw->d_name));
335	devsw->d_maj = i;
336	reserved_majors[i] = i;
337}
338
339static void
340prep_cdevsw(struct cdevsw *devsw)
341{
342
343	if (devsw->d_open == NULL)	devsw->d_open = null_open;
344	if (devsw->d_close == NULL)	devsw->d_close = null_close;
345	if (devsw->d_read == NULL)	devsw->d_read = no_read;
346	if (devsw->d_write == NULL)	devsw->d_write = no_write;
347	if (devsw->d_ioctl == NULL)	devsw->d_ioctl = no_ioctl;
348	if (devsw->d_poll == NULL)	devsw->d_poll = no_poll;
349	if (devsw->d_mmap == NULL)	devsw->d_mmap = no_mmap;
350	if (devsw->d_strategy == NULL)	devsw->d_strategy = no_strategy;
351	if (devsw->d_dump == NULL)	devsw->d_dump = no_dump;
352	if (devsw->d_kqfilter == NULL)	devsw->d_kqfilter = no_kqfilter;
353	if (devsw->d_maj == MAJOR_AUTO) {
354		find_major(devsw);
355	} else {
356		if (devsw->d_maj == 256)	/* XXX: tty_cons.c is magic */
357			devsw->d_maj = 0;
358		KASSERT(devsw->d_maj >= 0 && devsw->d_maj < 256,
359		    ("Invalid major (%d) in make_dev", devsw->d_maj));
360		if (reserved_majors[devsw->d_maj] != devsw->d_maj) {
361			printf("WARNING: driver \"%s\" used %s %d\n",
362			    devsw->d_name, "unreserved major device number",
363			    devsw->d_maj);
364			reserved_majors[devsw->d_maj] = devsw->d_maj;
365		}
366	}
367}
368
369dev_t
370make_dev(struct cdevsw *devsw, int minor, uid_t uid, gid_t gid, int perms,
371    const char *fmt, ...)
372{
373	dev_t dev;
374	va_list ap;
375	int i;
376
377	KASSERT((minor & ~0xffff00ff) == 0,
378	    ("Invalid minor (0x%x) in make_dev", minor));
379	prep_cdevsw(devsw);
380	dev = makedev(devsw->d_maj, minor);
381	if (dev->si_flags & SI_CHEAPCLONE &&
382	    dev->si_flags & SI_NAMED &&
383	    dev->si_devsw == devsw) {
384		/*
385		 * This is allowed as it removes races and generally
386		 * simplifies cloning devices.
387		 */
388		return (dev);
389	}
390	if (dev->si_flags & SI_NAMED) {
391		printf( "WARNING: Driver mistake: repeat make_dev(\"%s\")\n",
392		    dev->si_name);
393		panic("don't do that");
394	}
395	va_start(ap, fmt);
396	i = vsnrprintf(dev->__si_namebuf, sizeof dev->__si_namebuf, 32, fmt, ap);
397	if (i > (sizeof dev->__si_namebuf - 1)) {
398		printf("WARNING: Device name truncated! (%s)",
399		    dev->__si_namebuf);
400	}
401	va_end(ap);
402	dev->si_devsw = devsw;
403	dev->si_uid = uid;
404	dev->si_gid = gid;
405	dev->si_mode = perms;
406	dev->si_flags |= SI_NAMED;
407
408	devfs_create(dev);
409	return (dev);
410}
411
412int
413dev_named(dev_t pdev, const char *name)
414{
415	dev_t cdev;
416
417	if (strcmp(devtoname(pdev), name) == 0)
418		return (1);
419	LIST_FOREACH(cdev, &pdev->si_children, si_siblings)
420		if (strcmp(devtoname(cdev), name) == 0)
421			return (1);
422	return (0);
423}
424
425void
426dev_depends(dev_t pdev, dev_t cdev)
427{
428
429	cdev->si_parent = pdev;
430	cdev->si_flags |= SI_CHILD;
431	LIST_INSERT_HEAD(&pdev->si_children, cdev, si_siblings);
432}
433
434dev_t
435make_dev_alias(dev_t pdev, const char *fmt, ...)
436{
437	dev_t	dev;
438	va_list ap;
439	int i;
440
441	dev = allocdev();
442	dev->si_flags |= SI_ALIAS;
443	dev->si_flags |= SI_NAMED;
444	dev_depends(pdev, dev);
445	va_start(ap, fmt);
446	i = vsnrprintf(dev->__si_namebuf, sizeof dev->__si_namebuf, 32, fmt, ap);
447	if (i > (sizeof dev->__si_namebuf - 1)) {
448		printf("WARNING: Device name truncated! (%s)",
449		    dev->__si_namebuf);
450	}
451	va_end(ap);
452
453	devfs_create(dev);
454	return (dev);
455}
456
457void
458destroy_dev(dev_t dev)
459{
460
461	if (!(dev->si_flags & SI_NAMED)) {
462		printf( "WARNING: Driver mistake: destroy_dev on %d/%d\n",
463		    major(dev), minor(dev));
464		panic("don't do that");
465	}
466
467	devfs_destroy(dev);
468	dev->si_flags &= ~SI_NAMED;
469
470	if (dev->si_flags & SI_CHILD) {
471		LIST_REMOVE(dev, si_siblings);
472		dev->si_flags &= ~SI_CHILD;
473	}
474	while (!LIST_EMPTY(&dev->si_children))
475		destroy_dev(LIST_FIRST(&dev->si_children));
476	if (dev->si_flags & SI_CLONELIST) {
477		LIST_REMOVE(dev, si_clone);
478		dev->si_flags &= ~SI_CLONELIST;
479	}
480	dev->si_drv1 = 0;
481	dev->si_drv2 = 0;
482	dev->si_devsw = 0;
483	bzero(&dev->__si_u, sizeof(dev->__si_u));
484	dev->si_flags &= ~SI_ALIAS;
485	freedev(dev);
486}
487
488const char *
489devtoname(dev_t dev)
490{
491	char *p;
492	int mynor;
493
494	if (dev->si_name[0] == '#' || dev->si_name[0] == '\0') {
495		p = dev->si_name;
496		if (devsw(dev))
497			sprintf(p, "#%s/", devsw(dev)->d_name);
498		else
499			sprintf(p, "#%d/", major(dev));
500		p += strlen(p);
501		mynor = minor(dev);
502		if (mynor < 0 || mynor > 255)
503			sprintf(p, "%#x", (u_int)mynor);
504		else
505			sprintf(p, "%d", mynor);
506	}
507	return (dev->si_name);
508}
509
/*
 * Match a device name of the form "<stem><unit>" for cloning drivers.
 *
 * name		candidate device name
 * namep	if non-NULL, receives a pointer to the first character
 *		following the unit number
 * stem		prefix the name has to start with
 * unit		receives the decimal unit number
 *
 * Returns 0 if the name does not match (the outputs are left alone),
 * 1 if the name is exactly stem + unit, and 2 if further characters
 * follow the unit number.  Unit numbers with superfluous leading zeros
 * or larger than 0xffffff are rejected.
 */
int
dev_stdclone(char *name, char **namep, const char *stem, int *unit)
{
	int u, i;

	i = strlen(stem);
	/* strncmp() stops at name's terminator, so a short name is never over-read. */
	if (strncmp(stem, name, i) != 0)
		return (0);
	if (!isdigit((unsigned char)name[i]))
		return (0);
	if (name[i] == '0' && isdigit((unsigned char)name[i + 1]))
		return (0);
	u = 0;
	while (isdigit((unsigned char)name[i])) {
		u = u * 10 + (name[i++] - '0');
		/*
		 * Check the range after every digit: u stays far below
		 * INT_MAX, so an overlong digit string cannot overflow
		 * and wrap around into a seemingly valid unit.
		 */
		if (u > 0xffffff)
			return (0);
	}
	*unit = u;
	if (namep)
		*namep = &name[i];
	if (name[i])
		return (2);
	return (1);
}
536
537/*
538 * Helper functions for cloning device drivers.
539 *
540 * The objective here is to make it unnecessary for the device drivers to
541 * use rman or similar to manage their unit number space.  Due to the way
542 * we do "on-demand" devices, using rman or other "private" methods
543 * will be very tricky to lock down properly once we lock down this file.
544 *
545 * Instead we give the drivers these routines which puts the dev_t's that
546 * are to be managed on their own list, and gives the driver the ability
547 * to ask for the first free unit number or a given specified unit number.
548 *
549 * In addition these routines support paired devices (pty, nmdm and similar)
550 * by respecting a number of "flag" bits in the minor number.
551 *
552 */
553
554struct clonedevs {
555	LIST_HEAD(,cdev)	head;
556};
557
558int
559clone_create(struct clonedevs **cdp, struct cdevsw *csw, int *up, dev_t *dp, u_int extra)
560{
561	struct clonedevs *cd;
562	dev_t dev, dl, de;
563	int unit, low, u;
564
565	KASSERT(!(extra & CLONE_UNITMASK),
566	     ("Illegal extra bits (0x%x) in clone_create", extra));
567	KASSERT(*up <= CLONE_UNITMASK,
568	     ("Too high unit (0x%x) in clone_create", *up));
569
570	if (csw->d_maj == MAJOR_AUTO)
571		find_major(csw);
572	/* if clonedevs have not been initialized, we do it here */
573	cd = *cdp;
574	if (cd == NULL) {
575		cd = malloc(sizeof *cd, M_DEVBUF, M_WAITOK | M_ZERO);
576		LIST_INIT(&cd->head);
577		*cdp = cd;
578	}
579
580	/*
581	 * Search the list for a lot of things in one go:
582	 *   A preexisting match is returned immediately.
583	 *   The lowest free unit number if we are passed -1, and the place
584	 *	 in the list where we should insert that new element.
585	 *   The place to insert a specified unit number, if applicable
586	 *       the end of the list.
587	 */
588	unit = *up;
589	low = 0;
590	de = dl = NULL;
591	LIST_FOREACH(dev, &cd->head, si_clone) {
592		u = dev2unit(dev);
593		if (u == (unit | extra)) {
594			*dp = dev;
595			return (0);
596		}
597		if (unit == -1 && u == low) {
598			low++;
599			de = dev;
600			continue;
601		}
602		if (u > unit) {
603			dl = dev;
604			break;
605		}
606		de = dev;
607	}
608	if (unit == -1)
609		unit = low;
610	dev = makedev(csw->d_maj, unit2minor(unit | extra));
611	KASSERT(!(dev->si_flags & SI_CLONELIST),
612	    ("Dev %p should not be on clonelist", dev));
613	if (dl != NULL)
614		LIST_INSERT_BEFORE(dl, dev, si_clone);
615	else if (de != NULL)
616		LIST_INSERT_AFTER(de, dev, si_clone);
617	else
618		LIST_INSERT_HEAD(&cd->head, dev, si_clone);
619	dev->si_flags |= SI_CLONELIST;
620	*up = unit;
621	return (1);
622}
623
624/*
625 * Kill everything still on the list.  The driver should already have
626 * disposed of any softc hung of the dev_t's at this time.
627 */
628void
629clone_cleanup(struct clonedevs **cdp)
630{
631	dev_t dev, tdev;
632	struct clonedevs *cd;
633
634	cd = *cdp;
635	if (cd == NULL)
636		return;
637	LIST_FOREACH_SAFE(dev, &cd->head, si_clone, tdev) {
638		KASSERT(dev->si_flags & SI_NAMED,
639		    ("Driver has goofed in cloning underways udev %x", dev->si_udev));
640		destroy_dev(dev);
641	}
642	free(cd, M_DEVBUF);
643	*cdp = NULL;
644}
645
646/*
647 * Helper sysctl for devname(3).  We're given a {u}dev_t and return
648 * the name, if any, registered by the device driver.
649 */
650static int
651sysctl_devname(SYSCTL_HANDLER_ARGS)
652{
653	int error;
654	udev_t ud;
655	dev_t dev;
656
657	error = SYSCTL_IN(req, &ud, sizeof (ud));
658	if (error)
659		return (error);
660	if (ud == NOUDEV)
661		return(EINVAL);
662	dev = makedev(umajor(ud), uminor(ud));
663	if (dev->si_name[0] == '\0')
664		error = ENOENT;
665	else
666		error = SYSCTL_OUT(req, dev->si_name, strlen(dev->si_name) + 1);
667	freedev(dev);
668	return (error);
669}
670
671SYSCTL_PROC(_kern, OID_AUTO, devname, CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_ANYBODY,
672	NULL, 0, sysctl_devname, "", "devname(3) handler");
673