kern_conf.c revision 126081
1/*-
2 * Copyright (c) 1999-2002 Poul-Henning Kamp
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/kern/kern_conf.c 126081 2004-02-21 21:32:15Z phk $");
29
30#include <sys/param.h>
31#include <sys/kernel.h>
32#include <sys/systm.h>
33#include <sys/bio.h>
34#include <sys/lock.h>
35#include <sys/mutex.h>
36#include <sys/sysctl.h>
37#include <sys/module.h>
38#include <sys/malloc.h>
39#include <sys/conf.h>
40#include <sys/vnode.h>
41#include <sys/queue.h>
42#include <sys/poll.h>
43#include <sys/ctype.h>
44#include <sys/tty.h>
45#include <machine/stdarg.h>
46
47static MALLOC_DEFINE(M_DEVT, "dev_t", "dev_t storage");
48
49/* Built at compile time from sys/conf/majors */
50extern unsigned char reserved_majors[256];
51
52/*
53 * This is the number of hash-buckets.  Experiements with 'real-life'
54 * udev_t's show that a prime halfway between two powers of two works
55 * best.
56 */
57#define DEVT_HASH 83
58
59/* The number of dev_t's we can create before malloc(9) kick in.  */
60#define DEVT_STASH 50
61
62static struct cdev devt_stash[DEVT_STASH];
63
64static LIST_HEAD(, cdev) dev_hash[DEVT_HASH];
65
66static LIST_HEAD(, cdev) dev_free;
67
68static int free_devt;
69SYSCTL_INT(_debug, OID_AUTO, free_devt, CTLFLAG_RW, &free_devt, 0, "");
70
71static dev_t makedev(int x, int y);
72
/*
 * Generic do-nothing method: takes no arguments and always reports
 * success (0).
 */
int
nullop(void)
{
	return (0);
}
79
/*
 * Generic "operation not supported" method: always returns EOPNOTSUPP.
 */
int
eopnotsupp(void)
{
	return (EOPNOTSUPP);
}
86
/*
 * Error stub that always returns ENXIO; cast to the individual method
 * types to build the dead_* entry points.
 */
static int
enxio(void)
{

	return (ENXIO);
}
92
/*
 * Error stub that always returns ENODEV; cast to the individual method
 * types to build the dead_* and no_* entry points.
 */
static int
enodev(void)
{

	return (ENODEV);
}
98
/*
 * Define a dead_cdevsw for use when devices leave unexpectedly.
 *
 * Each entry point is one of the argument-less error stubs cast to the
 * method's function-pointer type.  The replacement lists are fully
 * parenthesized so that a macro still denotes the function pointer when
 * it is followed by an argument list; without the parentheses the cast
 * would be applied to the result of the call instead.
 */

#define dead_open	((d_open_t *)enxio)
#define dead_close	((d_close_t *)enxio)
#define dead_read	((d_read_t *)enxio)
#define dead_write	((d_write_t *)enxio)
#define dead_ioctl	((d_ioctl_t *)enxio)
#define dead_poll	((d_poll_t *)enodev)
#define dead_mmap	((d_mmap_t *)enodev)
108
109static void
110dead_strategy(struct bio *bp)
111{
112
113	biofinish(bp, NULL, ENXIO);
114}
115
/*
 * Remaining dead-device entry points.  Parenthesized so each macro
 * expands to a single primary expression (the cast function pointer).
 */
#define dead_dump	((dumper_t *)enxio)
#define dead_kqfilter	((d_kqfilter_t *)enxio)
118
119static struct cdevsw dead_cdevsw = {
120	.d_version =	D_VERSION,
121	.d_flags =	D_NEEDGIANT, /* XXX: does dead_strategy need this ? */
122	.d_open =	dead_open,
123	.d_close =	dead_close,
124	.d_read =	dead_read,
125	.d_write =	dead_write,
126	.d_ioctl =	dead_ioctl,
127	.d_poll =	dead_poll,
128	.d_mmap =	dead_mmap,
129	.d_strategy =	dead_strategy,
130	.d_name =	"dead",
131	.d_maj =	255,
132	.d_dump =	dead_dump,
133	.d_kqfilter =	dead_kqfilter
134};
135
/*
 * Default methods if driver does not specify method.
 *
 * open/close succeed without doing anything; the others fail with
 * ENODEV.  The replacement lists are fully parenthesized so each macro
 * expands to a single primary expression (the cast function pointer)
 * wherever it is used.
 */

#define null_open	((d_open_t *)nullop)
#define null_close	((d_close_t *)nullop)
#define no_read		((d_read_t *)enodev)
#define no_write	((d_write_t *)enodev)
#define no_ioctl	((d_ioctl_t *)enodev)
#define no_mmap		((d_mmap_t *)enodev)
144
145static int
146no_kqfilter(dev_t dev __unused, struct knote *kn __unused)
147{
148
149	return (1);
150}
151
152static void
153no_strategy(struct bio *bp)
154{
155
156	biofinish(bp, NULL, ENODEV);
157}
158
159static int
160no_poll(dev_t dev __unused, int events, struct thread *td __unused)
161{
162	/*
163	 * Return true for read/write.  If the user asked for something
164	 * special, return POLLNVAL, so that clients have a way of
165	 * determining reliably whether or not the extended
166	 * functionality is present without hard-coding knowledge
167	 * of specific filesystem implementations.
168	 * Stay in sync with vop_nopoll().
169	 */
170	if (events & ~POLLSTANDARD)
171		return (POLLNVAL);
172
173	return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
174}
175
/* Default dump method: fails with ENODEV.  Parenthesized like its siblings. */
#define no_dump		((dumper_t *)enodev)
177
178struct cdevsw *
179devsw(dev_t dev)
180{
181	if (dev->si_devsw)
182		return (dev->si_devsw);
183	return (&dead_cdevsw);
184}
185
186/*
187 * dev_t and u_dev_t primitives
188 */
189
190int
191major(dev_t x)
192{
193	if (x == NODEV)
194		return NOUDEV;
195	return((x->si_udev >> 8) & 0xff);
196}
197
198int
199minor(dev_t x)
200{
201	if (x == NODEV)
202		return NOUDEV;
203	return(x->si_udev & 0xffff00ff);
204}
205
206int
207dev2unit(dev_t x)
208{
209	int i;
210
211	if (x == NODEV)
212		return NOUDEV;
213	i = minor(x);
214	return ((i & 0xff) | (i >> 8));
215}
216
217int
218unit2minor(int unit)
219{
220
221	KASSERT(unit <= 0xffffff, ("Invalid unit (%d) in unit2minor", unit));
222	return ((unit & 0xff) | ((unit << 8) & ~0xffff));
223}
224
225static dev_t
226allocdev(void)
227{
228	static int stashed;
229	struct cdev *si;
230
231	if (LIST_FIRST(&dev_free)) {
232		si = LIST_FIRST(&dev_free);
233		LIST_REMOVE(si, si_hash);
234	} else if (stashed >= DEVT_STASH) {
235		MALLOC(si, struct cdev *, sizeof(*si), M_DEVT,
236		    M_USE_RESERVE | M_ZERO | M_WAITOK);
237	} else {
238		si = devt_stash + stashed++;
239		bzero(si, sizeof *si);
240		si->si_flags |= SI_STASHED;
241	}
242	si->__si_namebuf[0] = '\0';
243	si->si_name = si->__si_namebuf;
244	LIST_INIT(&si->si_children);
245	TAILQ_INIT(&si->si_snapshots);
246	return (si);
247}
248
249static dev_t
250makedev(int x, int y)
251{
252	struct cdev *si;
253	udev_t	udev;
254	int hash;
255
256	if (x == umajor(NOUDEV) && y == uminor(NOUDEV))
257		panic("makedev of NOUDEV");
258	udev = (x << 8) | y;
259	hash = udev % DEVT_HASH;
260	LIST_FOREACH(si, &dev_hash[hash], si_hash) {
261		if (si->si_udev == udev)
262			return (si);
263	}
264	si = allocdev();
265	si->si_udev = udev;
266	LIST_INSERT_HEAD(&dev_hash[hash], si, si_hash);
267	return (si);
268}
269
270void
271freedev(dev_t dev)
272{
273
274	if (!free_devt)
275		return;
276	if (SLIST_FIRST(&dev->si_hlist))
277		return;
278	if (dev->si_devsw || dev->si_drv1 || dev->si_drv2)
279		return;
280	LIST_REMOVE(dev, si_hash);
281	if (dev->si_flags & SI_STASHED) {
282		bzero(dev, sizeof(*dev));
283		dev->si_flags |= SI_STASHED;
284		LIST_INSERT_HEAD(&dev_free, dev, si_hash);
285	} else {
286		FREE(dev, M_DEVT);
287	}
288}
289
290udev_t
291dev2udev(dev_t x)
292{
293	if (x == NODEV)
294		return (NOUDEV);
295	return (x->si_udev);
296}
297
298dev_t
299udev2dev(udev_t udev)
300{
301	struct cdev *si;
302	int hash;
303
304	if (udev == NOUDEV)
305		return (NODEV);
306	hash = udev % DEVT_HASH;
307	LIST_FOREACH(si, &dev_hash[hash], si_hash) {
308		if (si->si_udev == udev)
309			return (si);
310	}
311	return (NODEV);
312}
313
314int
315uminor(udev_t dev)
316{
317	return (dev & 0xffff00ff);
318}
319
320int
321umajor(udev_t dev)
322{
323	return ((dev & 0xff00) >> 8);
324}
325
326udev_t
327makeudev(int x, int y)
328{
329	return ((x << 8) | y);
330}
331
332static void
333find_major(struct cdevsw *devsw)
334{
335	int i;
336
337	for (i = NUMCDEVSW - 1; i > 0; i--)
338		if (reserved_majors[i] != i)
339			break;
340	KASSERT(i > 0, ("Out of major numbers (%s)", devsw->d_name));
341	devsw->d_maj = i;
342	reserved_majors[i] = i;
343}
344
345static void
346prep_cdevsw(struct cdevsw *devsw)
347{
348
349	if (devsw->d_flags & D_TTY) {
350		if (devsw->d_read == NULL)	devsw->d_read = ttyread;
351		if (devsw->d_write == NULL)	devsw->d_write = ttywrite;
352		if (devsw->d_kqfilter == NULL)	devsw->d_kqfilter = ttykqfilter;
353		if (devsw->d_poll == NULL)	devsw->d_poll = ttypoll;
354	}
355
356	if (devsw->d_open == NULL)	devsw->d_open = null_open;
357	if (devsw->d_close == NULL)	devsw->d_close = null_close;
358	if (devsw->d_read == NULL)	devsw->d_read = no_read;
359	if (devsw->d_write == NULL)	devsw->d_write = no_write;
360	if (devsw->d_ioctl == NULL)	devsw->d_ioctl = no_ioctl;
361	if (devsw->d_poll == NULL)	devsw->d_poll = no_poll;
362	if (devsw->d_mmap == NULL)	devsw->d_mmap = no_mmap;
363	if (devsw->d_strategy == NULL)	devsw->d_strategy = no_strategy;
364	if (devsw->d_dump == NULL)	devsw->d_dump = no_dump;
365	if (devsw->d_kqfilter == NULL)	devsw->d_kqfilter = no_kqfilter;
366	if (devsw->d_maj == MAJOR_AUTO) {
367		find_major(devsw);
368	} else {
369		if (devsw->d_maj == 256)	/* XXX: tty_cons.c is magic */
370			devsw->d_maj = 0;
371		KASSERT(devsw->d_maj >= 0 && devsw->d_maj < 256,
372		    ("Invalid major (%d) in make_dev", devsw->d_maj));
373		if (reserved_majors[devsw->d_maj] != devsw->d_maj) {
374			printf("WARNING: driver \"%s\" used %s %d\n",
375			    devsw->d_name, "unreserved major device number",
376			    devsw->d_maj);
377			reserved_majors[devsw->d_maj] = devsw->d_maj;
378		}
379	}
380}
381
382dev_t
383make_dev(struct cdevsw *devsw, int minor, uid_t uid, gid_t gid, int perms,
384    const char *fmt, ...)
385{
386	dev_t dev;
387	va_list ap;
388	int i;
389
390	KASSERT((minor & ~0xffff00ff) == 0,
391	    ("Invalid minor (0x%x) in make_dev", minor));
392	prep_cdevsw(devsw);
393	dev = makedev(devsw->d_maj, minor);
394	if (dev->si_flags & SI_CHEAPCLONE &&
395	    dev->si_flags & SI_NAMED &&
396	    dev->si_devsw == devsw) {
397		/*
398		 * This is allowed as it removes races and generally
399		 * simplifies cloning devices.
400		 */
401		return (dev);
402	}
403	if (dev->si_flags & SI_NAMED) {
404		printf( "WARNING: Driver mistake: repeat make_dev(\"%s\")\n",
405		    dev->si_name);
406		panic("don't do that");
407	}
408	va_start(ap, fmt);
409	i = vsnrprintf(dev->__si_namebuf, sizeof dev->__si_namebuf, 32, fmt, ap);
410	if (i > (sizeof dev->__si_namebuf - 1)) {
411		printf("WARNING: Device name truncated! (%s)",
412		    dev->__si_namebuf);
413	}
414	va_end(ap);
415	dev->si_devsw = devsw;
416	dev->si_uid = uid;
417	dev->si_gid = gid;
418	dev->si_mode = perms;
419	dev->si_flags |= SI_NAMED;
420
421	devfs_create(dev);
422	return (dev);
423}
424
425int
426dev_named(dev_t pdev, const char *name)
427{
428	dev_t cdev;
429
430	if (strcmp(devtoname(pdev), name) == 0)
431		return (1);
432	LIST_FOREACH(cdev, &pdev->si_children, si_siblings)
433		if (strcmp(devtoname(cdev), name) == 0)
434			return (1);
435	return (0);
436}
437
438void
439dev_depends(dev_t pdev, dev_t cdev)
440{
441
442	cdev->si_parent = pdev;
443	cdev->si_flags |= SI_CHILD;
444	LIST_INSERT_HEAD(&pdev->si_children, cdev, si_siblings);
445}
446
447dev_t
448make_dev_alias(dev_t pdev, const char *fmt, ...)
449{
450	dev_t	dev;
451	va_list ap;
452	int i;
453
454	dev = allocdev();
455	dev->si_flags |= SI_ALIAS;
456	dev->si_flags |= SI_NAMED;
457	dev_depends(pdev, dev);
458	va_start(ap, fmt);
459	i = vsnrprintf(dev->__si_namebuf, sizeof dev->__si_namebuf, 32, fmt, ap);
460	if (i > (sizeof dev->__si_namebuf - 1)) {
461		printf("WARNING: Device name truncated! (%s)",
462		    dev->__si_namebuf);
463	}
464	va_end(ap);
465
466	devfs_create(dev);
467	return (dev);
468}
469
470void
471destroy_dev(dev_t dev)
472{
473
474	if (!(dev->si_flags & SI_NAMED)) {
475		printf( "WARNING: Driver mistake: destroy_dev on %d/%d\n",
476		    major(dev), minor(dev));
477		panic("don't do that");
478	}
479
480	devfs_destroy(dev);
481	dev->si_flags &= ~SI_NAMED;
482
483	if (dev->si_flags & SI_CHILD) {
484		LIST_REMOVE(dev, si_siblings);
485		dev->si_flags &= ~SI_CHILD;
486	}
487	while (!LIST_EMPTY(&dev->si_children))
488		destroy_dev(LIST_FIRST(&dev->si_children));
489	if (dev->si_flags & SI_CLONELIST) {
490		LIST_REMOVE(dev, si_clone);
491		dev->si_flags &= ~SI_CLONELIST;
492	}
493	dev->si_drv1 = 0;
494	dev->si_drv2 = 0;
495	dev->si_devsw = 0;
496	bzero(&dev->__si_u, sizeof(dev->__si_u));
497	dev->si_flags &= ~SI_ALIAS;
498	freedev(dev);
499}
500
501const char *
502devtoname(dev_t dev)
503{
504	char *p;
505	int mynor;
506
507	if (dev->si_name[0] == '#' || dev->si_name[0] == '\0') {
508		p = dev->si_name;
509		if (devsw(dev))
510			sprintf(p, "#%s/", devsw(dev)->d_name);
511		else
512			sprintf(p, "#%d/", major(dev));
513		p += strlen(p);
514		mynor = minor(dev);
515		if (mynor < 0 || mynor > 255)
516			sprintf(p, "%#x", (u_int)mynor);
517		else
518			sprintf(p, "%d", mynor);
519	}
520	return (dev->si_name);
521}
522
/*
 * Parse a device name of the form <stem><unit>[<rest>].
 *
 * name		the name to parse
 * namep	if not NULL, receives a pointer to the first character
 *		after the unit number
 * stem		the prefix name has to start with
 * unit		receives the unit number on success
 *
 * Returns 0 if name does not match; *unit and *namep are then left
 * untouched.  Returns 1 if the name is exactly stem plus unit, and 2
 * if more characters follow the unit.
 *
 * The unit must be a decimal number no larger than 0xffffff, and a
 * multi-digit number may not start with '0'.
 */
int
dev_stdclone(char *name, char **namep, const char *stem, int *unit)
{
	size_t i;
	int u;

	i = strlen(stem);
	/*
	 * strncmp() stops at name's terminator; a plain memory compare
	 * of strlen(stem) bytes would read past the end of a short name.
	 */
	if (strncmp(stem, name, i) != 0)
		return (0);
	if (!isdigit((unsigned char)name[i]))
		return (0);
	if (name[i] == '0' && isdigit((unsigned char)name[i + 1]))
		return (0);

	u = 0;
	while (isdigit((unsigned char)name[i])) {
		u = u * 10 + (name[i++] - '0');
		/*
		 * Check the limit per digit: u stays far below INT_MAX,
		 * so an overlong number cannot overflow and wrap around
		 * into a value that looks valid.
		 */
		if (u > 0xffffff)
			return (0);
	}

	*unit = u;
	if (namep != NULL)
		*namep = &name[i];
	return (name[i] != '\0' ? 2 : 1);
}
549
/*
 * Helper functions for cloning device drivers.
 *
 * The objective here is to make it unnecessary for the device drivers to
 * use rman or similar to manage their unit number space.  Due to the way
 * we do "on-demand" devices, using rman or other "private" methods
 * will be very tricky to lock down properly once we lock down this file.
 *
 * Instead we give the drivers these routines, which put the dev_t's that
 * are to be managed on their own list and give the driver the ability
 * to ask for the first free unit number or a given specified unit number.
 *
 * In addition these routines support paired devices (pty, nmdm and similar)
 * by respecting a number of "flag" bits in the minor number.
 *
 */
566
567struct clonedevs {
568	LIST_HEAD(,cdev)	head;
569};
570
571int
572clone_create(struct clonedevs **cdp, struct cdevsw *csw, int *up, dev_t *dp, u_int extra)
573{
574	struct clonedevs *cd;
575	dev_t dev, dl, de;
576	int unit, low, u;
577
578	KASSERT(!(extra & CLONE_UNITMASK),
579	     ("Illegal extra bits (0x%x) in clone_create", extra));
580	KASSERT(*up <= CLONE_UNITMASK,
581	     ("Too high unit (0x%x) in clone_create", *up));
582
583	if (csw->d_maj == MAJOR_AUTO)
584		find_major(csw);
585	/* if clonedevs have not been initialized, we do it here */
586	cd = *cdp;
587	if (cd == NULL) {
588		cd = malloc(sizeof *cd, M_DEVBUF, M_WAITOK | M_ZERO);
589		LIST_INIT(&cd->head);
590		*cdp = cd;
591	}
592
593	/*
594	 * Search the list for a lot of things in one go:
595	 *   A preexisting match is returned immediately.
596	 *   The lowest free unit number if we are passed -1, and the place
597	 *	 in the list where we should insert that new element.
598	 *   The place to insert a specified unit number, if applicable
599	 *       the end of the list.
600	 */
601	unit = *up;
602	low = 0;
603	de = dl = NULL;
604	LIST_FOREACH(dev, &cd->head, si_clone) {
605		u = dev2unit(dev);
606		if (u == (unit | extra)) {
607			*dp = dev;
608			return (0);
609		}
610		if (unit == -1 && u == low) {
611			low++;
612			de = dev;
613			continue;
614		}
615		if (u > unit) {
616			dl = dev;
617			break;
618		}
619		de = dev;
620	}
621	if (unit == -1)
622		unit = low;
623	dev = makedev(csw->d_maj, unit2minor(unit | extra));
624	KASSERT(!(dev->si_flags & SI_CLONELIST),
625	    ("Dev %p should not be on clonelist", dev));
626	if (dl != NULL)
627		LIST_INSERT_BEFORE(dl, dev, si_clone);
628	else if (de != NULL)
629		LIST_INSERT_AFTER(de, dev, si_clone);
630	else
631		LIST_INSERT_HEAD(&cd->head, dev, si_clone);
632	dev->si_flags |= SI_CLONELIST;
633	*up = unit;
634	return (1);
635}
636
637/*
638 * Kill everything still on the list.  The driver should already have
639 * disposed of any softc hung of the dev_t's at this time.
640 */
641void
642clone_cleanup(struct clonedevs **cdp)
643{
644	dev_t dev, tdev;
645	struct clonedevs *cd;
646
647	cd = *cdp;
648	if (cd == NULL)
649		return;
650	LIST_FOREACH_SAFE(dev, &cd->head, si_clone, tdev) {
651		KASSERT(dev->si_flags & SI_NAMED,
652		    ("Driver has goofed in cloning underways udev %x", dev->si_udev));
653		destroy_dev(dev);
654	}
655	free(cd, M_DEVBUF);
656	*cdp = NULL;
657}
658
659/*
660 * Helper sysctl for devname(3).  We're given a {u}dev_t and return
661 * the name, if any, registered by the device driver.
662 */
663static int
664sysctl_devname(SYSCTL_HANDLER_ARGS)
665{
666	int error;
667	udev_t ud;
668	dev_t dev;
669
670	error = SYSCTL_IN(req, &ud, sizeof (ud));
671	if (error)
672		return (error);
673	if (ud == NOUDEV)
674		return(EINVAL);
675	dev = makedev(umajor(ud), uminor(ud));
676	if (dev->si_name[0] == '\0')
677		error = ENOENT;
678	else
679		error = SYSCTL_OUT(req, dev->si_name, strlen(dev->si_name) + 1);
680	freedev(dev);
681	return (error);
682}
683
684SYSCTL_PROC(_kern, OID_AUTO, devname, CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_ANYBODY,
685	NULL, 0, sysctl_devname, "", "devname(3) handler");
686