md.c revision 82435
1/*
2 * ----------------------------------------------------------------------------
3 * "THE BEER-WARE LICENSE" (Revision 42):
4 * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
5 * can do whatever you want with this stuff. If we meet some day, and you think
6 * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
7 * ----------------------------------------------------------------------------
8 *
9 * $FreeBSD: head/sys/dev/md/md.c 82435 2001-08-27 17:48:37Z sobomax $
10 *
11 */
12
13/*
 * The following functions are based on the vn(4) driver: mdstart_swap(),
 * mdstart_vnode(), mdcreate_swap(), mdcreate_vnode() and mddestroy(),
 * and as such are under the following copyright:
17 *
18 * Copyright (c) 1988 University of Utah.
19 * Copyright (c) 1990, 1993
20 *	The Regents of the University of California.  All rights reserved.
21 *
22 * This code is derived from software contributed to Berkeley by
23 * the Systems Programming Group of the University of Utah Computer
24 * Science Department.
25 *
26 * Redistribution and use in source and binary forms, with or without
27 * modification, are permitted provided that the following conditions
28 * are met:
29 * 1. Redistributions of source code must retain the above copyright
30 *    notice, this list of conditions and the following disclaimer.
31 * 2. Redistributions in binary form must reproduce the above copyright
32 *    notice, this list of conditions and the following disclaimer in the
33 *    documentation and/or other materials provided with the distribution.
34 * 3. All advertising materials mentioning features or use of this software
35 *    must display the following acknowledgement:
36 *	This product includes software developed by the University of
37 *	California, Berkeley and its contributors.
38 * 4. Neither the name of the University nor the names of its contributors
39 *    may be used to endorse or promote products derived from this software
40 *    without specific prior written permission.
41 *
42 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52 * SUCH DAMAGE.
53 *
54 * from: Utah Hdr: vn.c 1.13 94/04/02
55 *
56 *	from: @(#)vn.c	8.6 (Berkeley) 4/1/94
57 * From: src/sys/dev/vn/vn.c,v 1.122 2000/12/16 16:06:03
58 */
59
60#include "opt_md.h"
61
62#include <sys/param.h>
63#include <sys/systm.h>
64#include <sys/bio.h>
65#include <sys/conf.h>
66#include <sys/devicestat.h>
67#include <sys/disk.h>
68#include <sys/fcntl.h>
69#include <sys/kernel.h>
70#include <sys/linker.h>
71#include <sys/lock.h>
72#include <sys/malloc.h>
73#include <sys/mdioctl.h>
74#include <sys/mutex.h>
75#include <sys/namei.h>
76#include <sys/proc.h>
77#include <sys/queue.h>
78#include <sys/sysctl.h>
79#include <sys/vnode.h>
80
81#include <machine/atomic.h>
82
83#include <vm/vm.h>
84#include <vm/vm_object.h>
85#include <vm/vm_page.h>
86#include <vm/vm_pager.h>
87#include <vm/vm_zone.h>
88#include <vm/swap_pager.h>
89
90#define MD_MODVER 1
91
92#ifndef MD_NSECT
93#define MD_NSECT (10000 * 2)
94#endif
95
96MALLOC_DEFINE(M_MD, "MD disk", "Memory Disk");
97MALLOC_DEFINE(M_MDSECT, "MD sectors", "Memory Disk Sectors");
98
99static int md_debug;
100SYSCTL_INT(_debug, OID_AUTO, mddebug, CTLFLAG_RW, &md_debug, 0, "");
101
102#if defined(MD_ROOT) && defined(MD_ROOT_SIZE)
103/* Image gets put here: */
104static u_char mfs_root[MD_ROOT_SIZE*1024] = "MFS Filesystem goes here";
105static u_char end_mfs_root[] __unused = "MFS Filesystem had better STOP here";
106#endif
107
/* Set by md_preloaded() when unit 0 is a preloaded image; read by md_takeroot(). */
static int	mdrootready;
/* Only read (in md_drvinit()'s message); NOTE(review): never updated in this file. */
static int	mdunits;
/* Control device created in md_drvinit() and destroyed on module unload. */
static dev_t	status_dev = 0;
111
112
113#define CDEV_MAJOR	95
114
115static d_strategy_t mdstrategy;
116static d_open_t mdopen;
117static d_close_t mdclose;
118static d_ioctl_t mdioctl, mdctlioctl;
119
/*
 * Device switch for the md disk units: open/close/ioctl/strategy are the
 * md* routines in this file, read/write are physread/physwrite.  It is
 * handed to disk_create() in mdinit().
 */
static struct cdevsw md_cdevsw = {
        /* open */      mdopen,
        /* close */     mdclose,
        /* read */      physread,
        /* write */     physwrite,
        /* ioctl */     mdioctl,
        /* poll */      nopoll,
        /* mmap */      nommap,
        /* strategy */  mdstrategy,
        /* name */      MD_NAME,
        /* maj */       CDEV_MAJOR,
        /* dump */      nodump,
        /* psize */     nopsize,
        /* flags */     D_DISK | D_CANFREE | D_MEMDISK,
};
135
/*
 * Device switch for the control device created in md_drvinit().  Only the
 * ioctl entry (mdctlioctl) is specific to this driver; the remaining
 * entries are the generic null/no-op handlers.
 */
static struct cdevsw mdctl_cdevsw = {
        /* open */      nullopen,
        /* close */     nullclose,
        /* read */      noread,
        /* write */     nowrite,
        /* ioctl */     mdctlioctl,
        /* poll */      nopoll,
        /* mmap */      nommap,
        /* strategy */  nostrategy,
        /* name */      MD_NAME,
        /* maj */       CDEV_MAJOR
};
148
/* Passed as the last argument of disk_create() in mdinit(). */
static struct cdevsw mddisk_cdevsw;

/* All configured units, linked through md_s.list (see mdnew()/mddestroy()). */
static LIST_HEAD(, md_s) md_softc_list = LIST_HEAD_INITIALIZER(&md_softc_list);
152
/*
 * Per-unit software state.  One instance is allocated (zeroed) by mdnew()
 * for every configured memory disk and released by mddestroy().
 */
struct md_s {
	int unit;			/* unit number, unique on md_softc_list */
	LIST_ENTRY(md_s) list;		/* linkage on md_softc_list */
	struct devstat stats;		/* registered in mdinit() */
	struct bio_queue_head bio_queue; /* requests queued by mdstrategy() */
	struct disk disk;		/* handed to disk_create(); label filled in mdopen() */
	dev_t dev;			/* device returned by disk_create() */
	int busy;			/* non-zero while a mdstart_*() routine drains bio_queue */
	enum md_types type;		/* backing store; selects the mdstart_*() routine */
	unsigned nsect;			/* number of sectors */
	unsigned opencount;		/* bumped in mdopen(), dropped in mdclose() */
	unsigned secsize;		/* sector size in bytes */
	unsigned flags;			/* MD_* option bits kept for this unit */

	/* MD_MALLOC related fields */
	u_char **secp;			/* per sector: buffer pointer, or a value <= 255 used as fill byte */

	/* MD_PRELOAD related fields */
	u_char *pl_ptr;			/* start of the image */
	unsigned pl_len;		/* image length in bytes */

	/* MD_VNODE related fields */
	struct vnode *vnode;		/* backing file, opened in mdcreate_vnode() */
	struct ucred *cred;		/* credentials duplicated by mdsetcred() */

	/* MD_SWAP related fields */
	vm_object_t object;		/* object from vm_pager_allocate(OBJT_SWAP, ...) */
};
181
182static int
183mdopen(dev_t dev, int flag, int fmt, struct proc *p)
184{
185	struct md_s *sc;
186	struct disklabel *dl;
187
188	if (md_debug)
189		printf("mdopen(%s %x %x %p)\n",
190			devtoname(dev), flag, fmt, p);
191
192	sc = dev->si_drv1;
193
194	dl = &sc->disk.d_label;
195	bzero(dl, sizeof(*dl));
196	dl->d_secsize = sc->secsize;
197	dl->d_nsectors = sc->nsect > 63 ? 63 : sc->nsect;
198	dl->d_ntracks = 1;
199	dl->d_secpercyl = dl->d_nsectors * dl->d_ntracks;
200	dl->d_secperunit = sc->nsect;
201	dl->d_ncylinders = dl->d_secperunit / dl->d_secpercyl;
202	sc->opencount++;
203	return (0);
204}
205
206static int
207mdclose(dev_t dev, int flags, int fmt, struct proc *p)
208{
209	struct md_s *sc = dev->si_drv1;
210
211	sc->opencount--;
212	return (0);
213}
214
215static int
216mdioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
217{
218
219	if (md_debug)
220		printf("mdioctl(%s %lx %p %x %p)\n",
221			devtoname(dev), cmd, addr, flags, p);
222
223	return (ENOIOCTL);
224}
225
226static void
227mdstart_malloc(struct md_s *sc)
228{
229	int i;
230	struct bio *bp;
231	devstat_trans_flags dop;
232	u_char *secp, **secpp, *dst;
233	unsigned secno, nsec, secval, uc;
234
235	for (;;) {
236		/* XXX: LOCK(unique unit numbers) */
237		bp = bioq_first(&sc->bio_queue);
238		if (bp)
239			bioq_remove(&sc->bio_queue, bp);
240		/* XXX: UNLOCK(unique unit numbers) */
241		if (!bp)
242			break;
243
244		devstat_start_transaction(&sc->stats);
245
246		if (bp->bio_cmd == BIO_DELETE)
247			dop = DEVSTAT_NO_DATA;
248		else if (bp->bio_cmd == BIO_READ)
249			dop = DEVSTAT_READ;
250		else
251			dop = DEVSTAT_WRITE;
252
253		nsec = bp->bio_bcount / sc->secsize;
254		secno = bp->bio_pblkno;
255		dst = bp->bio_data;
256		while (nsec--) {
257			secpp = &sc->secp[secno];
258			if ((uintptr_t)*secpp > 255) {
259				secp = *secpp;
260				secval = 0;
261			} else {
262				secp = NULL;
263				secval = (uintptr_t) *secpp;
264			}
265
266			if (md_debug > 2)
267				printf("%x %p %p %d\n",
268				    bp->bio_flags, secpp, secp, secval);
269
270			if (bp->bio_cmd == BIO_DELETE) {
271				if (!(sc->flags & MD_RESERVE) && secp != NULL) {
272					FREE(secp, M_MDSECT);
273					*secpp = 0;
274				}
275			} else if (bp->bio_cmd == BIO_READ) {
276				if (secp != NULL) {
277					bcopy(secp, dst, sc->secsize);
278				} else if (secval) {
279					for (i = 0; i < sc->secsize; i++)
280						dst[i] = secval;
281				} else {
282					bzero(dst, sc->secsize);
283				}
284			} else {
285				if (sc->flags & MD_COMPRESS) {
286					uc = dst[0];
287					for (i = 1; i < sc->secsize; i++)
288						if (dst[i] != uc)
289							break;
290				} else {
291					i = 0;
292					uc = 0;
293				}
294				if (i == sc->secsize) {
295					if (secp)
296						FREE(secp, M_MDSECT);
297					*secpp = (u_char *)(uintptr_t)uc;
298				} else {
299					if (secp == NULL)
300						MALLOC(secp, u_char *, sc->secsize, M_MDSECT, M_WAITOK);
301					bcopy(dst, secp, sc->secsize);
302					*secpp = secp;
303				}
304			}
305			secno++;
306			dst += sc->secsize;
307		}
308		bp->bio_resid = 0;
309		biofinish(bp, &sc->stats, 0);
310	}
311	return;
312}
313
314
315static void
316mdstart_preload(struct md_s *sc)
317{
318	struct bio *bp;
319	devstat_trans_flags dop;
320
321	for (;;) {
322		/* XXX: LOCK(unique unit numbers) */
323		bp = bioq_first(&sc->bio_queue);
324		if (bp)
325			bioq_remove(&sc->bio_queue, bp);
326		/* XXX: UNLOCK(unique unit numbers) */
327		if (!bp)
328			break;
329
330		devstat_start_transaction(&sc->stats);
331
332		if (bp->bio_cmd == BIO_DELETE) {
333			dop = DEVSTAT_NO_DATA;
334		} else if (bp->bio_cmd == BIO_READ) {
335			dop = DEVSTAT_READ;
336			bcopy(sc->pl_ptr + (bp->bio_pblkno << DEV_BSHIFT), bp->bio_data, bp->bio_bcount);
337		} else {
338			dop = DEVSTAT_WRITE;
339			bcopy(bp->bio_data, sc->pl_ptr + (bp->bio_pblkno << DEV_BSHIFT), bp->bio_bcount);
340		}
341		bp->bio_resid = 0;
342		biofinish(bp, &sc->stats, 0);
343	}
344	return;
345}
346
347static void
348mdstart_vnode(struct md_s *sc)
349{
350	int error;
351	struct bio *bp;
352	struct uio auio;
353	struct iovec aiov;
354	struct mount *mp;
355
356	/*
357	 * VNODE I/O
358	 *
359	 * If an error occurs, we set BIO_ERROR but we do not set
360	 * B_INVAL because (for a write anyway), the buffer is
361	 * still valid.
362	 */
363
364	for (;;) {
365		/* XXX: LOCK(unique unit numbers) */
366		bp = bioq_first(&sc->bio_queue);
367		if (bp)
368			bioq_remove(&sc->bio_queue, bp);
369		/* XXX: UNLOCK(unique unit numbers) */
370		if (!bp)
371			break;
372
373		devstat_start_transaction(&sc->stats);
374
375		bzero(&auio, sizeof(auio));
376
377		aiov.iov_base = bp->bio_data;
378		aiov.iov_len = bp->bio_bcount;
379		auio.uio_iov = &aiov;
380		auio.uio_iovcnt = 1;
381		auio.uio_offset = (vm_ooffset_t)bp->bio_pblkno * sc->secsize;
382		auio.uio_segflg = UIO_SYSSPACE;
383		if(bp->bio_cmd == BIO_READ)
384			auio.uio_rw = UIO_READ;
385		else
386			auio.uio_rw = UIO_WRITE;
387		auio.uio_resid = bp->bio_bcount;
388		auio.uio_procp = curproc;
389		if (VOP_ISLOCKED(sc->vnode, NULL))
390			vprint("unexpected md driver lock", sc->vnode);
391		if (bp->bio_cmd == BIO_READ) {
392			vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY, curproc);
393			error = VOP_READ(sc->vnode, &auio, 0, sc->cred);
394		} else {
395			(void) vn_start_write(sc->vnode, &mp, V_WAIT);
396			vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY, curproc);
397			error = VOP_WRITE(sc->vnode, &auio, 0, sc->cred);
398			vn_finished_write(mp);
399		}
400		VOP_UNLOCK(sc->vnode, 0, curproc);
401		bp->bio_resid = auio.uio_resid;
402		biofinish(bp, &sc->stats, error);
403	}
404	return;
405}
406
407static void
408mdstart_swap(struct md_s *sc)
409{
410	struct bio *bp;
411
412	for (;;) {
413		/* XXX: LOCK(unique unit numbers) */
414		bp = bioq_first(&sc->bio_queue);
415		if (bp)
416			bioq_remove(&sc->bio_queue, bp);
417		/* XXX: UNLOCK(unique unit numbers) */
418		if (!bp)
419			break;
420
421#if 0
422		devstat_start_transaction(&sc->stats);
423#endif
424
425		if ((bp->bio_cmd == BIO_DELETE) && (sc->flags & MD_RESERVE))
426			biodone(bp);
427		else
428			vm_pager_strategy(sc->object, bp);
429
430#if 0
431		devstat_end_transaction_bio(&sc->stats, bp);
432#endif
433	}
434	return;
435}
436
/*
 * Strategy routine: queue a request on its unit and, unless somebody is
 * already draining that unit's queue, drain it now.
 *
 * The request is always inserted into sc->bio_queue first.  sc->busy is
 * then switched from 0 to 1 with atomic_cmpset_int(); if that fails the
 * function returns and leaves the request on the queue.  Otherwise the
 * mdstart_*() routine matching sc->type is run, after which busy is
 * cleared again.
 *
 * NOTE(review): a request queued after the start routine found the queue
 * empty but before busy is cleared would stay queued until the next call;
 * confirm that callers are serialized well enough for this not to happen.
 */
static void
mdstrategy(struct bio *bp)
{
	struct md_s *sc;

	if (md_debug > 1)
		printf("mdstrategy(%p) %s %x, %d, %ld, %p)\n",
		    bp, devtoname(bp->bio_dev), bp->bio_flags, bp->bio_blkno,
		    bp->bio_bcount / DEV_BSIZE, bp->bio_data);

	sc = bp->bio_dev->si_drv1;

	/* XXX: LOCK(sc->lock) */
	bioqdisksort(&sc->bio_queue, bp);
	/* XXX: UNLOCK(sc->lock) */

	/* Somebody else holds the busy flag: the request stays queued. */
	if (atomic_cmpset_int(&sc->busy, 0, 1) == 0)
		return;

	switch (sc->type) {
	case MD_MALLOC:
		mdstart_malloc(sc);
		break;
	case MD_PRELOAD:
		mdstart_preload(sc);
		break;
	case MD_VNODE:
		mdstart_vnode(sc);
		break;
	case MD_SWAP:
		mdstart_swap(sc);
		break;
	default:
		panic("Impossible md(type)");
		break;
	}
	sc->busy = 0;
}
475
476static struct md_s *
477mdfind(int unit)
478{
479	struct md_s *sc;
480
481	/* XXX: LOCK(unique unit numbers) */
482	LIST_FOREACH(sc, &md_softc_list, list) {
483		if (sc->unit == unit)
484			break;
485	}
486	/* XXX: UNLOCK(unique unit numbers) */
487	return (sc);
488}
489
490static struct md_s *
491mdnew(int unit)
492{
493	struct md_s *sc;
494	int max = -1;
495
496	/* XXX: LOCK(unique unit numbers) */
497	LIST_FOREACH(sc, &md_softc_list, list) {
498		if (sc->unit == unit) {
499			/* XXX: UNLOCK(unique unit numbers) */
500			return (NULL);
501		}
502		if (sc->unit > max)
503			max = sc->unit;
504	}
505	if (unit == -1)
506		unit = max + 1;
507	if (unit > DKMAXUNIT)
508		return (NULL);
509	MALLOC(sc, struct md_s *, sizeof(*sc), M_MD, M_WAITOK | M_ZERO);
510	sc->unit = unit;
511	LIST_INSERT_HEAD(&md_softc_list, sc, list);
512	/* XXX: UNLOCK(unique unit numbers) */
513	return (sc);
514}
515
516static void
517mdinit(struct md_s *sc)
518{
519
520	bioq_init(&sc->bio_queue);
521	devstat_add_entry(&sc->stats, MD_NAME, sc->unit, sc->secsize,
522		DEVSTAT_NO_ORDERED_TAGS,
523		DEVSTAT_TYPE_DIRECT | DEVSTAT_TYPE_IF_OTHER,
524		DEVSTAT_PRIORITY_OTHER);
525	sc->dev = disk_create(sc->unit, &sc->disk, 0, &md_cdevsw, &mddisk_cdevsw);
526	sc->dev->si_drv1 = sc;
527}
528
529/*
530 * XXX: we should check that the range they feed us is mapped.
531 * XXX: we should implement read-only.
532 */
533
534static int
535mdcreate_preload(struct md_ioctl *mdio)
536{
537	struct md_s *sc;
538
539	if (mdio->md_size == 0)
540		return (EINVAL);
541	if (mdio->md_options & ~(MD_AUTOUNIT))
542		return (EINVAL);
543	if (mdio->md_options & MD_AUTOUNIT) {
544		sc = mdnew(-1);
545		if (sc == NULL)
546			return (ENOMEM);
547		mdio->md_unit = sc->unit;
548	} else {
549		sc = mdnew(mdio->md_unit);
550		if (sc == NULL)
551			return (EBUSY);
552	}
553	sc->type = MD_PRELOAD;
554	sc->secsize = DEV_BSIZE;
555	sc->nsect = mdio->md_size;
556	sc->flags = mdio->md_options & MD_FORCE;
557	/* Cast to pointer size, then to pointer to avoid warning */
558	sc->pl_ptr = (u_char *)(uintptr_t)mdio->md_base;
559	sc->pl_len = (mdio->md_size << DEV_BSHIFT);
560	mdinit(sc);
561	return (0);
562}
563
564
565static int
566mdcreate_malloc(struct md_ioctl *mdio)
567{
568	struct md_s *sc;
569	unsigned u;
570
571	if (mdio->md_size == 0)
572		return (EINVAL);
573	if (mdio->md_options & ~(MD_AUTOUNIT | MD_COMPRESS | MD_RESERVE))
574		return (EINVAL);
575	/* Compression doesn't make sense if we have reserved space */
576	if (mdio->md_options & MD_RESERVE)
577		mdio->md_options &= ~MD_COMPRESS;
578	if (mdio->md_options & MD_AUTOUNIT) {
579		sc = mdnew(-1);
580		if (sc == NULL)
581			return (ENOMEM);
582		mdio->md_unit = sc->unit;
583	} else {
584		sc = mdnew(mdio->md_unit);
585		if (sc == NULL)
586			return (EBUSY);
587	}
588	sc->type = MD_MALLOC;
589	sc->secsize = DEV_BSIZE;
590	sc->nsect = mdio->md_size;
591	sc->flags = mdio->md_options & (MD_COMPRESS | MD_FORCE);
592	MALLOC(sc->secp, u_char **, sc->nsect * sizeof(u_char *), M_MD, M_WAITOK | M_ZERO);
593	if (mdio->md_options & MD_RESERVE) {
594		for (u = 0; u < sc->nsect; u++)
595			MALLOC(sc->secp[u], u_char *, DEV_BSIZE, M_MDSECT, M_WAITOK | M_ZERO);
596	}
597	printf("%s%d: Malloc disk\n", MD_NAME, sc->unit);
598	mdinit(sc);
599	return (0);
600}
601
602
603static int
604mdsetcred(struct md_s *sc, struct ucred *cred)
605{
606	char *tmpbuf;
607	int error = 0;
608
609	/*
610	 * Set credits in our softc
611	 */
612
613	if (sc->cred)
614		crfree(sc->cred);
615	sc->cred = crdup(cred);
616
617	/*
618	 * Horrible kludge to establish credentials for NFS  XXX.
619	 */
620
621	if (sc->vnode) {
622		struct uio auio;
623		struct iovec aiov;
624
625		tmpbuf = malloc(sc->secsize, M_TEMP, M_WAITOK);
626		bzero(&auio, sizeof(auio));
627
628		aiov.iov_base = tmpbuf;
629		aiov.iov_len = sc->secsize;
630		auio.uio_iov = &aiov;
631		auio.uio_iovcnt = 1;
632		auio.uio_offset = 0;
633		auio.uio_rw = UIO_READ;
634		auio.uio_segflg = UIO_SYSSPACE;
635		auio.uio_resid = aiov.iov_len;
636		vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY, curproc);
637		error = VOP_READ(sc->vnode, &auio, 0, sc->cred);
638		VOP_UNLOCK(sc->vnode, 0, curproc);
639		free(tmpbuf, M_TEMP);
640	}
641	return (error);
642}
643
644static int
645mdcreate_vnode(struct md_ioctl *mdio, struct proc *p)
646{
647	struct md_s *sc;
648	struct vattr vattr;
649	struct nameidata nd;
650	int error, flags;
651
652	if (mdio->md_options & MD_AUTOUNIT) {
653		sc = mdnew(-1);
654		mdio->md_unit = sc->unit;
655	} else {
656		sc = mdnew(mdio->md_unit);
657	}
658	if (sc == NULL)
659		return (EBUSY);
660
661	sc->type = MD_VNODE;
662	sc->flags = mdio->md_options & MD_FORCE;
663
664	flags = FREAD|FWRITE;
665	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, mdio->md_file, p);
666	error = vn_open(&nd, &flags, 0);
667	if (error) {
668		if (error != EACCES && error != EPERM && error != EROFS)
669			return (error);
670		flags &= ~FWRITE;
671		sc->flags |= MD_READONLY;
672		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, mdio->md_file, p);
673		error = vn_open(&nd, &flags, 0);
674		if (error)
675			return (error);
676	}
677	NDFREE(&nd, NDF_ONLY_PNBUF);
678	if (nd.ni_vp->v_type != VREG ||
679	    (error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p))) {
680		VOP_UNLOCK(nd.ni_vp, 0, p);
681		(void) vn_close(nd.ni_vp, flags, p->p_ucred, p);
682		return (error ? error : EINVAL);
683	}
684	VOP_UNLOCK(nd.ni_vp, 0, p);
685	sc->secsize = DEV_BSIZE;
686	sc->vnode = nd.ni_vp;
687
688	/*
689	 * If the size is specified, override the file attributes.
690	 */
691	if (mdio->md_size)
692		sc->nsect = mdio->md_size;
693	else
694		sc->nsect = vattr.va_size / sc->secsize; /* XXX: round up ? */
695	if (sc->nsect == 0) {
696		(void) vn_close(nd.ni_vp, flags, p->p_ucred, p);
697		return (EINVAL);
698	}
699	error = mdsetcred(sc, p->p_ucred);
700	if (error) {
701		(void) vn_close(nd.ni_vp, flags, p->p_ucred, p);
702		return (error);
703	}
704	mdinit(sc);
705	return (0);
706}
707
708static int
709mddestroy(struct md_s *sc, struct proc *p)
710{
711	unsigned u;
712
713	GIANT_REQUIRED;
714
715	if (sc->dev != NULL) {
716		devstat_remove_entry(&sc->stats);
717		disk_destroy(sc->dev);
718	}
719	if (sc->vnode != NULL)
720		(void)vn_close(sc->vnode, sc->flags & MD_READONLY ?
721		    FREAD : (FREAD|FWRITE), sc->cred, p);
722	if (sc->cred != NULL)
723		crfree(sc->cred);
724	if (sc->object != NULL) {
725		vm_pager_deallocate(sc->object);
726	}
727	if (sc->secp != NULL) {
728		for (u = 0; u < sc->nsect; u++)
729			if ((uintptr_t)sc->secp[u] > 255)
730				FREE(sc->secp[u], M_MDSECT);
731		FREE(sc->secp, M_MD);
732	}
733
734	/* XXX: LOCK(unique unit numbers) */
735	LIST_REMOVE(sc, list);
736	/* XXX: UNLOCK(unique unit numbers) */
737	FREE(sc, M_MD);
738	return (0);
739}
740
741static int
742mdcreate_swap(struct md_ioctl *mdio, struct proc *p)
743{
744	int error;
745	struct md_s *sc;
746
747	GIANT_REQUIRED;
748
749	if (mdio->md_options & MD_AUTOUNIT) {
750		sc = mdnew(-1);
751		mdio->md_unit = sc->unit;
752	} else {
753		sc = mdnew(mdio->md_unit);
754	}
755	if (sc == NULL)
756		return (EBUSY);
757
758	sc->type = MD_SWAP;
759
760	/*
761	 * Range check.  Disallow negative sizes or any size less then the
762	 * size of a page.  Then round to a page.
763	 */
764
765	if (mdio->md_size == 0) {
766		mddestroy(sc, p);
767		return (EDOM);
768	}
769
770	/*
771	 * Allocate an OBJT_SWAP object.
772	 *
773	 * sc_secsize is PAGE_SIZE'd
774	 *
775	 * mdio->size is in DEV_BSIZE'd chunks.
776	 * Note the truncation.
777	 */
778
779	sc->secsize = PAGE_SIZE;
780	sc->nsect = mdio->md_size / (PAGE_SIZE / DEV_BSIZE);
781	sc->object = vm_pager_allocate(OBJT_SWAP, NULL, sc->secsize * (vm_offset_t)sc->nsect, VM_PROT_DEFAULT, 0);
782	sc->flags = mdio->md_options & MD_FORCE;
783	if (mdio->md_options & MD_RESERVE) {
784		if (swap_pager_reserve(sc->object, 0, sc->nsect) < 0) {
785			vm_pager_deallocate(sc->object);
786			sc->object = NULL;
787			mddestroy(sc, p);
788			return (EDOM);
789		}
790	}
791	error = mdsetcred(sc, p->p_ucred);
792	if (error)
793		mddestroy(sc, p);
794	else
795		mdinit(sc);
796	return (error);
797}
798
799static int
800mddetach(int unit, struct proc *p)
801{
802	struct md_s *sc;
803
804	sc = mdfind(unit);
805	if (sc == NULL)
806		return (ENOENT);
807	if (sc->opencount != 0 && !(sc->flags & MD_FORCE))
808		return (EBUSY);
809	switch(sc->type) {
810	case MD_VNODE:
811	case MD_SWAP:
812	case MD_MALLOC:
813	case MD_PRELOAD:
814		return (mddestroy(sc, p));
815	default:
816		return (EOPNOTSUPP);
817	}
818}
819
820static int
821mdctlioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
822{
823	struct md_ioctl *mdio;
824	struct md_s *sc;
825
826	if (md_debug)
827		printf("mdctlioctl(%s %lx %p %x %p)\n",
828			devtoname(dev), cmd, addr, flags, p);
829
830	mdio = (struct md_ioctl *)addr;
831	switch (cmd) {
832	case MDIOCATTACH:
833		switch (mdio->md_type) {
834		case MD_MALLOC:
835			return (mdcreate_malloc(mdio));
836		case MD_PRELOAD:
837			return (mdcreate_preload(mdio));
838		case MD_VNODE:
839			return (mdcreate_vnode(mdio, p));
840		case MD_SWAP:
841			return (mdcreate_swap(mdio, p));
842		default:
843			return (EINVAL);
844		}
845	case MDIOCDETACH:
846		if (mdio->md_file != NULL || mdio->md_size != 0 ||
847		    mdio->md_options != 0)
848			return (EINVAL);
849		return (mddetach(mdio->md_unit, p));
850	case MDIOCQUERY:
851		sc = mdfind(mdio->md_unit);
852		if (sc == NULL)
853			return (ENOENT);
854		mdio->md_type = sc->type;
855		mdio->md_options = sc->flags;
856		switch (sc->type) {
857		case MD_MALLOC:
858			mdio->md_size = sc->nsect;
859			break;
860		case MD_PRELOAD:
861			mdio->md_size = sc->nsect;
862			(u_char *)(uintptr_t)mdio->md_base = sc->pl_ptr;
863			break;
864		case MD_SWAP:
865			mdio->md_size = sc->nsect * (PAGE_SIZE / DEV_BSIZE);
866			break;
867		case MD_VNODE:
868			mdio->md_size = sc->nsect;
869			/* XXX fill this in */
870			mdio->md_file = NULL;
871			break;
872		}
873		return (0);
874	default:
875		return (ENOIOCTL);
876	};
877	return (ENOIOCTL);
878}
879
880static void
881md_preloaded(u_char *image, unsigned length)
882{
883	struct md_s *sc;
884
885	sc = mdnew(-1);
886	if (sc == NULL)
887		return;
888	sc->type = MD_PRELOAD;
889	sc->secsize = DEV_BSIZE;
890	sc->nsect = length / DEV_BSIZE;
891	sc->pl_ptr = image;
892	sc->pl_len = length;
893	if (sc->unit == 0)
894		mdrootready = 1;
895	mdinit(sc);
896}
897
898static void
899md_drvinit(void *unused)
900{
901
902	caddr_t mod;
903	caddr_t c;
904	u_char *ptr, *name, *type;
905	unsigned len;
906
907#ifdef MD_ROOT_SIZE
908	md_preloaded(mfs_root, MD_ROOT_SIZE*1024);
909#endif
910	mod = NULL;
911	while ((mod = preload_search_next_name(mod)) != NULL) {
912		name = (char *)preload_search_info(mod, MODINFO_NAME);
913		type = (char *)preload_search_info(mod, MODINFO_TYPE);
914		if (name == NULL)
915			continue;
916		if (type == NULL)
917			continue;
918		if (strcmp(type, "md_image") && strcmp(type, "mfs_root"))
919			continue;
920		c = preload_search_info(mod, MODINFO_ADDR);
921		ptr = *(u_char **)c;
922		c = preload_search_info(mod, MODINFO_SIZE);
923		len = *(unsigned *)c;
924		printf("%s%d: Preloaded image <%s> %d bytes at %p\n",
925		    MD_NAME, mdunits, name, len, ptr);
926		md_preloaded(ptr, len);
927	}
928	status_dev = make_dev(&mdctl_cdevsw, 0xffff00ff, UID_ROOT, GID_WHEEL,
929	    0600, MDCTL_NAME);
930}
931
932static int
933md_modevent(module_t mod, int type, void *data)
934{
935	int error;
936	struct md_s *sc;
937
938	switch (type) {
939	case MOD_LOAD:
940		md_drvinit(NULL);
941		break;
942	case MOD_UNLOAD:
943		LIST_FOREACH(sc, &md_softc_list, list) {
944			error = mddetach(sc->unit, curproc);
945			if (error != 0)
946				return (error);
947		}
948		if (status_dev)
949			destroy_dev(status_dev);
950		status_dev = 0;
951		break;
952	default:
953		break;
954	}
955	return (0);
956}
957
/* Module glue: name, event handler (md_modevent) and no extra argument. */
static moduledata_t md_mod = {
	MD_NAME,
	md_modevent,
	NULL
};
/* Registered at SI_SUB_DRIVERS, ordered by SI_ORDER_MIDDLE plus the major number. */
DECLARE_MODULE(md, md_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE+CDEV_MAJOR);
MODULE_VERSION(md, MD_MODVER);
965
966
967#ifdef MD_ROOT
968static void
969md_takeroot(void *junk)
970{
971	if (mdrootready)
972		rootdevnames[0] = "ufs:/dev/md0c";
973}
974
975SYSINIT(md_root, SI_SUB_MOUNT_ROOT, SI_ORDER_FIRST, md_takeroot, NULL);
976#endif
977