Deleted Added
full compact
md.c (98747) md.c (102291)
1/*
2 * ----------------------------------------------------------------------------
3 * "THE BEER-WARE LICENSE" (Revision 42):
4 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you
5 * can do whatever you want with this stuff. If we meet some day, and you think
6 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
7 * ----------------------------------------------------------------------------
8 *
1/*
2 * ----------------------------------------------------------------------------
3 * "THE BEER-WARE LICENSE" (Revision 42):
4 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you
5 * can do whatever you want with this stuff. If we meet some day, and you think
6 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
7 * ----------------------------------------------------------------------------
8 *
9 * $FreeBSD: head/sys/dev/md/md.c 98747 2002-06-24 12:07:02Z mux $
9 * $FreeBSD: head/sys/dev/md/md.c 102291 2002-08-22 21:24:01Z archie $
10 *
11 */
12
13/*
14 * The following functions are based on the vn(4) driver: mdstart_swap(),
15 * mdstart_vnode(), mdcreate_swap(), mdcreate_vnode() and mddestroy(),
16 * and as such under the following copyright:
17 *
18 * Copyright (c) 1988 University of Utah.
19 * Copyright (c) 1990, 1993
20 * The Regents of the University of California. All rights reserved.
21 *
22 * This code is derived from software contributed to Berkeley by
23 * the Systems Programming Group of the University of Utah Computer
24 * Science Department.
25 *
26 * Redistribution and use in source and binary forms, with or without
27 * modification, are permitted provided that the following conditions
28 * are met:
29 * 1. Redistributions of source code must retain the above copyright
30 * notice, this list of conditions and the following disclaimer.
31 * 2. Redistributions in binary form must reproduce the above copyright
32 * notice, this list of conditions and the following disclaimer in the
33 * documentation and/or other materials provided with the distribution.
34 * 3. All advertising materials mentioning features or use of this software
35 * must display the following acknowledgement:
36 * This product includes software developed by the University of
37 * California, Berkeley and its contributors.
38 * 4. Neither the name of the University nor the names of its contributors
39 * may be used to endorse or promote products derived from this software
40 * without specific prior written permission.
41 *
42 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52 * SUCH DAMAGE.
53 *
54 * from: Utah Hdr: vn.c 1.13 94/04/02
55 *
56 * from: @(#)vn.c 8.6 (Berkeley) 4/1/94
57 * From: src/sys/dev/vn/vn.c,v 1.122 2000/12/16 16:06:03
58 */
59
60#include "opt_md.h"
61
62#include <sys/param.h>
63#include <sys/systm.h>
64#include <sys/bio.h>
65#include <sys/conf.h>
66#include <sys/devicestat.h>
67#include <sys/disk.h>
68#include <sys/fcntl.h>
69#include <sys/kernel.h>
70#include <sys/kthread.h>
71#include <sys/linker.h>
72#include <sys/lock.h>
73#include <sys/malloc.h>
74#include <sys/mdioctl.h>
75#include <sys/mutex.h>
76#include <sys/namei.h>
77#include <sys/proc.h>
78#include <sys/queue.h>
79#include <sys/stdint.h>
80#include <sys/sysctl.h>
81#include <sys/vnode.h>
82
83#include <vm/vm.h>
84#include <vm/vm_object.h>
85#include <vm/vm_page.h>
86#include <vm/vm_pager.h>
87#include <vm/swap_pager.h>
88#include <vm/uma.h>
89
90#define MD_MODVER 1
91
92#define MD_SHUTDOWN 0x10000 /* Tell worker thread to terminate. */
93
94#ifndef MD_NSECT
95#define MD_NSECT (10000 * 2)
96#endif
97
98static MALLOC_DEFINE(M_MD, "MD disk", "Memory Disk");
99static MALLOC_DEFINE(M_MDSECT, "MD sectors", "Memory Disk Sectors");
100
101static int md_debug;
102SYSCTL_INT(_debug, OID_AUTO, mddebug, CTLFLAG_RW, &md_debug, 0, "");
103
104#if defined(MD_ROOT) && defined(MD_ROOT_SIZE)
105/* Image gets put here: */
106static u_char mfs_root[MD_ROOT_SIZE*1024] = "MFS Filesystem goes here";
107static u_char end_mfs_root[] __unused = "MFS Filesystem had better STOP here";
108#endif
109
110static int mdrootready;
111static int mdunits;
112static dev_t status_dev = 0;
113
114#define CDEV_MAJOR 95
115
116static d_strategy_t mdstrategy;
117static d_open_t mdopen;
118static d_close_t mdclose;
119static d_ioctl_t mdioctl, mdctlioctl;
120
121static struct cdevsw md_cdevsw = {
122 /* open */ mdopen,
123 /* close */ mdclose,
124 /* read */ physread,
125 /* write */ physwrite,
126 /* ioctl */ mdioctl,
127 /* poll */ nopoll,
128 /* mmap */ nommap,
129 /* strategy */ mdstrategy,
130 /* name */ MD_NAME,
131 /* maj */ CDEV_MAJOR,
132 /* dump */ nodump,
133 /* psize */ nopsize,
134 /* flags */ D_DISK | D_CANFREE | D_MEMDISK,
135};
136
137static struct cdevsw mdctl_cdevsw = {
138 /* open */ nullopen,
139 /* close */ nullclose,
140 /* read */ noread,
141 /* write */ nowrite,
142 /* ioctl */ mdctlioctl,
143 /* poll */ nopoll,
144 /* mmap */ nommap,
145 /* strategy */ nostrategy,
146 /* name */ MD_NAME,
147 /* maj */ CDEV_MAJOR
148};
149
150static struct cdevsw mddisk_cdevsw;
151
152static LIST_HEAD(, md_s) md_softc_list = LIST_HEAD_INITIALIZER(&md_softc_list);
153
154#define NINDIR (PAGE_SIZE / sizeof(uintptr_t))
155#define NMASK (NINDIR-1)
156static int nshift;
157
/*
 * One node of the sector lookup tree walked by s_read() and s_write().
 *
 * array - slot table; new_indir() allocates NINDIR zeroed entries.
 * total - number of slots; set to NINDIR by new_indir().
 * used  - count of non-zero slots, maintained by s_write().
 * shift - right-shift applied to the sector offset to pick a slot at this
 *         level; a node with shift == 0 is treated as a leaf.
 */
158struct indir {
159 uintptr_t *array;
160 uint total;
161 uint used;
162 uint shift;
163};
164
/*
 * Per-unit state for one memory disk.
 *
 * mdnew() allocates it zeroed, fills in unit and name, starts the worker
 * thread recorded in procp and links it onto md_softc_list.  mdstrategy()
 * appends requests to bio_queue and md_kthread() services them according
 * to 'type'.  opencount is bumped by mdopen() and dropped by mdclose().
 * The trailing groups hold the state of the individual backing stores, as
 * labelled.
 */
165struct md_s {
166 int unit;
167 LIST_ENTRY(md_s) list;
168 struct devstat stats;
169 struct bio_queue_head bio_queue;
170 struct disk disk;
171 dev_t dev;
172 enum md_types type;
173 unsigned nsect;
174 unsigned opencount;
175 unsigned secsize;
176 unsigned flags;
177 char name[20];
178 struct proc *procp;
179
180 /* MD_MALLOC related fields */
181 struct indir *indir;
182 uma_zone_t uma;
183
184 /* MD_PRELOAD related fields */
185 u_char *pl_ptr;
186 unsigned pl_len;
187
188 /* MD_VNODE related fields */
189 struct vnode *vnode;
190 struct ucred *cred;
191
192 /* MD_SWAP related fields */
193 vm_object_t object;
194};
195
196static int mddestroy(struct md_s *sc, struct thread *td);
197
198static struct indir *
199new_indir(uint shift)
200{
201 struct indir *ip;
202
203 ip = malloc(sizeof *ip, M_MD, M_NOWAIT | M_ZERO);
204 if (ip == NULL)
205 return (NULL);
206 ip->array = malloc(sizeof(uintptr_t) * NINDIR,
207 M_MDSECT, M_NOWAIT | M_ZERO);
208 if (ip->array == NULL) {
209 free(ip, M_MD);
210 return (NULL);
211 }
212 ip->total = NINDIR;
213 ip->shift = shift;
214 return (ip);
215}
216
217static void
218del_indir(struct indir *ip)
219{
220
221 free(ip->array, M_MDSECT);
222 free(ip, M_MD);
223}
224
225static void
226destroy_indir(struct md_s *sc, struct indir *ip)
227{
228 int i;
229
230 for (i = 0; i < NINDIR; i++) {
231 if (!ip->array[i])
232 continue;
233 if (ip->shift)
234 destroy_indir(sc, (struct indir*)(ip->array[i]));
235 else if (ip->array[i] > 255)
236 uma_zfree(sc->uma, (void *)(ip->array[i]));
237 }
238 del_indir(ip);
239}
240
241/*
242 * This function does the math and alloctes the top level "indir" structure
243 * for a device of "size" sectors.
244 */
245
246static struct indir *
247dimension(off_t size)
248{
249 off_t rcnt;
250 struct indir *ip;
251 int i, layer;
252
253 rcnt = size;
254 layer = 0;
255 while (rcnt > NINDIR) {
256 rcnt /= NINDIR;
257 layer++;
258 }
259 /* figure out log2(NINDIR) */
260 for (i = NINDIR, nshift = -1; i; nshift++)
261 i >>= 1;
262
263 /*
264 * XXX: the top layer is probably not fully populated, so we allocate
265 * too much space for ip->array in new_indir() here.
266 */
267 ip = new_indir(layer * nshift);
268 return (ip);
269}
270
271/*
272 * Read a given sector
273 */
274
275static uintptr_t
276s_read(struct indir *ip, off_t offset)
277{
278 struct indir *cip;
279 int idx;
280 uintptr_t up;
281
282 if (md_debug > 1)
283 printf("s_read(%jd)\n", (intmax_t)offset);
284 up = 0;
285 for (cip = ip; cip != NULL;) {
286 if (cip->shift) {
287 idx = (offset >> cip->shift) & NMASK;
288 up = cip->array[idx];
289 cip = (struct indir *)up;
290 continue;
291 }
292 idx = offset & NMASK;
293 return (cip->array[idx]);
294 }
295 return (0);
296}
297
298/*
299 * Write a given sector, prune the tree if the value is 0
300 */
301
302static int
303s_write(struct indir *ip, off_t offset, uintptr_t ptr)
304{
305 struct indir *cip, *lip[10];
306 int idx, li;
307 uintptr_t up;
308
309 if (md_debug > 1)
310 printf("s_write(%jd, %p)\n", (intmax_t)offset, (void *)ptr);
311 up = 0;
312 li = 0;
313 cip = ip;
314 for (;;) {
315 lip[li++] = cip;
316 if (cip->shift) {
317 idx = (offset >> cip->shift) & NMASK;
318 up = cip->array[idx];
319 if (up != 0) {
320 cip = (struct indir *)up;
321 continue;
322 }
323 /* Allocate branch */
324 cip->array[idx] =
325 (uintptr_t)new_indir(cip->shift - nshift);
326 if (cip->array[idx] == 0)
327 return (ENOMEM);
328 cip->used++;
329 up = cip->array[idx];
330 cip = (struct indir *)up;
331 continue;
332 }
333 /* leafnode */
334 idx = offset & NMASK;
335 up = cip->array[idx];
336 if (up != 0)
337 cip->used--;
338 cip->array[idx] = ptr;
339 if (ptr != 0)
340 cip->used++;
341 break;
342 }
343 if (cip->used != 0 || li == 1)
344 return (0);
345 li--;
346 while (cip->used == 0 && cip != ip) {
347 li--;
348 idx = (offset >> lip[li]->shift) & NMASK;
349 up = lip[li]->array[idx];
350 KASSERT(up == (uintptr_t)cip, ("md screwed up"));
351 del_indir(cip);
10 *
11 */
12
13/*
14 * The following functions are based on the vn(4) driver: mdstart_swap(),
15 * mdstart_vnode(), mdcreate_swap(), mdcreate_vnode() and mddestroy(),
16 * and as such under the following copyright:
17 *
18 * Copyright (c) 1988 University of Utah.
19 * Copyright (c) 1990, 1993
20 * The Regents of the University of California. All rights reserved.
21 *
22 * This code is derived from software contributed to Berkeley by
23 * the Systems Programming Group of the University of Utah Computer
24 * Science Department.
25 *
26 * Redistribution and use in source and binary forms, with or without
27 * modification, are permitted provided that the following conditions
28 * are met:
29 * 1. Redistributions of source code must retain the above copyright
30 * notice, this list of conditions and the following disclaimer.
31 * 2. Redistributions in binary form must reproduce the above copyright
32 * notice, this list of conditions and the following disclaimer in the
33 * documentation and/or other materials provided with the distribution.
34 * 3. All advertising materials mentioning features or use of this software
35 * must display the following acknowledgement:
36 * This product includes software developed by the University of
37 * California, Berkeley and its contributors.
38 * 4. Neither the name of the University nor the names of its contributors
39 * may be used to endorse or promote products derived from this software
40 * without specific prior written permission.
41 *
42 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52 * SUCH DAMAGE.
53 *
54 * from: Utah Hdr: vn.c 1.13 94/04/02
55 *
56 * from: @(#)vn.c 8.6 (Berkeley) 4/1/94
57 * From: src/sys/dev/vn/vn.c,v 1.122 2000/12/16 16:06:03
58 */
59
60#include "opt_md.h"
61
62#include <sys/param.h>
63#include <sys/systm.h>
64#include <sys/bio.h>
65#include <sys/conf.h>
66#include <sys/devicestat.h>
67#include <sys/disk.h>
68#include <sys/fcntl.h>
69#include <sys/kernel.h>
70#include <sys/kthread.h>
71#include <sys/linker.h>
72#include <sys/lock.h>
73#include <sys/malloc.h>
74#include <sys/mdioctl.h>
75#include <sys/mutex.h>
76#include <sys/namei.h>
77#include <sys/proc.h>
78#include <sys/queue.h>
79#include <sys/stdint.h>
80#include <sys/sysctl.h>
81#include <sys/vnode.h>
82
83#include <vm/vm.h>
84#include <vm/vm_object.h>
85#include <vm/vm_page.h>
86#include <vm/vm_pager.h>
87#include <vm/swap_pager.h>
88#include <vm/uma.h>
89
90#define MD_MODVER 1
91
92#define MD_SHUTDOWN 0x10000 /* Tell worker thread to terminate. */
93
94#ifndef MD_NSECT
95#define MD_NSECT (10000 * 2)
96#endif
97
98static MALLOC_DEFINE(M_MD, "MD disk", "Memory Disk");
99static MALLOC_DEFINE(M_MDSECT, "MD sectors", "Memory Disk Sectors");
100
101static int md_debug;
102SYSCTL_INT(_debug, OID_AUTO, mddebug, CTLFLAG_RW, &md_debug, 0, "");
103
104#if defined(MD_ROOT) && defined(MD_ROOT_SIZE)
105/* Image gets put here: */
106static u_char mfs_root[MD_ROOT_SIZE*1024] = "MFS Filesystem goes here";
107static u_char end_mfs_root[] __unused = "MFS Filesystem had better STOP here";
108#endif
109
110static int mdrootready;
111static int mdunits;
112static dev_t status_dev = 0;
113
114#define CDEV_MAJOR 95
115
116static d_strategy_t mdstrategy;
117static d_open_t mdopen;
118static d_close_t mdclose;
119static d_ioctl_t mdioctl, mdctlioctl;
120
121static struct cdevsw md_cdevsw = {
122 /* open */ mdopen,
123 /* close */ mdclose,
124 /* read */ physread,
125 /* write */ physwrite,
126 /* ioctl */ mdioctl,
127 /* poll */ nopoll,
128 /* mmap */ nommap,
129 /* strategy */ mdstrategy,
130 /* name */ MD_NAME,
131 /* maj */ CDEV_MAJOR,
132 /* dump */ nodump,
133 /* psize */ nopsize,
134 /* flags */ D_DISK | D_CANFREE | D_MEMDISK,
135};
136
137static struct cdevsw mdctl_cdevsw = {
138 /* open */ nullopen,
139 /* close */ nullclose,
140 /* read */ noread,
141 /* write */ nowrite,
142 /* ioctl */ mdctlioctl,
143 /* poll */ nopoll,
144 /* mmap */ nommap,
145 /* strategy */ nostrategy,
146 /* name */ MD_NAME,
147 /* maj */ CDEV_MAJOR
148};
149
150static struct cdevsw mddisk_cdevsw;
151
152static LIST_HEAD(, md_s) md_softc_list = LIST_HEAD_INITIALIZER(&md_softc_list);
153
154#define NINDIR (PAGE_SIZE / sizeof(uintptr_t))
155#define NMASK (NINDIR-1)
156static int nshift;
157
/*
 * One node of the sector lookup tree walked by s_read() and s_write().
 *
 * array - slot table; new_indir() allocates NINDIR zeroed entries.
 * total - number of slots; set to NINDIR by new_indir().
 * used  - count of non-zero slots, maintained by s_write().
 * shift - right-shift applied to the sector offset to pick a slot at this
 *         level; a node with shift == 0 is treated as a leaf.
 */
158struct indir {
159 uintptr_t *array;
160 uint total;
161 uint used;
162 uint shift;
163};
164
/*
 * Per-unit state for one memory disk.
 *
 * mdnew() allocates it zeroed, fills in unit and name, starts the worker
 * thread recorded in procp and links it onto md_softc_list.  mdstrategy()
 * appends requests to bio_queue and md_kthread() services them according
 * to 'type'.  opencount is bumped by mdopen() and dropped by mdclose().
 * The trailing groups hold the state of the individual backing stores, as
 * labelled.
 */
165struct md_s {
166 int unit;
167 LIST_ENTRY(md_s) list;
168 struct devstat stats;
169 struct bio_queue_head bio_queue;
170 struct disk disk;
171 dev_t dev;
172 enum md_types type;
173 unsigned nsect;
174 unsigned opencount;
175 unsigned secsize;
176 unsigned flags;
177 char name[20];
178 struct proc *procp;
179
180 /* MD_MALLOC related fields */
181 struct indir *indir;
182 uma_zone_t uma;
183
184 /* MD_PRELOAD related fields */
185 u_char *pl_ptr;
186 unsigned pl_len;
187
188 /* MD_VNODE related fields */
189 struct vnode *vnode;
190 struct ucred *cred;
191
192 /* MD_SWAP related fields */
193 vm_object_t object;
194};
195
196static int mddestroy(struct md_s *sc, struct thread *td);
197
198static struct indir *
199new_indir(uint shift)
200{
201 struct indir *ip;
202
203 ip = malloc(sizeof *ip, M_MD, M_NOWAIT | M_ZERO);
204 if (ip == NULL)
205 return (NULL);
206 ip->array = malloc(sizeof(uintptr_t) * NINDIR,
207 M_MDSECT, M_NOWAIT | M_ZERO);
208 if (ip->array == NULL) {
209 free(ip, M_MD);
210 return (NULL);
211 }
212 ip->total = NINDIR;
213 ip->shift = shift;
214 return (ip);
215}
216
217static void
218del_indir(struct indir *ip)
219{
220
221 free(ip->array, M_MDSECT);
222 free(ip, M_MD);
223}
224
225static void
226destroy_indir(struct md_s *sc, struct indir *ip)
227{
228 int i;
229
230 for (i = 0; i < NINDIR; i++) {
231 if (!ip->array[i])
232 continue;
233 if (ip->shift)
234 destroy_indir(sc, (struct indir*)(ip->array[i]));
235 else if (ip->array[i] > 255)
236 uma_zfree(sc->uma, (void *)(ip->array[i]));
237 }
238 del_indir(ip);
239}
240
241/*
242 * This function does the math and alloctes the top level "indir" structure
243 * for a device of "size" sectors.
244 */
245
246static struct indir *
247dimension(off_t size)
248{
249 off_t rcnt;
250 struct indir *ip;
251 int i, layer;
252
253 rcnt = size;
254 layer = 0;
255 while (rcnt > NINDIR) {
256 rcnt /= NINDIR;
257 layer++;
258 }
259 /* figure out log2(NINDIR) */
260 for (i = NINDIR, nshift = -1; i; nshift++)
261 i >>= 1;
262
263 /*
264 * XXX: the top layer is probably not fully populated, so we allocate
265 * too much space for ip->array in new_indir() here.
266 */
267 ip = new_indir(layer * nshift);
268 return (ip);
269}
270
271/*
272 * Read a given sector
273 */
274
275static uintptr_t
276s_read(struct indir *ip, off_t offset)
277{
278 struct indir *cip;
279 int idx;
280 uintptr_t up;
281
282 if (md_debug > 1)
283 printf("s_read(%jd)\n", (intmax_t)offset);
284 up = 0;
285 for (cip = ip; cip != NULL;) {
286 if (cip->shift) {
287 idx = (offset >> cip->shift) & NMASK;
288 up = cip->array[idx];
289 cip = (struct indir *)up;
290 continue;
291 }
292 idx = offset & NMASK;
293 return (cip->array[idx]);
294 }
295 return (0);
296}
297
298/*
299 * Write a given sector, prune the tree if the value is 0
300 */
301
302static int
303s_write(struct indir *ip, off_t offset, uintptr_t ptr)
304{
305 struct indir *cip, *lip[10];
306 int idx, li;
307 uintptr_t up;
308
309 if (md_debug > 1)
310 printf("s_write(%jd, %p)\n", (intmax_t)offset, (void *)ptr);
311 up = 0;
312 li = 0;
313 cip = ip;
314 for (;;) {
315 lip[li++] = cip;
316 if (cip->shift) {
317 idx = (offset >> cip->shift) & NMASK;
318 up = cip->array[idx];
319 if (up != 0) {
320 cip = (struct indir *)up;
321 continue;
322 }
323 /* Allocate branch */
324 cip->array[idx] =
325 (uintptr_t)new_indir(cip->shift - nshift);
326 if (cip->array[idx] == 0)
327 return (ENOMEM);
328 cip->used++;
329 up = cip->array[idx];
330 cip = (struct indir *)up;
331 continue;
332 }
333 /* leafnode */
334 idx = offset & NMASK;
335 up = cip->array[idx];
336 if (up != 0)
337 cip->used--;
338 cip->array[idx] = ptr;
339 if (ptr != 0)
340 cip->used++;
341 break;
342 }
343 if (cip->used != 0 || li == 1)
344 return (0);
345 li--;
346 while (cip->used == 0 && cip != ip) {
347 li--;
348 idx = (offset >> lip[li]->shift) & NMASK;
349 up = lip[li]->array[idx];
350 KASSERT(up == (uintptr_t)cip, ("md screwed up"));
351 del_indir(cip);
352 lip[li]->array[idx] = NULL;
352 lip[li]->array[idx] = 0;
353 lip[li]->used--;
354 cip = lip[li];
355 }
356 return (0);
357}
358
359static int
360mdopen(dev_t dev, int flag, int fmt, struct thread *td)
361{
362 struct md_s *sc;
363 struct disklabel *dl;
364
365 if (md_debug)
366 printf("mdopen(%s %x %x %p)\n",
367 devtoname(dev), flag, fmt, td);
368
369 sc = dev->si_drv1;
370
371 dl = &sc->disk.d_label;
372 bzero(dl, sizeof(*dl));
373 dl->d_secsize = sc->secsize;
374 dl->d_nsectors = sc->nsect > 63 ? 63 : sc->nsect;
375 dl->d_ntracks = 1;
376 dl->d_secpercyl = dl->d_nsectors * dl->d_ntracks;
377 dl->d_secperunit = sc->nsect;
378 dl->d_ncylinders = dl->d_secperunit / dl->d_secpercyl;
379 sc->opencount++;
380 return (0);
381}
382
383static int
384mdclose(dev_t dev, int flags, int fmt, struct thread *td)
385{
386 struct md_s *sc = dev->si_drv1;
387
388 sc->opencount--;
389 return (0);
390}
391
392static int
393mdioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct thread *td)
394{
395
396 if (md_debug)
397 printf("mdioctl(%s %lx %p %x %p)\n",
398 devtoname(dev), cmd, addr, flags, td);
399
400 return (ENOIOCTL);
401}
402
403static int
404mdstart_malloc(struct md_s *sc, struct bio *bp)
405{
406 int i, error;
407 u_char *dst;
408 unsigned secno, nsec, uc;
409 uintptr_t sp, osp;
410
411 nsec = bp->bio_bcount / sc->secsize;
412 secno = bp->bio_pblkno;
413 dst = bp->bio_data;
414 error = 0;
415 while (nsec--) {
416 osp = s_read(sc->indir, secno);
417 if (bp->bio_cmd == BIO_DELETE) {
418 if (osp != 0)
419 error = s_write(sc->indir, secno, 0);
420 } else if (bp->bio_cmd == BIO_READ) {
421 if (osp == 0)
422 bzero(dst, sc->secsize);
423 else if (osp <= 255)
424 for (i = 0; i < sc->secsize; i++)
425 dst[i] = osp;
426 else
427 bcopy((void *)osp, dst, sc->secsize);
428 osp = 0;
429 } else if (bp->bio_cmd == BIO_WRITE) {
430 if (sc->flags & MD_COMPRESS) {
431 uc = dst[0];
432 for (i = 1; i < sc->secsize; i++)
433 if (dst[i] != uc)
434 break;
435 } else {
436 i = 0;
437 uc = 0;
438 }
439 if (i == sc->secsize) {
440 if (osp != uc)
441 error = s_write(sc->indir, secno, uc);
442 } else {
443 if (osp <= 255) {
444 sp = (uintptr_t) uma_zalloc(
445 sc->uma, M_NOWAIT);
446 if (sp == 0) {
447 error = ENOSPC;
448 break;
449 }
450 bcopy(dst, (void *)sp, sc->secsize);
451 error = s_write(sc->indir, secno, sp);
452 } else {
453 bcopy(dst, (void *)osp, sc->secsize);
454 osp = 0;
455 }
456 }
457 } else {
458 error = EOPNOTSUPP;
459 }
460 if (osp > 255)
461 uma_zfree(sc->uma, (void*)osp);
462 if (error)
463 break;
464 secno++;
465 dst += sc->secsize;
466 }
467 bp->bio_resid = 0;
468 return (error);
469}
470
471static int
472mdstart_preload(struct md_s *sc, struct bio *bp)
473{
474
475 if (bp->bio_cmd == BIO_DELETE) {
476 } else if (bp->bio_cmd == BIO_READ) {
477 bcopy(sc->pl_ptr + (bp->bio_pblkno << DEV_BSHIFT), bp->bio_data, bp->bio_bcount);
478 } else {
479 bcopy(bp->bio_data, sc->pl_ptr + (bp->bio_pblkno << DEV_BSHIFT), bp->bio_bcount);
480 }
481 bp->bio_resid = 0;
482 return (0);
483}
484
485static int
486mdstart_vnode(struct md_s *sc, struct bio *bp)
487{
488 int error;
489 struct uio auio;
490 struct iovec aiov;
491 struct mount *mp;
492
493 /*
494 * VNODE I/O
495 *
496 * If an error occurs, we set BIO_ERROR but we do not set
497 * B_INVAL because (for a write anyway), the buffer is
498 * still valid.
499 */
500
501 bzero(&auio, sizeof(auio));
502
503 aiov.iov_base = bp->bio_data;
504 aiov.iov_len = bp->bio_bcount;
505 auio.uio_iov = &aiov;
506 auio.uio_iovcnt = 1;
507 auio.uio_offset = (vm_ooffset_t)bp->bio_pblkno * sc->secsize;
508 auio.uio_segflg = UIO_SYSSPACE;
509 if(bp->bio_cmd == BIO_READ)
510 auio.uio_rw = UIO_READ;
511 else
512 auio.uio_rw = UIO_WRITE;
513 auio.uio_resid = bp->bio_bcount;
514 auio.uio_td = curthread;
515 /*
516 * When reading set IO_DIRECT to try to avoid double-caching
517 * the data. When writing IO_DIRECT is not optimal, but we
518 * must set IO_NOWDRAIN to avoid a wdrain deadlock.
519 */
520 if (bp->bio_cmd == BIO_READ) {
521 vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY, curthread);
522 error = VOP_READ(sc->vnode, &auio, IO_DIRECT, sc->cred);
523 } else {
524 (void) vn_start_write(sc->vnode, &mp, V_WAIT);
525 vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY, curthread);
526 error = VOP_WRITE(sc->vnode, &auio, IO_NOWDRAIN, sc->cred);
527 vn_finished_write(mp);
528 }
529 VOP_UNLOCK(sc->vnode, 0, curthread);
530 bp->bio_resid = auio.uio_resid;
531 return (error);
532}
533
534static int
535mdstart_swap(struct md_s *sc, struct bio *bp)
536{
537
538 if ((bp->bio_cmd == BIO_DELETE) && (sc->flags & MD_RESERVE))
539 biodone(bp);
540 else
541 vm_pager_strategy(sc->object, bp);
542 return (-1);
543}
544
545static void
546mdstrategy(struct bio *bp)
547{
548 struct md_s *sc;
549
550 if (md_debug > 1)
551 printf("mdstrategy(%p) %s %x, %lld, %ld, %p)\n",
552 (void *)bp, devtoname(bp->bio_dev), bp->bio_flags,
553 (long long)bp->bio_blkno, bp->bio_bcount / DEV_BSIZE,
554 (void *)bp->bio_data);
555
556 sc = bp->bio_dev->si_drv1;
557
558 /* XXX: LOCK(sc->lock) */
559 bioqdisksort(&sc->bio_queue, bp);
560 /* XXX: UNLOCK(sc->lock) */
561
562 wakeup(sc);
563}
564
565static void
566md_kthread(void *arg)
567{
568 struct md_s *sc;
569 struct bio *bp;
570 int error;
571
572 sc = arg;
573 curthread->td_base_pri = PRIBIO;
574
575 mtx_lock(&Giant);
576 for (;;) {
577 /* XXX: LOCK(unique unit numbers) */
578 bp = bioq_first(&sc->bio_queue);
579 if (bp)
580 bioq_remove(&sc->bio_queue, bp);
581 /* XXX: UNLOCK(unique unit numbers) */
582 if (!bp) {
583 tsleep(sc, PRIBIO, "mdwait", 0);
584 if (sc->flags & MD_SHUTDOWN) {
585 sc->procp = NULL;
586 wakeup(&sc->procp);
587 kthread_exit(0);
588 }
589 continue;
590 }
591
592 switch (sc->type) {
593 case MD_MALLOC:
594 devstat_start_transaction(&sc->stats);
595 error = mdstart_malloc(sc, bp);
596 break;
597 case MD_PRELOAD:
598 devstat_start_transaction(&sc->stats);
599 error = mdstart_preload(sc, bp);
600 break;
601 case MD_VNODE:
602 devstat_start_transaction(&sc->stats);
603 error = mdstart_vnode(sc, bp);
604 break;
605 case MD_SWAP:
606 error = mdstart_swap(sc, bp);
607 break;
608 default:
609 panic("Impossible md(type)");
610 break;
611 }
612
613 if (error != -1)
614 biofinish(bp, &sc->stats, error);
615 }
616}
617
618static struct md_s *
619mdfind(int unit)
620{
621 struct md_s *sc;
622
623 /* XXX: LOCK(unique unit numbers) */
624 LIST_FOREACH(sc, &md_softc_list, list) {
625 if (sc->unit == unit)
626 break;
627 }
628 /* XXX: UNLOCK(unique unit numbers) */
629 return (sc);
630}
631
632static struct md_s *
633mdnew(int unit)
634{
635 struct md_s *sc;
636 int error, max = -1;
637
638 /* XXX: LOCK(unique unit numbers) */
639 LIST_FOREACH(sc, &md_softc_list, list) {
640 if (sc->unit == unit) {
641 /* XXX: UNLOCK(unique unit numbers) */
642 return (NULL);
643 }
644 if (sc->unit > max)
645 max = sc->unit;
646 }
647 if (unit == -1)
648 unit = max + 1;
649 if (unit > DKMAXUNIT)
650 return (NULL);
651 sc = (struct md_s *)malloc(sizeof *sc, M_MD, M_WAITOK | M_ZERO);
652 sc->unit = unit;
653 sprintf(sc->name, "md%d", unit);
654 error = kthread_create(md_kthread, sc, &sc->procp, 0, "%s", sc->name);
655 if (error) {
656 free(sc, M_MD);
657 return (NULL);
658 }
659 LIST_INSERT_HEAD(&md_softc_list, sc, list);
660 /* XXX: UNLOCK(unique unit numbers) */
661 return (sc);
662}
663
664static void
665mdinit(struct md_s *sc)
666{
667
668 bioq_init(&sc->bio_queue);
669 devstat_add_entry(&sc->stats, MD_NAME, sc->unit, sc->secsize,
670 DEVSTAT_NO_ORDERED_TAGS,
671 DEVSTAT_TYPE_DIRECT | DEVSTAT_TYPE_IF_OTHER,
672 DEVSTAT_PRIORITY_OTHER);
673 sc->dev = disk_create(sc->unit, &sc->disk, 0, &md_cdevsw, &mddisk_cdevsw);
674 sc->dev->si_drv1 = sc;
675}
676
677/*
678 * XXX: we should check that the range they feed us is mapped.
679 * XXX: we should implement read-only.
680 */
681
682static int
683mdcreate_preload(struct md_ioctl *mdio)
684{
685 struct md_s *sc;
686
687 if (mdio->md_size == 0)
688 return (EINVAL);
689 if (mdio->md_options & ~(MD_AUTOUNIT))
690 return (EINVAL);
691 if (mdio->md_options & MD_AUTOUNIT) {
692 sc = mdnew(-1);
693 if (sc == NULL)
694 return (ENOMEM);
695 mdio->md_unit = sc->unit;
696 } else {
697 sc = mdnew(mdio->md_unit);
698 if (sc == NULL)
699 return (EBUSY);
700 }
701 sc->type = MD_PRELOAD;
702 sc->secsize = DEV_BSIZE;
703 sc->nsect = mdio->md_size;
704 sc->flags = mdio->md_options & MD_FORCE;
705 /* Cast to pointer size, then to pointer to avoid warning */
706 sc->pl_ptr = (u_char *)(uintptr_t)mdio->md_base;
707 sc->pl_len = (mdio->md_size << DEV_BSHIFT);
708 mdinit(sc);
709 return (0);
710}
711
712
713static int
714mdcreate_malloc(struct md_ioctl *mdio)
715{
716 struct md_s *sc;
717 off_t u;
718 uintptr_t sp;
719 int error;
720
721 error = 0;
722 if (mdio->md_size == 0)
723 return (EINVAL);
724 if (mdio->md_options & ~(MD_AUTOUNIT | MD_COMPRESS | MD_RESERVE))
725 return (EINVAL);
726 /* Compression doesn't make sense if we have reserved space */
727 if (mdio->md_options & MD_RESERVE)
728 mdio->md_options &= ~MD_COMPRESS;
729 if (mdio->md_options & MD_AUTOUNIT) {
730 sc = mdnew(-1);
731 if (sc == NULL)
732 return (ENOMEM);
733 mdio->md_unit = sc->unit;
734 } else {
735 sc = mdnew(mdio->md_unit);
736 if (sc == NULL)
737 return (EBUSY);
738 }
739 sc->type = MD_MALLOC;
740 sc->secsize = DEV_BSIZE;
741 sc->nsect = mdio->md_size;
742 sc->flags = mdio->md_options & (MD_COMPRESS | MD_FORCE);
743 sc->indir = dimension(sc->nsect);
744 sc->uma = uma_zcreate(sc->name, sc->secsize,
745 NULL, NULL, NULL, NULL, 0x1ff, 0);
746 if (mdio->md_options & MD_RESERVE) {
747 for (u = 0; u < sc->nsect; u++) {
748 sp = (uintptr_t) uma_zalloc(sc->uma, M_NOWAIT | M_ZERO);
749 if (sp != 0)
750 error = s_write(sc->indir, u, sp);
751 else
752 error = ENOMEM;
753 if (error)
754 break;
755 }
756 }
757 if (!error) {
758 printf("%s%d: Malloc disk\n", MD_NAME, sc->unit);
759 mdinit(sc);
760 } else
761 mddestroy(sc, NULL);
762 return (error);
763}
764
765
766static int
767mdsetcred(struct md_s *sc, struct ucred *cred)
768{
769 char *tmpbuf;
770 int error = 0;
771
772 /*
773 * Set credits in our softc
774 */
775
776 if (sc->cred)
777 crfree(sc->cred);
778 sc->cred = crhold(cred);
779
780 /*
781 * Horrible kludge to establish credentials for NFS XXX.
782 */
783
784 if (sc->vnode) {
785 struct uio auio;
786 struct iovec aiov;
787
788 tmpbuf = malloc(sc->secsize, M_TEMP, M_WAITOK);
789 bzero(&auio, sizeof(auio));
790
791 aiov.iov_base = tmpbuf;
792 aiov.iov_len = sc->secsize;
793 auio.uio_iov = &aiov;
794 auio.uio_iovcnt = 1;
795 auio.uio_offset = 0;
796 auio.uio_rw = UIO_READ;
797 auio.uio_segflg = UIO_SYSSPACE;
798 auio.uio_resid = aiov.iov_len;
799 vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY, curthread);
800 error = VOP_READ(sc->vnode, &auio, 0, sc->cred);
801 VOP_UNLOCK(sc->vnode, 0, curthread);
802 free(tmpbuf, M_TEMP);
803 }
804 return (error);
805}
806
807static int
808mdcreate_vnode(struct md_ioctl *mdio, struct thread *td)
809{
810 struct md_s *sc;
811 struct vattr vattr;
812 struct nameidata nd;
813 int error, flags;
814
815 flags = FREAD|FWRITE;
816 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, mdio->md_file, td);
817 error = vn_open(&nd, &flags, 0);
818 if (error) {
819 if (error != EACCES && error != EPERM && error != EROFS)
820 return (error);
821 flags &= ~FWRITE;
822 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, mdio->md_file, td);
823 error = vn_open(&nd, &flags, 0);
824 if (error)
825 return (error);
826 }
827 NDFREE(&nd, NDF_ONLY_PNBUF);
828 if (nd.ni_vp->v_type != VREG ||
829 (error = VOP_GETATTR(nd.ni_vp, &vattr, td->td_ucred, td))) {
830 VOP_UNLOCK(nd.ni_vp, 0, td);
831 (void) vn_close(nd.ni_vp, flags, td->td_ucred, td);
832 return (error ? error : EINVAL);
833 }
834 VOP_UNLOCK(nd.ni_vp, 0, td);
835
836 if (mdio->md_options & MD_AUTOUNIT) {
837 sc = mdnew(-1);
838 mdio->md_unit = sc->unit;
839 } else {
840 sc = mdnew(mdio->md_unit);
841 }
842 if (sc == NULL) {
843 (void) vn_close(nd.ni_vp, flags, td->td_ucred, td);
844 return (EBUSY);
845 }
846
847 sc->type = MD_VNODE;
848 sc->flags = mdio->md_options & MD_FORCE;
849 if (!(flags & FWRITE))
850 sc->flags |= MD_READONLY;
851 sc->secsize = DEV_BSIZE;
852 sc->vnode = nd.ni_vp;
853
854 /*
855 * If the size is specified, override the file attributes.
856 */
857 if (mdio->md_size)
858 sc->nsect = mdio->md_size;
859 else
860 sc->nsect = vattr.va_size / sc->secsize; /* XXX: round up ? */
861 if (sc->nsect == 0) {
862 mddestroy(sc, td);
863 return (EINVAL);
864 }
865 error = mdsetcred(sc, td->td_ucred);
866 if (error) {
867 mddestroy(sc, td);
868 return (error);
869 }
870 mdinit(sc);
871 return (0);
872}
873
874static int
875mddestroy(struct md_s *sc, struct thread *td)
876{
877
878 GIANT_REQUIRED;
879
880 if (sc->dev != NULL) {
881 devstat_remove_entry(&sc->stats);
882 disk_destroy(sc->dev);
883 }
884 sc->flags |= MD_SHUTDOWN;
885 wakeup(sc);
886 while (sc->procp != NULL)
887 tsleep(&sc->procp, PRIBIO, "mddestroy", hz / 10);
888 if (sc->vnode != NULL)
889 (void)vn_close(sc->vnode, sc->flags & MD_READONLY ?
890 FREAD : (FREAD|FWRITE), sc->cred, td);
891 if (sc->cred != NULL)
892 crfree(sc->cred);
893 if (sc->object != NULL) {
894 vm_pager_deallocate(sc->object);
895 }
896 if (sc->indir)
897 destroy_indir(sc, sc->indir);
898 if (sc->uma)
899 uma_zdestroy(sc->uma);
900
901 /* XXX: LOCK(unique unit numbers) */
902 LIST_REMOVE(sc, list);
903 /* XXX: UNLOCK(unique unit numbers) */
904 free(sc, M_MD);
905 return (0);
906}
907
908static int
909mdcreate_swap(struct md_ioctl *mdio, struct thread *td)
910{
911 int error;
912 struct md_s *sc;
913
914 GIANT_REQUIRED;
915
916 if (mdio->md_options & MD_AUTOUNIT) {
917 sc = mdnew(-1);
918 mdio->md_unit = sc->unit;
919 } else {
920 sc = mdnew(mdio->md_unit);
921 }
922 if (sc == NULL)
923 return (EBUSY);
924
925 sc->type = MD_SWAP;
926
927 /*
928 * Range check. Disallow negative sizes or any size less then the
929 * size of a page. Then round to a page.
930 */
931
932 if (mdio->md_size == 0) {
933 mddestroy(sc, td);
934 return (EDOM);
935 }
936
937 /*
938 * Allocate an OBJT_SWAP object.
939 *
940 * sc_secsize is PAGE_SIZE'd
941 *
942 * mdio->size is in DEV_BSIZE'd chunks.
943 * Note the truncation.
944 */
945
946 sc->secsize = PAGE_SIZE;
947 sc->nsect = mdio->md_size / (PAGE_SIZE / DEV_BSIZE);
948 sc->object = vm_pager_allocate(OBJT_SWAP, NULL, sc->secsize * (vm_offset_t)sc->nsect, VM_PROT_DEFAULT, 0);
949 sc->flags = mdio->md_options & MD_FORCE;
950 if (mdio->md_options & MD_RESERVE) {
951 if (swap_pager_reserve(sc->object, 0, sc->nsect) < 0) {
952 vm_pager_deallocate(sc->object);
953 sc->object = NULL;
954 mddestroy(sc, td);
955 return (EDOM);
956 }
957 }
958 error = mdsetcred(sc, td->td_ucred);
959 if (error)
960 mddestroy(sc, td);
961 else
962 mdinit(sc);
963 return (error);
964}
965
966static int
967mddetach(int unit, struct thread *td)
968{
969 struct md_s *sc;
970
971 sc = mdfind(unit);
972 if (sc == NULL)
973 return (ENOENT);
974 if (sc->opencount != 0 && !(sc->flags & MD_FORCE))
975 return (EBUSY);
976 switch(sc->type) {
977 case MD_VNODE:
978 case MD_SWAP:
979 case MD_MALLOC:
980 case MD_PRELOAD:
981 return (mddestroy(sc, td));
982 default:
983 return (EOPNOTSUPP);
984 }
985}
986
987static int
988mdctlioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct thread *td)
989{
990 struct md_ioctl *mdio;
991 struct md_s *sc;
992
993 if (md_debug)
994 printf("mdctlioctl(%s %lx %p %x %p)\n",
995 devtoname(dev), cmd, addr, flags, td);
996
997 /*
998 * We assert the version number in the individual ioctl
999 * handlers instead of out here because (a) it is possible we
1000 * may add another ioctl in the future which doesn't read an
1001 * mdio, and (b) the correct return value for an unknown ioctl
1002 * is ENOIOCTL, not EINVAL.
1003 */
1004 mdio = (struct md_ioctl *)addr;
1005 switch (cmd) {
1006 case MDIOCATTACH:
1007 if (mdio->md_version != MDIOVERSION)
1008 return (EINVAL);
1009 switch (mdio->md_type) {
1010 case MD_MALLOC:
1011 return (mdcreate_malloc(mdio));
1012 case MD_PRELOAD:
1013 return (mdcreate_preload(mdio));
1014 case MD_VNODE:
1015 return (mdcreate_vnode(mdio, td));
1016 case MD_SWAP:
1017 return (mdcreate_swap(mdio, td));
1018 default:
1019 return (EINVAL);
1020 }
1021 case MDIOCDETACH:
1022 if (mdio->md_version != MDIOVERSION)
1023 return (EINVAL);
1024 if (mdio->md_file != NULL || mdio->md_size != 0 ||
1025 mdio->md_options != 0)
1026 return (EINVAL);
1027 return (mddetach(mdio->md_unit, td));
1028 case MDIOCQUERY:
1029 if (mdio->md_version != MDIOVERSION)
1030 return (EINVAL);
1031 sc = mdfind(mdio->md_unit);
1032 if (sc == NULL)
1033 return (ENOENT);
1034 mdio->md_type = sc->type;
1035 mdio->md_options = sc->flags;
1036 switch (sc->type) {
1037 case MD_MALLOC:
1038 mdio->md_size = sc->nsect;
1039 break;
1040 case MD_PRELOAD:
1041 mdio->md_size = sc->nsect;
1042 (u_char *)(uintptr_t)mdio->md_base = sc->pl_ptr;
1043 break;
1044 case MD_SWAP:
1045 mdio->md_size = sc->nsect * (PAGE_SIZE / DEV_BSIZE);
1046 break;
1047 case MD_VNODE:
1048 mdio->md_size = sc->nsect;
1049 /* XXX fill this in */
1050 mdio->md_file = NULL;
1051 break;
1052 }
1053 return (0);
1054 default:
1055 return (ENOIOCTL);
1056 };
1057 return (ENOIOCTL);
1058}
1059
1060static void
1061md_preloaded(u_char *image, unsigned length)
1062{
1063 struct md_s *sc;
1064
1065 sc = mdnew(-1);
1066 if (sc == NULL)
1067 return;
1068 sc->type = MD_PRELOAD;
1069 sc->secsize = DEV_BSIZE;
1070 sc->nsect = length / DEV_BSIZE;
1071 sc->pl_ptr = image;
1072 sc->pl_len = length;
1073 if (sc->unit == 0)
1074 mdrootready = 1;
1075 mdinit(sc);
1076}
1077
1078static void
1079md_drvinit(void *unused)
1080{
1081
1082 caddr_t mod;
1083 caddr_t c;
1084 u_char *ptr, *name, *type;
1085 unsigned len;
1086
1087#ifdef MD_ROOT_SIZE
1088 md_preloaded(mfs_root, MD_ROOT_SIZE*1024);
1089#endif
1090 mod = NULL;
1091 while ((mod = preload_search_next_name(mod)) != NULL) {
1092 name = (char *)preload_search_info(mod, MODINFO_NAME);
1093 type = (char *)preload_search_info(mod, MODINFO_TYPE);
1094 if (name == NULL)
1095 continue;
1096 if (type == NULL)
1097 continue;
1098 if (strcmp(type, "md_image") && strcmp(type, "mfs_root"))
1099 continue;
1100 c = preload_search_info(mod, MODINFO_ADDR);
1101 ptr = *(u_char **)c;
1102 c = preload_search_info(mod, MODINFO_SIZE);
1103 len = *(unsigned *)c;
1104 printf("%s%d: Preloaded image <%s> %d bytes at %p\n",
1105 MD_NAME, mdunits, name, len, ptr);
1106 md_preloaded(ptr, len);
1107 }
1108 status_dev = make_dev(&mdctl_cdevsw, 0xffff00ff, UID_ROOT, GID_WHEEL,
1109 0600, MDCTL_NAME);
1110}
1111
1112static int
1113md_modevent(module_t mod, int type, void *data)
1114{
1115 int error;
1116 struct md_s *sc;
1117
1118 switch (type) {
1119 case MOD_LOAD:
1120 md_drvinit(NULL);
1121 break;
1122 case MOD_UNLOAD:
1123 LIST_FOREACH(sc, &md_softc_list, list) {
1124 error = mddetach(sc->unit, curthread);
1125 if (error != 0)
1126 return (error);
1127 }
1128 if (status_dev)
1129 destroy_dev(status_dev);
1130 status_dev = 0;
1131 break;
1132 default:
1133 break;
1134 }
1135 return (0);
1136}
1137
1138static moduledata_t md_mod = {
1139 MD_NAME,
1140 md_modevent,
1141 NULL
1142};
1143DECLARE_MODULE(md, md_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE+CDEV_MAJOR);
1144MODULE_VERSION(md, MD_MODVER);
1145
1146
#ifdef MD_ROOT
/*
 * If a preloaded image ended up on unit 0 (mdrootready is set), make it
 * the first candidate root device.
 */
static void
md_takeroot(void *junk)
{

	if (!mdrootready)
		return;
	rootdevnames[0] = "ufs:/dev/md0c";
}

SYSINIT(md_root, SI_SUB_MOUNT_ROOT, SI_ORDER_FIRST, md_takeroot, NULL);
#endif
353 lip[li]->used--;
354 cip = lip[li];
355 }
356 return (0);
357}
358
359static int
360mdopen(dev_t dev, int flag, int fmt, struct thread *td)
361{
362 struct md_s *sc;
363 struct disklabel *dl;
364
365 if (md_debug)
366 printf("mdopen(%s %x %x %p)\n",
367 devtoname(dev), flag, fmt, td);
368
369 sc = dev->si_drv1;
370
371 dl = &sc->disk.d_label;
372 bzero(dl, sizeof(*dl));
373 dl->d_secsize = sc->secsize;
374 dl->d_nsectors = sc->nsect > 63 ? 63 : sc->nsect;
375 dl->d_ntracks = 1;
376 dl->d_secpercyl = dl->d_nsectors * dl->d_ntracks;
377 dl->d_secperunit = sc->nsect;
378 dl->d_ncylinders = dl->d_secperunit / dl->d_secpercyl;
379 sc->opencount++;
380 return (0);
381}
382
383static int
384mdclose(dev_t dev, int flags, int fmt, struct thread *td)
385{
386 struct md_s *sc = dev->si_drv1;
387
388 sc->opencount--;
389 return (0);
390}
391
392static int
393mdioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct thread *td)
394{
395
396 if (md_debug)
397 printf("mdioctl(%s %lx %p %x %p)\n",
398 devtoname(dev), cmd, addr, flags, td);
399
400 return (ENOIOCTL);
401}
402
403static int
404mdstart_malloc(struct md_s *sc, struct bio *bp)
405{
406 int i, error;
407 u_char *dst;
408 unsigned secno, nsec, uc;
409 uintptr_t sp, osp;
410
411 nsec = bp->bio_bcount / sc->secsize;
412 secno = bp->bio_pblkno;
413 dst = bp->bio_data;
414 error = 0;
415 while (nsec--) {
416 osp = s_read(sc->indir, secno);
417 if (bp->bio_cmd == BIO_DELETE) {
418 if (osp != 0)
419 error = s_write(sc->indir, secno, 0);
420 } else if (bp->bio_cmd == BIO_READ) {
421 if (osp == 0)
422 bzero(dst, sc->secsize);
423 else if (osp <= 255)
424 for (i = 0; i < sc->secsize; i++)
425 dst[i] = osp;
426 else
427 bcopy((void *)osp, dst, sc->secsize);
428 osp = 0;
429 } else if (bp->bio_cmd == BIO_WRITE) {
430 if (sc->flags & MD_COMPRESS) {
431 uc = dst[0];
432 for (i = 1; i < sc->secsize; i++)
433 if (dst[i] != uc)
434 break;
435 } else {
436 i = 0;
437 uc = 0;
438 }
439 if (i == sc->secsize) {
440 if (osp != uc)
441 error = s_write(sc->indir, secno, uc);
442 } else {
443 if (osp <= 255) {
444 sp = (uintptr_t) uma_zalloc(
445 sc->uma, M_NOWAIT);
446 if (sp == 0) {
447 error = ENOSPC;
448 break;
449 }
450 bcopy(dst, (void *)sp, sc->secsize);
451 error = s_write(sc->indir, secno, sp);
452 } else {
453 bcopy(dst, (void *)osp, sc->secsize);
454 osp = 0;
455 }
456 }
457 } else {
458 error = EOPNOTSUPP;
459 }
460 if (osp > 255)
461 uma_zfree(sc->uma, (void*)osp);
462 if (error)
463 break;
464 secno++;
465 dst += sc->secsize;
466 }
467 bp->bio_resid = 0;
468 return (error);
469}
470
471static int
472mdstart_preload(struct md_s *sc, struct bio *bp)
473{
474
475 if (bp->bio_cmd == BIO_DELETE) {
476 } else if (bp->bio_cmd == BIO_READ) {
477 bcopy(sc->pl_ptr + (bp->bio_pblkno << DEV_BSHIFT), bp->bio_data, bp->bio_bcount);
478 } else {
479 bcopy(bp->bio_data, sc->pl_ptr + (bp->bio_pblkno << DEV_BSHIFT), bp->bio_bcount);
480 }
481 bp->bio_resid = 0;
482 return (0);
483}
484
485static int
486mdstart_vnode(struct md_s *sc, struct bio *bp)
487{
488 int error;
489 struct uio auio;
490 struct iovec aiov;
491 struct mount *mp;
492
493 /*
494 * VNODE I/O
495 *
496 * If an error occurs, we set BIO_ERROR but we do not set
497 * B_INVAL because (for a write anyway), the buffer is
498 * still valid.
499 */
500
501 bzero(&auio, sizeof(auio));
502
503 aiov.iov_base = bp->bio_data;
504 aiov.iov_len = bp->bio_bcount;
505 auio.uio_iov = &aiov;
506 auio.uio_iovcnt = 1;
507 auio.uio_offset = (vm_ooffset_t)bp->bio_pblkno * sc->secsize;
508 auio.uio_segflg = UIO_SYSSPACE;
509 if(bp->bio_cmd == BIO_READ)
510 auio.uio_rw = UIO_READ;
511 else
512 auio.uio_rw = UIO_WRITE;
513 auio.uio_resid = bp->bio_bcount;
514 auio.uio_td = curthread;
515 /*
516 * When reading set IO_DIRECT to try to avoid double-caching
517 * the data. When writing IO_DIRECT is not optimal, but we
518 * must set IO_NOWDRAIN to avoid a wdrain deadlock.
519 */
520 if (bp->bio_cmd == BIO_READ) {
521 vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY, curthread);
522 error = VOP_READ(sc->vnode, &auio, IO_DIRECT, sc->cred);
523 } else {
524 (void) vn_start_write(sc->vnode, &mp, V_WAIT);
525 vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY, curthread);
526 error = VOP_WRITE(sc->vnode, &auio, IO_NOWDRAIN, sc->cred);
527 vn_finished_write(mp);
528 }
529 VOP_UNLOCK(sc->vnode, 0, curthread);
530 bp->bio_resid = auio.uio_resid;
531 return (error);
532}
533
534static int
535mdstart_swap(struct md_s *sc, struct bio *bp)
536{
537
538 if ((bp->bio_cmd == BIO_DELETE) && (sc->flags & MD_RESERVE))
539 biodone(bp);
540 else
541 vm_pager_strategy(sc->object, bp);
542 return (-1);
543}
544
545static void
546mdstrategy(struct bio *bp)
547{
548 struct md_s *sc;
549
550 if (md_debug > 1)
551 printf("mdstrategy(%p) %s %x, %lld, %ld, %p)\n",
552 (void *)bp, devtoname(bp->bio_dev), bp->bio_flags,
553 (long long)bp->bio_blkno, bp->bio_bcount / DEV_BSIZE,
554 (void *)bp->bio_data);
555
556 sc = bp->bio_dev->si_drv1;
557
558 /* XXX: LOCK(sc->lock) */
559 bioqdisksort(&sc->bio_queue, bp);
560 /* XXX: UNLOCK(sc->lock) */
561
562 wakeup(sc);
563}
564
565static void
566md_kthread(void *arg)
567{
568 struct md_s *sc;
569 struct bio *bp;
570 int error;
571
572 sc = arg;
573 curthread->td_base_pri = PRIBIO;
574
575 mtx_lock(&Giant);
576 for (;;) {
577 /* XXX: LOCK(unique unit numbers) */
578 bp = bioq_first(&sc->bio_queue);
579 if (bp)
580 bioq_remove(&sc->bio_queue, bp);
581 /* XXX: UNLOCK(unique unit numbers) */
582 if (!bp) {
583 tsleep(sc, PRIBIO, "mdwait", 0);
584 if (sc->flags & MD_SHUTDOWN) {
585 sc->procp = NULL;
586 wakeup(&sc->procp);
587 kthread_exit(0);
588 }
589 continue;
590 }
591
592 switch (sc->type) {
593 case MD_MALLOC:
594 devstat_start_transaction(&sc->stats);
595 error = mdstart_malloc(sc, bp);
596 break;
597 case MD_PRELOAD:
598 devstat_start_transaction(&sc->stats);
599 error = mdstart_preload(sc, bp);
600 break;
601 case MD_VNODE:
602 devstat_start_transaction(&sc->stats);
603 error = mdstart_vnode(sc, bp);
604 break;
605 case MD_SWAP:
606 error = mdstart_swap(sc, bp);
607 break;
608 default:
609 panic("Impossible md(type)");
610 break;
611 }
612
613 if (error != -1)
614 biofinish(bp, &sc->stats, error);
615 }
616}
617
618static struct md_s *
619mdfind(int unit)
620{
621 struct md_s *sc;
622
623 /* XXX: LOCK(unique unit numbers) */
624 LIST_FOREACH(sc, &md_softc_list, list) {
625 if (sc->unit == unit)
626 break;
627 }
628 /* XXX: UNLOCK(unique unit numbers) */
629 return (sc);
630}
631
632static struct md_s *
633mdnew(int unit)
634{
635 struct md_s *sc;
636 int error, max = -1;
637
638 /* XXX: LOCK(unique unit numbers) */
639 LIST_FOREACH(sc, &md_softc_list, list) {
640 if (sc->unit == unit) {
641 /* XXX: UNLOCK(unique unit numbers) */
642 return (NULL);
643 }
644 if (sc->unit > max)
645 max = sc->unit;
646 }
647 if (unit == -1)
648 unit = max + 1;
649 if (unit > DKMAXUNIT)
650 return (NULL);
651 sc = (struct md_s *)malloc(sizeof *sc, M_MD, M_WAITOK | M_ZERO);
652 sc->unit = unit;
653 sprintf(sc->name, "md%d", unit);
654 error = kthread_create(md_kthread, sc, &sc->procp, 0, "%s", sc->name);
655 if (error) {
656 free(sc, M_MD);
657 return (NULL);
658 }
659 LIST_INSERT_HEAD(&md_softc_list, sc, list);
660 /* XXX: UNLOCK(unique unit numbers) */
661 return (sc);
662}
663
664static void
665mdinit(struct md_s *sc)
666{
667
668 bioq_init(&sc->bio_queue);
669 devstat_add_entry(&sc->stats, MD_NAME, sc->unit, sc->secsize,
670 DEVSTAT_NO_ORDERED_TAGS,
671 DEVSTAT_TYPE_DIRECT | DEVSTAT_TYPE_IF_OTHER,
672 DEVSTAT_PRIORITY_OTHER);
673 sc->dev = disk_create(sc->unit, &sc->disk, 0, &md_cdevsw, &mddisk_cdevsw);
674 sc->dev->si_drv1 = sc;
675}
676
677/*
678 * XXX: we should check that the range they feed us is mapped.
679 * XXX: we should implement read-only.
680 */
681
682static int
683mdcreate_preload(struct md_ioctl *mdio)
684{
685 struct md_s *sc;
686
687 if (mdio->md_size == 0)
688 return (EINVAL);
689 if (mdio->md_options & ~(MD_AUTOUNIT))
690 return (EINVAL);
691 if (mdio->md_options & MD_AUTOUNIT) {
692 sc = mdnew(-1);
693 if (sc == NULL)
694 return (ENOMEM);
695 mdio->md_unit = sc->unit;
696 } else {
697 sc = mdnew(mdio->md_unit);
698 if (sc == NULL)
699 return (EBUSY);
700 }
701 sc->type = MD_PRELOAD;
702 sc->secsize = DEV_BSIZE;
703 sc->nsect = mdio->md_size;
704 sc->flags = mdio->md_options & MD_FORCE;
705 /* Cast to pointer size, then to pointer to avoid warning */
706 sc->pl_ptr = (u_char *)(uintptr_t)mdio->md_base;
707 sc->pl_len = (mdio->md_size << DEV_BSHIFT);
708 mdinit(sc);
709 return (0);
710}
711
712
713static int
714mdcreate_malloc(struct md_ioctl *mdio)
715{
716 struct md_s *sc;
717 off_t u;
718 uintptr_t sp;
719 int error;
720
721 error = 0;
722 if (mdio->md_size == 0)
723 return (EINVAL);
724 if (mdio->md_options & ~(MD_AUTOUNIT | MD_COMPRESS | MD_RESERVE))
725 return (EINVAL);
726 /* Compression doesn't make sense if we have reserved space */
727 if (mdio->md_options & MD_RESERVE)
728 mdio->md_options &= ~MD_COMPRESS;
729 if (mdio->md_options & MD_AUTOUNIT) {
730 sc = mdnew(-1);
731 if (sc == NULL)
732 return (ENOMEM);
733 mdio->md_unit = sc->unit;
734 } else {
735 sc = mdnew(mdio->md_unit);
736 if (sc == NULL)
737 return (EBUSY);
738 }
739 sc->type = MD_MALLOC;
740 sc->secsize = DEV_BSIZE;
741 sc->nsect = mdio->md_size;
742 sc->flags = mdio->md_options & (MD_COMPRESS | MD_FORCE);
743 sc->indir = dimension(sc->nsect);
744 sc->uma = uma_zcreate(sc->name, sc->secsize,
745 NULL, NULL, NULL, NULL, 0x1ff, 0);
746 if (mdio->md_options & MD_RESERVE) {
747 for (u = 0; u < sc->nsect; u++) {
748 sp = (uintptr_t) uma_zalloc(sc->uma, M_NOWAIT | M_ZERO);
749 if (sp != 0)
750 error = s_write(sc->indir, u, sp);
751 else
752 error = ENOMEM;
753 if (error)
754 break;
755 }
756 }
757 if (!error) {
758 printf("%s%d: Malloc disk\n", MD_NAME, sc->unit);
759 mdinit(sc);
760 } else
761 mddestroy(sc, NULL);
762 return (error);
763}
764
765
766static int
767mdsetcred(struct md_s *sc, struct ucred *cred)
768{
769 char *tmpbuf;
770 int error = 0;
771
772 /*
773 * Set credits in our softc
774 */
775
776 if (sc->cred)
777 crfree(sc->cred);
778 sc->cred = crhold(cred);
779
780 /*
781 * Horrible kludge to establish credentials for NFS XXX.
782 */
783
784 if (sc->vnode) {
785 struct uio auio;
786 struct iovec aiov;
787
788 tmpbuf = malloc(sc->secsize, M_TEMP, M_WAITOK);
789 bzero(&auio, sizeof(auio));
790
791 aiov.iov_base = tmpbuf;
792 aiov.iov_len = sc->secsize;
793 auio.uio_iov = &aiov;
794 auio.uio_iovcnt = 1;
795 auio.uio_offset = 0;
796 auio.uio_rw = UIO_READ;
797 auio.uio_segflg = UIO_SYSSPACE;
798 auio.uio_resid = aiov.iov_len;
799 vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY, curthread);
800 error = VOP_READ(sc->vnode, &auio, 0, sc->cred);
801 VOP_UNLOCK(sc->vnode, 0, curthread);
802 free(tmpbuf, M_TEMP);
803 }
804 return (error);
805}
806
807static int
808mdcreate_vnode(struct md_ioctl *mdio, struct thread *td)
809{
810 struct md_s *sc;
811 struct vattr vattr;
812 struct nameidata nd;
813 int error, flags;
814
815 flags = FREAD|FWRITE;
816 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, mdio->md_file, td);
817 error = vn_open(&nd, &flags, 0);
818 if (error) {
819 if (error != EACCES && error != EPERM && error != EROFS)
820 return (error);
821 flags &= ~FWRITE;
822 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, mdio->md_file, td);
823 error = vn_open(&nd, &flags, 0);
824 if (error)
825 return (error);
826 }
827 NDFREE(&nd, NDF_ONLY_PNBUF);
828 if (nd.ni_vp->v_type != VREG ||
829 (error = VOP_GETATTR(nd.ni_vp, &vattr, td->td_ucred, td))) {
830 VOP_UNLOCK(nd.ni_vp, 0, td);
831 (void) vn_close(nd.ni_vp, flags, td->td_ucred, td);
832 return (error ? error : EINVAL);
833 }
834 VOP_UNLOCK(nd.ni_vp, 0, td);
835
836 if (mdio->md_options & MD_AUTOUNIT) {
837 sc = mdnew(-1);
838 mdio->md_unit = sc->unit;
839 } else {
840 sc = mdnew(mdio->md_unit);
841 }
842 if (sc == NULL) {
843 (void) vn_close(nd.ni_vp, flags, td->td_ucred, td);
844 return (EBUSY);
845 }
846
847 sc->type = MD_VNODE;
848 sc->flags = mdio->md_options & MD_FORCE;
849 if (!(flags & FWRITE))
850 sc->flags |= MD_READONLY;
851 sc->secsize = DEV_BSIZE;
852 sc->vnode = nd.ni_vp;
853
854 /*
855 * If the size is specified, override the file attributes.
856 */
857 if (mdio->md_size)
858 sc->nsect = mdio->md_size;
859 else
860 sc->nsect = vattr.va_size / sc->secsize; /* XXX: round up ? */
861 if (sc->nsect == 0) {
862 mddestroy(sc, td);
863 return (EINVAL);
864 }
865 error = mdsetcred(sc, td->td_ucred);
866 if (error) {
867 mddestroy(sc, td);
868 return (error);
869 }
870 mdinit(sc);
871 return (0);
872}
873
874static int
875mddestroy(struct md_s *sc, struct thread *td)
876{
877
878 GIANT_REQUIRED;
879
880 if (sc->dev != NULL) {
881 devstat_remove_entry(&sc->stats);
882 disk_destroy(sc->dev);
883 }
884 sc->flags |= MD_SHUTDOWN;
885 wakeup(sc);
886 while (sc->procp != NULL)
887 tsleep(&sc->procp, PRIBIO, "mddestroy", hz / 10);
888 if (sc->vnode != NULL)
889 (void)vn_close(sc->vnode, sc->flags & MD_READONLY ?
890 FREAD : (FREAD|FWRITE), sc->cred, td);
891 if (sc->cred != NULL)
892 crfree(sc->cred);
893 if (sc->object != NULL) {
894 vm_pager_deallocate(sc->object);
895 }
896 if (sc->indir)
897 destroy_indir(sc, sc->indir);
898 if (sc->uma)
899 uma_zdestroy(sc->uma);
900
901 /* XXX: LOCK(unique unit numbers) */
902 LIST_REMOVE(sc, list);
903 /* XXX: UNLOCK(unique unit numbers) */
904 free(sc, M_MD);
905 return (0);
906}
907
908static int
909mdcreate_swap(struct md_ioctl *mdio, struct thread *td)
910{
911 int error;
912 struct md_s *sc;
913
914 GIANT_REQUIRED;
915
916 if (mdio->md_options & MD_AUTOUNIT) {
917 sc = mdnew(-1);
918 mdio->md_unit = sc->unit;
919 } else {
920 sc = mdnew(mdio->md_unit);
921 }
922 if (sc == NULL)
923 return (EBUSY);
924
925 sc->type = MD_SWAP;
926
927 /*
928 * Range check. Disallow negative sizes or any size less then the
929 * size of a page. Then round to a page.
930 */
931
932 if (mdio->md_size == 0) {
933 mddestroy(sc, td);
934 return (EDOM);
935 }
936
937 /*
938 * Allocate an OBJT_SWAP object.
939 *
940 * sc_secsize is PAGE_SIZE'd
941 *
942 * mdio->size is in DEV_BSIZE'd chunks.
943 * Note the truncation.
944 */
945
946 sc->secsize = PAGE_SIZE;
947 sc->nsect = mdio->md_size / (PAGE_SIZE / DEV_BSIZE);
948 sc->object = vm_pager_allocate(OBJT_SWAP, NULL, sc->secsize * (vm_offset_t)sc->nsect, VM_PROT_DEFAULT, 0);
949 sc->flags = mdio->md_options & MD_FORCE;
950 if (mdio->md_options & MD_RESERVE) {
951 if (swap_pager_reserve(sc->object, 0, sc->nsect) < 0) {
952 vm_pager_deallocate(sc->object);
953 sc->object = NULL;
954 mddestroy(sc, td);
955 return (EDOM);
956 }
957 }
958 error = mdsetcred(sc, td->td_ucred);
959 if (error)
960 mddestroy(sc, td);
961 else
962 mdinit(sc);
963 return (error);
964}
965
966static int
967mddetach(int unit, struct thread *td)
968{
969 struct md_s *sc;
970
971 sc = mdfind(unit);
972 if (sc == NULL)
973 return (ENOENT);
974 if (sc->opencount != 0 && !(sc->flags & MD_FORCE))
975 return (EBUSY);
976 switch(sc->type) {
977 case MD_VNODE:
978 case MD_SWAP:
979 case MD_MALLOC:
980 case MD_PRELOAD:
981 return (mddestroy(sc, td));
982 default:
983 return (EOPNOTSUPP);
984 }
985}
986
987static int
988mdctlioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct thread *td)
989{
990 struct md_ioctl *mdio;
991 struct md_s *sc;
992
993 if (md_debug)
994 printf("mdctlioctl(%s %lx %p %x %p)\n",
995 devtoname(dev), cmd, addr, flags, td);
996
997 /*
998 * We assert the version number in the individual ioctl
999 * handlers instead of out here because (a) it is possible we
1000 * may add another ioctl in the future which doesn't read an
1001 * mdio, and (b) the correct return value for an unknown ioctl
1002 * is ENOIOCTL, not EINVAL.
1003 */
1004 mdio = (struct md_ioctl *)addr;
1005 switch (cmd) {
1006 case MDIOCATTACH:
1007 if (mdio->md_version != MDIOVERSION)
1008 return (EINVAL);
1009 switch (mdio->md_type) {
1010 case MD_MALLOC:
1011 return (mdcreate_malloc(mdio));
1012 case MD_PRELOAD:
1013 return (mdcreate_preload(mdio));
1014 case MD_VNODE:
1015 return (mdcreate_vnode(mdio, td));
1016 case MD_SWAP:
1017 return (mdcreate_swap(mdio, td));
1018 default:
1019 return (EINVAL);
1020 }
1021 case MDIOCDETACH:
1022 if (mdio->md_version != MDIOVERSION)
1023 return (EINVAL);
1024 if (mdio->md_file != NULL || mdio->md_size != 0 ||
1025 mdio->md_options != 0)
1026 return (EINVAL);
1027 return (mddetach(mdio->md_unit, td));
1028 case MDIOCQUERY:
1029 if (mdio->md_version != MDIOVERSION)
1030 return (EINVAL);
1031 sc = mdfind(mdio->md_unit);
1032 if (sc == NULL)
1033 return (ENOENT);
1034 mdio->md_type = sc->type;
1035 mdio->md_options = sc->flags;
1036 switch (sc->type) {
1037 case MD_MALLOC:
1038 mdio->md_size = sc->nsect;
1039 break;
1040 case MD_PRELOAD:
1041 mdio->md_size = sc->nsect;
1042 (u_char *)(uintptr_t)mdio->md_base = sc->pl_ptr;
1043 break;
1044 case MD_SWAP:
1045 mdio->md_size = sc->nsect * (PAGE_SIZE / DEV_BSIZE);
1046 break;
1047 case MD_VNODE:
1048 mdio->md_size = sc->nsect;
1049 /* XXX fill this in */
1050 mdio->md_file = NULL;
1051 break;
1052 }
1053 return (0);
1054 default:
1055 return (ENOIOCTL);
1056 };
1057 return (ENOIOCTL);
1058}
1059
1060static void
1061md_preloaded(u_char *image, unsigned length)
1062{
1063 struct md_s *sc;
1064
1065 sc = mdnew(-1);
1066 if (sc == NULL)
1067 return;
1068 sc->type = MD_PRELOAD;
1069 sc->secsize = DEV_BSIZE;
1070 sc->nsect = length / DEV_BSIZE;
1071 sc->pl_ptr = image;
1072 sc->pl_len = length;
1073 if (sc->unit == 0)
1074 mdrootready = 1;
1075 mdinit(sc);
1076}
1077
1078static void
1079md_drvinit(void *unused)
1080{
1081
1082 caddr_t mod;
1083 caddr_t c;
1084 u_char *ptr, *name, *type;
1085 unsigned len;
1086
1087#ifdef MD_ROOT_SIZE
1088 md_preloaded(mfs_root, MD_ROOT_SIZE*1024);
1089#endif
1090 mod = NULL;
1091 while ((mod = preload_search_next_name(mod)) != NULL) {
1092 name = (char *)preload_search_info(mod, MODINFO_NAME);
1093 type = (char *)preload_search_info(mod, MODINFO_TYPE);
1094 if (name == NULL)
1095 continue;
1096 if (type == NULL)
1097 continue;
1098 if (strcmp(type, "md_image") && strcmp(type, "mfs_root"))
1099 continue;
1100 c = preload_search_info(mod, MODINFO_ADDR);
1101 ptr = *(u_char **)c;
1102 c = preload_search_info(mod, MODINFO_SIZE);
1103 len = *(unsigned *)c;
1104 printf("%s%d: Preloaded image <%s> %d bytes at %p\n",
1105 MD_NAME, mdunits, name, len, ptr);
1106 md_preloaded(ptr, len);
1107 }
1108 status_dev = make_dev(&mdctl_cdevsw, 0xffff00ff, UID_ROOT, GID_WHEEL,
1109 0600, MDCTL_NAME);
1110}
1111
1112static int
1113md_modevent(module_t mod, int type, void *data)
1114{
1115 int error;
1116 struct md_s *sc;
1117
1118 switch (type) {
1119 case MOD_LOAD:
1120 md_drvinit(NULL);
1121 break;
1122 case MOD_UNLOAD:
1123 LIST_FOREACH(sc, &md_softc_list, list) {
1124 error = mddetach(sc->unit, curthread);
1125 if (error != 0)
1126 return (error);
1127 }
1128 if (status_dev)
1129 destroy_dev(status_dev);
1130 status_dev = 0;
1131 break;
1132 default:
1133 break;
1134 }
1135 return (0);
1136}
1137
1138static moduledata_t md_mod = {
1139 MD_NAME,
1140 md_modevent,
1141 NULL
1142};
1143DECLARE_MODULE(md, md_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE+CDEV_MAJOR);
1144MODULE_VERSION(md, MD_MODVER);
1145
1146
#ifdef MD_ROOT
/*
 * If a preloaded image ended up on unit 0 (mdrootready is set), make it
 * the first candidate root device.
 */
static void
md_takeroot(void *junk)
{

	if (!mdrootready)
		return;
	rootdevnames[0] = "ufs:/dev/md0c";
}

SYSINIT(md_root, SI_SUB_MOUNT_ROOT, SI_ORDER_FIRST, md_takeroot, NULL);
#endif