Deleted Added
full compact
geom_ccd.c (103942) geom_ccd.c (108470)
1/* $FreeBSD: head/sys/geom/geom_ccd.c 103942 2002-09-25 02:42:43Z jeff $ */
1/* $FreeBSD: head/sys/geom/geom_ccd.c 108470 2002-12-30 21:18:15Z schweikh $ */
2
3/* $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ */
4
5/*
6 * Copyright (c) 1995 Jason R. Thorpe.
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed for the NetBSD Project
20 * by Jason R. Thorpe.
21 * 4. The name of the author may not be used to endorse or promote products
22 * derived from this software without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
28 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
31 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
32 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37/*
38 * Copyright (c) 1988 University of Utah.
39 * Copyright (c) 1990, 1993
40 * The Regents of the University of California. All rights reserved.
41 *
42 * This code is derived from software contributed to Berkeley by
43 * the Systems Programming Group of the University of Utah Computer
44 * Science Department.
45 *
46 * Redistribution and use in source and binary forms, with or without
47 * modification, are permitted provided that the following conditions
48 * are met:
49 * 1. Redistributions of source code must retain the above copyright
50 * notice, this list of conditions and the following disclaimer.
51 * 2. Redistributions in binary form must reproduce the above copyright
52 * notice, this list of conditions and the following disclaimer in the
53 * documentation and/or other materials provided with the distribution.
54 * 3. All advertising materials mentioning features or use of this software
55 * must display the following acknowledgement:
56 * This product includes software developed by the University of
57 * California, Berkeley and its contributors.
58 * 4. Neither the name of the University nor the names of its contributors
59 * may be used to endorse or promote products derived from this software
60 * without specific prior written permission.
61 *
62 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
63 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
64 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
65 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
66 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
67 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
68 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
69 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
70 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
71 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
72 * SUCH DAMAGE.
73 *
74 * from: Utah $Hdr: cd.c 1.6 90/11/28$
75 *
76 * @(#)cd.c 8.2 (Berkeley) 11/16/93
77 */
78
79/*
80 * "Concatenated" disk driver.
81 *
82 * Dynamic configuration and disklabel support by:
83 * Jason R. Thorpe <thorpej@nas.nasa.gov>
84 * Numerical Aerodynamic Simulation Facility
85 * Mail Stop 258-6
86 * NASA Ames Research Center
87 * Moffett Field, CA 94035
88 */
89
90#include <sys/param.h>
91#include <sys/systm.h>
92#include <sys/kernel.h>
93#include <sys/module.h>
94#include <sys/proc.h>
95#include <sys/bio.h>
96#include <sys/malloc.h>
97#include <sys/namei.h>
98#include <sys/conf.h>
99#include <sys/stat.h>
100#include <sys/stdint.h>
101#include <sys/sysctl.h>
102#include <sys/disk.h>
103#include <sys/disklabel.h>
104#include <sys/devicestat.h>
105#include <sys/fcntl.h>
106#include <sys/vnode.h>
107
108#include <sys/ccdvar.h>
109
110MALLOC_DEFINE(M_CCD, "CCD driver", "Concatenated Disk driver");
111
112#if defined(CCDDEBUG) && !defined(DEBUG)
113#define DEBUG
114#endif
115
116#ifdef DEBUG
117#define CCDB_FOLLOW 0x01
118#define CCDB_INIT 0x02
119#define CCDB_IO 0x04
120#define CCDB_LABEL 0x08
121#define CCDB_VNODE 0x10
122static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL |
123 CCDB_VNODE;
124SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, "");
125#endif
126
127#define ccdunit(x) dkunit(x)
128#define ccdpart(x) dkpart(x)
129
130/*
131 This is how mirroring works (only writes are special):
132
133 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s
134 linked together by the cb_mirror field. "cb_pflags &
135 CCDPF_MIRROR_DONE" is set to 0 on both of them.
136
137 When a component returns to ccdiodone(), it checks if "cb_pflags &
138 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's
139 flag and returns. If it is, it means its partner has already
140 returned, so it will go to the regular cleanup.
141
142 */
143
/*
 * Bookkeeping wrapper around one struct bio issued by the ccd driver.
 * Instances are recycled through the ccdfreebufs list by getccdbuf()
 * and putccdbuf().  Field order is left untouched: the free-list head
 * is deliberately non-private for libkvm.
 */
144struct ccdbuf {
145 struct bio cb_buf; /* new I/O buf */
146 struct bio *cb_obp; /* ptr. to original I/O buf */
147 struct ccdbuf *cb_freenext; /* next entry on the ccdfreebufs free list */
148 int cb_unit; /* target unit */
149 int cb_comp; /* target component */
150 int cb_pflags; /* mirror/parity status flag (CCDPF_* bits) */
151 struct ccdbuf *cb_mirror; /* mirror counterpart (paired write buffer) */
152};
153
154/* bits in cb_pflags */
155#define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */
156
157#define CCDLABELDEV(dev) \
158 (makedev(major((dev)), dkmakeminor(ccdunit((dev)), 0, RAW_PART)))
159
160/* convenient macros for often-used statements */
161#define IS_ALLOCATED(unit) (ccdfind(unit) != NULL)
162#define IS_INITED(cs) (((cs)->sc_flags & CCDF_INITED) != 0)
163
164static d_open_t ccdopen;
165static d_close_t ccdclose;
166static d_strategy_t ccdstrategy;
167static d_ioctl_t ccdioctl;
168static d_psize_t ccdsize;
169
170#define NCCDFREEHIWAT 16
171
172#define CDEV_MAJOR 74
173
/*
 * Character device switch for ccd.  open, close, ioctl, strategy and
 * psize point at this driver's own handlers; read and write use
 * physread/physwrite; the poll, mmap and dump slots are filled with
 * nopoll, nommap and nodump.  ccd_clone() passes this table to
 * make_dev() when it creates a device node.
 */
174static struct cdevsw ccd_cdevsw = {
175 /* open */ ccdopen,
176 /* close */ ccdclose,
177 /* read */ physread,
178 /* write */ physwrite,
179 /* ioctl */ ccdioctl,
180 /* poll */ nopoll,
181 /* mmap */ nommap,
182 /* strategy */ ccdstrategy,
183 /* name */ "ccd",
184 /* maj */ CDEV_MAJOR,
185 /* dump */ nodump,
186 /* psize */ ccdsize,
187 /* flags */ D_DISK,
188};
189static LIST_HEAD(, ccd_s) ccd_softc_list = LIST_HEAD_INITIALIZER(&ccd_softc_list);
190
191static struct ccd_s *ccdfind(int);
192static struct ccd_s *ccdnew(int);
193static int ccddestroy(struct ccd_s *, struct proc *);
194
195/* called during module initialization */
196static void ccdattach(void);
197static int ccd_modevent(module_t, int, void *);
198
199/* called by biodone() at interrupt time */
200static void ccdiodone(struct bio *bp);
201
202static void ccdstart(struct ccd_s *, struct bio *);
203static void ccdinterleave(struct ccd_s *, int);
204static void ccdintr(struct ccd_s *, struct bio *);
205static int ccdinit(struct ccd_s *, char **, struct thread *);
206static int ccdlookup(char *, struct thread *p, struct vnode **);
207static void ccdbuffer(struct ccdbuf **ret, struct ccd_s *,
208 struct bio *, daddr_t, caddr_t, long);
209static void ccdgetdisklabel(dev_t);
210static void ccdmakedisklabel(struct ccd_s *);
211static int ccdlock(struct ccd_s *);
212static void ccdunlock(struct ccd_s *);
213
214#ifdef DEBUG
215static void printiinfo(struct ccdiinfo *);
216#endif
217
218/* Non-private for the benefit of libkvm. */
219struct ccdbuf *ccdfreebufs;
220static int numccdfreebufs;
221
222/*
223 * getccdbuf() - Allocate and zero a ccd buffer.
224 *
225 * This routine is called at splbio().
226 */
227
228static __inline
229struct ccdbuf *
230getccdbuf(struct ccdbuf *cpy)
231{
232 struct ccdbuf *cbp;
233
234 /*
235 * Allocate from freelist or malloc as necessary
236 */
237 if ((cbp = ccdfreebufs) != NULL) {
238 ccdfreebufs = cbp->cb_freenext;
239 --numccdfreebufs;
240 } else {
241 cbp = malloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK);
242 }
243
244 /*
245 * Used by mirroring code
246 */
247 if (cpy)
248 bcopy(cpy, cbp, sizeof(struct ccdbuf));
249 else
250 bzero(cbp, sizeof(struct ccdbuf));
251
252 /*
253 * independant struct bio initialization
254 */
255
256 return(cbp);
257}
258
259/*
260 * putccdbuf() - Free a ccd buffer.
261 *
262 * This routine is called at splbio().
263 */
264
265static __inline
266void
267putccdbuf(struct ccdbuf *cbp)
268{
269
270 if (numccdfreebufs < NCCDFREEHIWAT) {
271 cbp->cb_freenext = ccdfreebufs;
272 ccdfreebufs = cbp;
273 ++numccdfreebufs;
274 } else {
275 free((caddr_t)cbp, M_DEVBUF);
276 }
277}
278
279
280/*
281 * Number of blocks to leave untouched in front of a component partition.
282 * This is to avoid violating its disklabel area when it starts at the
283 * beginning of the slice.
284 */
285#if !defined(CCD_OFFSET)
286#define CCD_OFFSET 16
287#endif
288
289static struct ccd_s *
290ccdfind(int unit)
291{
292 struct ccd_s *sc = NULL;
293
294 /* XXX: LOCK(unique unit numbers) */
295 LIST_FOREACH(sc, &ccd_softc_list, list) {
296 if (sc->sc_unit == unit)
297 break;
298 }
299 /* XXX: UNLOCK(unique unit numbers) */
300 return ((sc == NULL) || (sc->sc_unit != unit) ? NULL : sc);
301}
302
303static struct ccd_s *
304ccdnew(int unit)
305{
306 struct ccd_s *sc;
307
308 /* XXX: LOCK(unique unit numbers) */
309 if (IS_ALLOCATED(unit) || unit > DKMAXUNIT)
310 return (NULL);
311
312 MALLOC(sc, struct ccd_s *, sizeof(*sc), M_CCD, M_WAITOK | M_ZERO);
313 sc->sc_unit = unit;
314 LIST_INSERT_HEAD(&ccd_softc_list, sc, list);
315 /* XXX: UNLOCK(unique unit numbers) */
316 return (sc);
317}
318
319static int
320ccddestroy(struct ccd_s *sc, struct proc *p)
321{
322
323 /* XXX: LOCK(unique unit numbers) */
324 LIST_REMOVE(sc, list);
325 /* XXX: UNLOCK(unique unit numbers) */
326 FREE(sc, M_CCD);
327 return (0);
328}
329
330static void
331ccd_clone(void *arg, char *name, int namelen, dev_t *dev)
332{
333 int i, u;
334 char *s;
335
336 if (*dev != NODEV)
337 return;
338 i = dev_stdclone(name, &s, "ccd", &u);
339 if (i != 2)
340 return;
341 if (*s < 'a' || *s > 'h')
342 return;
343 if (s[1] != '\0')
344 return;
345 *dev = make_dev(&ccd_cdevsw, u * 8 + *s - 'a',
346 UID_ROOT, GID_OPERATOR, 0640, name);
347}
348
349/*
350 * Called by main() during pseudo-device attachment. All we need
351 * to do is to add devsw entries.
352 */
353static void
354ccdattach()
355{
356
357 EVENTHANDLER_REGISTER(dev_clone, ccd_clone, 0, 1000);
358}
359
360static int
361ccd_modevent(module_t mod, int type, void *data)
362{
363 int error = 0;
364
365 switch (type) {
366 case MOD_LOAD:
367 ccdattach();
368 break;
369
370 case MOD_UNLOAD:
371 printf("ccd0: Unload not supported!\n");
372 error = EOPNOTSUPP;
373 break;
374
375 case MOD_SHUTDOWN:
376 break;
377
378 default:
379 error = EOPNOTSUPP;
380 }
381 return (error);
382}
383
384DEV_MODULE(ccd, ccd_modevent, NULL);
385
386static int
387ccdinit(struct ccd_s *cs, char **cpaths, struct thread *td)
388{
389 struct ccdcinfo *ci = NULL; /* XXX */
390 size_t size;
391 int ix;
392 struct vnode *vp;
393 size_t minsize;
394 int maxsecsize;
395 struct ccdgeom *ccg = &cs->sc_geom;
396 char *tmppath = NULL;
397 int error = 0;
398 off_t mediasize;
399 u_int sectorsize;
400
401#ifdef DEBUG
402 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
403 printf("ccdinit: unit %d\n", cs->sc_unit);
404#endif
405
406 cs->sc_size = 0;
407
408 /* Allocate space for the component info. */
409 cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo),
410 M_DEVBUF, M_WAITOK);
411
412 /*
413 * Verify that each component piece exists and record
414 * relevant information about it.
415 */
416 maxsecsize = 0;
417 minsize = 0;
418 tmppath = malloc(MAXPATHLEN, M_DEVBUF, M_WAITOK);
419 for (ix = 0; ix < cs->sc_nccdisks; ix++) {
420 vp = cs->sc_vpp[ix];
421 ci = &cs->sc_cinfo[ix];
422 ci->ci_vp = vp;
423
424 /*
425 * Copy in the pathname of the component.
426 */
427 if ((error = copyinstr(cpaths[ix], tmppath,
428 MAXPATHLEN, &ci->ci_pathlen)) != 0) {
429#ifdef DEBUG
430 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
431 printf("ccd%d: can't copy path, error = %d\n",
432 cs->sc_unit, error);
433#endif
434 goto fail;
435 }
436 ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK);
437 bcopy(tmppath, ci->ci_path, ci->ci_pathlen);
438
439 ci->ci_dev = vn_todev(vp);
440
441 /*
442 * Get partition information for the component.
443 */
444 error = VOP_IOCTL(vp, DIOCGMEDIASIZE, (caddr_t)&mediasize,
445 FREAD, td->td_ucred, td);
446 if (error != 0) {
447#ifdef DEBUG
448 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
449 printf("ccd%d: %s: ioctl failed, error = %d\n",
450 cs->sc_unit, ci->ci_path, error);
451#endif
452 goto fail;
453 }
454 /*
455 * Get partition information for the component.
456 */
457 error = VOP_IOCTL(vp, DIOCGSECTORSIZE, (caddr_t)&sectorsize,
458 FREAD, td->td_ucred, td);
459 if (error != 0) {
460#ifdef DEBUG
461 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
462 printf("ccd%d: %s: ioctl failed, error = %d\n",
463 cs->sc_unit, ci->ci_path, error);
464#endif
465 goto fail;
466 }
467 if (sectorsize > maxsecsize)
468 maxsecsize = sectorsize;
469 size = mediasize / DEV_BSIZE - CCD_OFFSET;
470
471 /*
472 * Calculate the size, truncating to an interleave
473 * boundary if necessary.
474 */
475
476 if (cs->sc_ileave > 1)
477 size -= size % cs->sc_ileave;
478
479 if (size == 0) {
480#ifdef DEBUG
481 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
482 printf("ccd%d: %s: size == 0\n",
483 cs->sc_unit, ci->ci_path);
484#endif
485 error = ENODEV;
486 goto fail;
487 }
488
489 if (minsize == 0 || size < minsize)
490 minsize = size;
491 ci->ci_size = size;
492 cs->sc_size += size;
493 }
494
495 free(tmppath, M_DEVBUF);
496 tmppath = NULL;
497
498 /*
499 * Don't allow the interleave to be smaller than
500 * the biggest component sector.
501 */
502 if ((cs->sc_ileave > 0) &&
503 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) {
504#ifdef DEBUG
505 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
506 printf("ccd%d: interleave must be at least %d\n",
507 cs->sc_unit, (maxsecsize / DEV_BSIZE));
508#endif
509 error = EINVAL;
510 goto fail;
511 }
512
513 /*
514 * If uniform interleave is desired set all sizes to that of
515 * the smallest component. This will guarentee that a single
516 * interleave table is generated.
517 *
518 * Lost space must be taken into account when calculating the
519 * overall size. Half the space is lost when CCDF_MIRROR is
520 * specified. One disk is lost when CCDF_PARITY is specified.
521 */
522 if (cs->sc_flags & CCDF_UNIFORM) {
523 for (ci = cs->sc_cinfo;
524 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
525 ci->ci_size = minsize;
526 }
527 if (cs->sc_flags & CCDF_MIRROR) {
528 /*
529 * Check to see if an even number of components
530 * have been specified. The interleave must also
531 * be non-zero in order for us to be able to
532 * guarentee the topology.
533 */
534 if (cs->sc_nccdisks % 2) {
535 printf("ccd%d: mirroring requires an even number of disks\n", cs->sc_unit );
536 error = EINVAL;
537 goto fail;
538 }
539 if (cs->sc_ileave == 0) {
540 printf("ccd%d: an interleave must be specified when mirroring\n", cs->sc_unit);
541 error = EINVAL;
542 goto fail;
543 }
544 cs->sc_size = (cs->sc_nccdisks/2) * minsize;
545 } else if (cs->sc_flags & CCDF_PARITY) {
546 cs->sc_size = (cs->sc_nccdisks-1) * minsize;
547 } else {
548 if (cs->sc_ileave == 0) {
549 printf("ccd%d: an interleave must be specified when using parity\n", cs->sc_unit);
550 error = EINVAL;
551 goto fail;
552 }
553 cs->sc_size = cs->sc_nccdisks * minsize;
554 }
555 }
556
557 /*
558 * Construct the interleave table.
559 */
560 ccdinterleave(cs, cs->sc_unit);
561
562 /*
563 * Create pseudo-geometry based on 1MB cylinders. It's
564 * pretty close.
565 */
566 ccg->ccg_secsize = maxsecsize;
567 ccg->ccg_ntracks = 1;
568 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize;
569 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors;
570
571 /*
2
3/* $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ */
4
5/*
6 * Copyright (c) 1995 Jason R. Thorpe.
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed for the NetBSD Project
20 * by Jason R. Thorpe.
21 * 4. The name of the author may not be used to endorse or promote products
22 * derived from this software without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
28 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
31 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
32 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37/*
38 * Copyright (c) 1988 University of Utah.
39 * Copyright (c) 1990, 1993
40 * The Regents of the University of California. All rights reserved.
41 *
42 * This code is derived from software contributed to Berkeley by
43 * the Systems Programming Group of the University of Utah Computer
44 * Science Department.
45 *
46 * Redistribution and use in source and binary forms, with or without
47 * modification, are permitted provided that the following conditions
48 * are met:
49 * 1. Redistributions of source code must retain the above copyright
50 * notice, this list of conditions and the following disclaimer.
51 * 2. Redistributions in binary form must reproduce the above copyright
52 * notice, this list of conditions and the following disclaimer in the
53 * documentation and/or other materials provided with the distribution.
54 * 3. All advertising materials mentioning features or use of this software
55 * must display the following acknowledgement:
56 * This product includes software developed by the University of
57 * California, Berkeley and its contributors.
58 * 4. Neither the name of the University nor the names of its contributors
59 * may be used to endorse or promote products derived from this software
60 * without specific prior written permission.
61 *
62 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
63 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
64 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
65 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
66 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
67 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
68 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
69 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
70 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
71 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
72 * SUCH DAMAGE.
73 *
74 * from: Utah $Hdr: cd.c 1.6 90/11/28$
75 *
76 * @(#)cd.c 8.2 (Berkeley) 11/16/93
77 */
78
79/*
80 * "Concatenated" disk driver.
81 *
82 * Dynamic configuration and disklabel support by:
83 * Jason R. Thorpe <thorpej@nas.nasa.gov>
84 * Numerical Aerodynamic Simulation Facility
85 * Mail Stop 258-6
86 * NASA Ames Research Center
87 * Moffett Field, CA 94035
88 */
89
90#include <sys/param.h>
91#include <sys/systm.h>
92#include <sys/kernel.h>
93#include <sys/module.h>
94#include <sys/proc.h>
95#include <sys/bio.h>
96#include <sys/malloc.h>
97#include <sys/namei.h>
98#include <sys/conf.h>
99#include <sys/stat.h>
100#include <sys/stdint.h>
101#include <sys/sysctl.h>
102#include <sys/disk.h>
103#include <sys/disklabel.h>
104#include <sys/devicestat.h>
105#include <sys/fcntl.h>
106#include <sys/vnode.h>
107
108#include <sys/ccdvar.h>
109
110MALLOC_DEFINE(M_CCD, "CCD driver", "Concatenated Disk driver");
111
112#if defined(CCDDEBUG) && !defined(DEBUG)
113#define DEBUG
114#endif
115
116#ifdef DEBUG
117#define CCDB_FOLLOW 0x01
118#define CCDB_INIT 0x02
119#define CCDB_IO 0x04
120#define CCDB_LABEL 0x08
121#define CCDB_VNODE 0x10
122static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL |
123 CCDB_VNODE;
124SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, "");
125#endif
126
127#define ccdunit(x) dkunit(x)
128#define ccdpart(x) dkpart(x)
129
130/*
131 This is how mirroring works (only writes are special):
132
133 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s
134 linked together by the cb_mirror field. "cb_pflags &
135 CCDPF_MIRROR_DONE" is set to 0 on both of them.
136
137 When a component returns to ccdiodone(), it checks if "cb_pflags &
138 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's
139 flag and returns. If it is, it means its partner has already
140 returned, so it will go to the regular cleanup.
141
142 */
143
/*
 * Bookkeeping wrapper around one struct bio issued by the ccd driver.
 * Instances are recycled through the ccdfreebufs list by getccdbuf()
 * and putccdbuf().  Field order is left untouched: the free-list head
 * is deliberately non-private for libkvm.
 */
144struct ccdbuf {
145 struct bio cb_buf; /* new I/O buf */
146 struct bio *cb_obp; /* ptr. to original I/O buf */
147 struct ccdbuf *cb_freenext; /* next entry on the ccdfreebufs free list */
148 int cb_unit; /* target unit */
149 int cb_comp; /* target component */
150 int cb_pflags; /* mirror/parity status flag (CCDPF_* bits) */
151 struct ccdbuf *cb_mirror; /* mirror counterpart (paired write buffer) */
152};
153
154/* bits in cb_pflags */
155#define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */
156
157#define CCDLABELDEV(dev) \
158 (makedev(major((dev)), dkmakeminor(ccdunit((dev)), 0, RAW_PART)))
159
160/* convenient macros for often-used statements */
161#define IS_ALLOCATED(unit) (ccdfind(unit) != NULL)
162#define IS_INITED(cs) (((cs)->sc_flags & CCDF_INITED) != 0)
163
164static d_open_t ccdopen;
165static d_close_t ccdclose;
166static d_strategy_t ccdstrategy;
167static d_ioctl_t ccdioctl;
168static d_psize_t ccdsize;
169
170#define NCCDFREEHIWAT 16
171
172#define CDEV_MAJOR 74
173
/*
 * Character device switch for ccd.  open, close, ioctl, strategy and
 * psize point at this driver's own handlers; read and write use
 * physread/physwrite; the poll, mmap and dump slots are filled with
 * nopoll, nommap and nodump.  ccd_clone() passes this table to
 * make_dev() when it creates a device node.
 */
174static struct cdevsw ccd_cdevsw = {
175 /* open */ ccdopen,
176 /* close */ ccdclose,
177 /* read */ physread,
178 /* write */ physwrite,
179 /* ioctl */ ccdioctl,
180 /* poll */ nopoll,
181 /* mmap */ nommap,
182 /* strategy */ ccdstrategy,
183 /* name */ "ccd",
184 /* maj */ CDEV_MAJOR,
185 /* dump */ nodump,
186 /* psize */ ccdsize,
187 /* flags */ D_DISK,
188};
189static LIST_HEAD(, ccd_s) ccd_softc_list = LIST_HEAD_INITIALIZER(&ccd_softc_list);
190
191static struct ccd_s *ccdfind(int);
192static struct ccd_s *ccdnew(int);
193static int ccddestroy(struct ccd_s *, struct proc *);
194
195/* called during module initialization */
196static void ccdattach(void);
197static int ccd_modevent(module_t, int, void *);
198
199/* called by biodone() at interrupt time */
200static void ccdiodone(struct bio *bp);
201
202static void ccdstart(struct ccd_s *, struct bio *);
203static void ccdinterleave(struct ccd_s *, int);
204static void ccdintr(struct ccd_s *, struct bio *);
205static int ccdinit(struct ccd_s *, char **, struct thread *);
206static int ccdlookup(char *, struct thread *p, struct vnode **);
207static void ccdbuffer(struct ccdbuf **ret, struct ccd_s *,
208 struct bio *, daddr_t, caddr_t, long);
209static void ccdgetdisklabel(dev_t);
210static void ccdmakedisklabel(struct ccd_s *);
211static int ccdlock(struct ccd_s *);
212static void ccdunlock(struct ccd_s *);
213
214#ifdef DEBUG
215static void printiinfo(struct ccdiinfo *);
216#endif
217
218/* Non-private for the benefit of libkvm. */
219struct ccdbuf *ccdfreebufs;
220static int numccdfreebufs;
221
222/*
223 * getccdbuf() - Allocate and zero a ccd buffer.
224 *
225 * This routine is called at splbio().
226 */
227
228static __inline
229struct ccdbuf *
230getccdbuf(struct ccdbuf *cpy)
231{
232 struct ccdbuf *cbp;
233
234 /*
235 * Allocate from freelist or malloc as necessary
236 */
237 if ((cbp = ccdfreebufs) != NULL) {
238 ccdfreebufs = cbp->cb_freenext;
239 --numccdfreebufs;
240 } else {
241 cbp = malloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK);
242 }
243
244 /*
245 * Used by mirroring code
246 */
247 if (cpy)
248 bcopy(cpy, cbp, sizeof(struct ccdbuf));
249 else
250 bzero(cbp, sizeof(struct ccdbuf));
251
252 /*
253 * independant struct bio initialization
254 */
255
256 return(cbp);
257}
258
259/*
260 * putccdbuf() - Free a ccd buffer.
261 *
262 * This routine is called at splbio().
263 */
264
265static __inline
266void
267putccdbuf(struct ccdbuf *cbp)
268{
269
270 if (numccdfreebufs < NCCDFREEHIWAT) {
271 cbp->cb_freenext = ccdfreebufs;
272 ccdfreebufs = cbp;
273 ++numccdfreebufs;
274 } else {
275 free((caddr_t)cbp, M_DEVBUF);
276 }
277}
278
279
280/*
281 * Number of blocks to leave untouched in front of a component partition.
282 * This is to avoid violating its disklabel area when it starts at the
283 * beginning of the slice.
284 */
285#if !defined(CCD_OFFSET)
286#define CCD_OFFSET 16
287#endif
288
289static struct ccd_s *
290ccdfind(int unit)
291{
292 struct ccd_s *sc = NULL;
293
294 /* XXX: LOCK(unique unit numbers) */
295 LIST_FOREACH(sc, &ccd_softc_list, list) {
296 if (sc->sc_unit == unit)
297 break;
298 }
299 /* XXX: UNLOCK(unique unit numbers) */
300 return ((sc == NULL) || (sc->sc_unit != unit) ? NULL : sc);
301}
302
303static struct ccd_s *
304ccdnew(int unit)
305{
306 struct ccd_s *sc;
307
308 /* XXX: LOCK(unique unit numbers) */
309 if (IS_ALLOCATED(unit) || unit > DKMAXUNIT)
310 return (NULL);
311
312 MALLOC(sc, struct ccd_s *, sizeof(*sc), M_CCD, M_WAITOK | M_ZERO);
313 sc->sc_unit = unit;
314 LIST_INSERT_HEAD(&ccd_softc_list, sc, list);
315 /* XXX: UNLOCK(unique unit numbers) */
316 return (sc);
317}
318
319static int
320ccddestroy(struct ccd_s *sc, struct proc *p)
321{
322
323 /* XXX: LOCK(unique unit numbers) */
324 LIST_REMOVE(sc, list);
325 /* XXX: UNLOCK(unique unit numbers) */
326 FREE(sc, M_CCD);
327 return (0);
328}
329
330static void
331ccd_clone(void *arg, char *name, int namelen, dev_t *dev)
332{
333 int i, u;
334 char *s;
335
336 if (*dev != NODEV)
337 return;
338 i = dev_stdclone(name, &s, "ccd", &u);
339 if (i != 2)
340 return;
341 if (*s < 'a' || *s > 'h')
342 return;
343 if (s[1] != '\0')
344 return;
345 *dev = make_dev(&ccd_cdevsw, u * 8 + *s - 'a',
346 UID_ROOT, GID_OPERATOR, 0640, name);
347}
348
349/*
350 * Called by main() during pseudo-device attachment. All we need
351 * to do is to add devsw entries.
352 */
353static void
354ccdattach()
355{
356
357 EVENTHANDLER_REGISTER(dev_clone, ccd_clone, 0, 1000);
358}
359
360static int
361ccd_modevent(module_t mod, int type, void *data)
362{
363 int error = 0;
364
365 switch (type) {
366 case MOD_LOAD:
367 ccdattach();
368 break;
369
370 case MOD_UNLOAD:
371 printf("ccd0: Unload not supported!\n");
372 error = EOPNOTSUPP;
373 break;
374
375 case MOD_SHUTDOWN:
376 break;
377
378 default:
379 error = EOPNOTSUPP;
380 }
381 return (error);
382}
383
384DEV_MODULE(ccd, ccd_modevent, NULL);
385
386static int
387ccdinit(struct ccd_s *cs, char **cpaths, struct thread *td)
388{
389 struct ccdcinfo *ci = NULL; /* XXX */
390 size_t size;
391 int ix;
392 struct vnode *vp;
393 size_t minsize;
394 int maxsecsize;
395 struct ccdgeom *ccg = &cs->sc_geom;
396 char *tmppath = NULL;
397 int error = 0;
398 off_t mediasize;
399 u_int sectorsize;
400
401#ifdef DEBUG
402 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
403 printf("ccdinit: unit %d\n", cs->sc_unit);
404#endif
405
406 cs->sc_size = 0;
407
408 /* Allocate space for the component info. */
409 cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo),
410 M_DEVBUF, M_WAITOK);
411
412 /*
413 * Verify that each component piece exists and record
414 * relevant information about it.
415 */
416 maxsecsize = 0;
417 minsize = 0;
418 tmppath = malloc(MAXPATHLEN, M_DEVBUF, M_WAITOK);
419 for (ix = 0; ix < cs->sc_nccdisks; ix++) {
420 vp = cs->sc_vpp[ix];
421 ci = &cs->sc_cinfo[ix];
422 ci->ci_vp = vp;
423
424 /*
425 * Copy in the pathname of the component.
426 */
427 if ((error = copyinstr(cpaths[ix], tmppath,
428 MAXPATHLEN, &ci->ci_pathlen)) != 0) {
429#ifdef DEBUG
430 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
431 printf("ccd%d: can't copy path, error = %d\n",
432 cs->sc_unit, error);
433#endif
434 goto fail;
435 }
436 ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK);
437 bcopy(tmppath, ci->ci_path, ci->ci_pathlen);
438
439 ci->ci_dev = vn_todev(vp);
440
441 /*
442 * Get partition information for the component.
443 */
444 error = VOP_IOCTL(vp, DIOCGMEDIASIZE, (caddr_t)&mediasize,
445 FREAD, td->td_ucred, td);
446 if (error != 0) {
447#ifdef DEBUG
448 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
449 printf("ccd%d: %s: ioctl failed, error = %d\n",
450 cs->sc_unit, ci->ci_path, error);
451#endif
452 goto fail;
453 }
454 /*
455 * Get partition information for the component.
456 */
457 error = VOP_IOCTL(vp, DIOCGSECTORSIZE, (caddr_t)&sectorsize,
458 FREAD, td->td_ucred, td);
459 if (error != 0) {
460#ifdef DEBUG
461 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
462 printf("ccd%d: %s: ioctl failed, error = %d\n",
463 cs->sc_unit, ci->ci_path, error);
464#endif
465 goto fail;
466 }
467 if (sectorsize > maxsecsize)
468 maxsecsize = sectorsize;
469 size = mediasize / DEV_BSIZE - CCD_OFFSET;
470
471 /*
472 * Calculate the size, truncating to an interleave
473 * boundary if necessary.
474 */
475
476 if (cs->sc_ileave > 1)
477 size -= size % cs->sc_ileave;
478
479 if (size == 0) {
480#ifdef DEBUG
481 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
482 printf("ccd%d: %s: size == 0\n",
483 cs->sc_unit, ci->ci_path);
484#endif
485 error = ENODEV;
486 goto fail;
487 }
488
489 if (minsize == 0 || size < minsize)
490 minsize = size;
491 ci->ci_size = size;
492 cs->sc_size += size;
493 }
494
495 free(tmppath, M_DEVBUF);
496 tmppath = NULL;
497
498 /*
499 * Don't allow the interleave to be smaller than
500 * the biggest component sector.
501 */
502 if ((cs->sc_ileave > 0) &&
503 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) {
504#ifdef DEBUG
505 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
506 printf("ccd%d: interleave must be at least %d\n",
507 cs->sc_unit, (maxsecsize / DEV_BSIZE));
508#endif
509 error = EINVAL;
510 goto fail;
511 }
512
513 /*
514 * If uniform interleave is desired set all sizes to that of
515 * the smallest component. This will guarentee that a single
516 * interleave table is generated.
517 *
518 * Lost space must be taken into account when calculating the
519 * overall size. Half the space is lost when CCDF_MIRROR is
520 * specified. One disk is lost when CCDF_PARITY is specified.
521 */
522 if (cs->sc_flags & CCDF_UNIFORM) {
523 for (ci = cs->sc_cinfo;
524 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
525 ci->ci_size = minsize;
526 }
527 if (cs->sc_flags & CCDF_MIRROR) {
528 /*
529 * Check to see if an even number of components
530 * have been specified. The interleave must also
531 * be non-zero in order for us to be able to
532 * guarentee the topology.
533 */
534 if (cs->sc_nccdisks % 2) {
535 printf("ccd%d: mirroring requires an even number of disks\n", cs->sc_unit );
536 error = EINVAL;
537 goto fail;
538 }
539 if (cs->sc_ileave == 0) {
540 printf("ccd%d: an interleave must be specified when mirroring\n", cs->sc_unit);
541 error = EINVAL;
542 goto fail;
543 }
544 cs->sc_size = (cs->sc_nccdisks/2) * minsize;
545 } else if (cs->sc_flags & CCDF_PARITY) {
546 cs->sc_size = (cs->sc_nccdisks-1) * minsize;
547 } else {
548 if (cs->sc_ileave == 0) {
549 printf("ccd%d: an interleave must be specified when using parity\n", cs->sc_unit);
550 error = EINVAL;
551 goto fail;
552 }
553 cs->sc_size = cs->sc_nccdisks * minsize;
554 }
555 }
556
557 /*
558 * Construct the interleave table.
559 */
560 ccdinterleave(cs, cs->sc_unit);
561
562 /*
563 * Create pseudo-geometry based on 1MB cylinders. It's
564 * pretty close.
565 */
566 ccg->ccg_secsize = maxsecsize;
567 ccg->ccg_ntracks = 1;
568 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize;
569 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors;
570
571 /*
572 * Add an devstat entry for this device.
572 * Add a devstat entry for this device.
573 */
574 devstat_add_entry(&cs->device_stats, "ccd", cs->sc_unit,
575 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED,
576 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER,
577 DEVSTAT_PRIORITY_ARRAY);
578
579 cs->sc_flags |= CCDF_INITED;
580 cs->sc_cflags = cs->sc_flags; /* So we can find out later... */
581 return (0);
582fail:
583 while (ci > cs->sc_cinfo) {
584 ci--;
585 free(ci->ci_path, M_DEVBUF);
586 }
587 if (tmppath != NULL)
588 free(tmppath, M_DEVBUF);
589 free(cs->sc_cinfo, M_DEVBUF);
590 return (error);
591}
592
593static void
594ccdinterleave(struct ccd_s *cs, int unit)
595{
596 struct ccdcinfo *ci, *smallci;
597 struct ccdiinfo *ii;
598 daddr_t bn, lbn;
599 int ix;
600 u_long size;
601
602#ifdef DEBUG
603 if (ccddebug & CCDB_INIT)
604 printf("ccdinterleave(%p): ileave %d\n", cs, cs->sc_ileave);
605#endif
606
607 /*
608 * Allocate an interleave table. The worst case occurs when each
609 * of N disks is of a different size, resulting in N interleave
610 * tables.
611 *
612 * Chances are this is too big, but we don't care.
613 */
614 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo);
615 cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF,
616 M_WAITOK | M_ZERO);
617
618 /*
619 * Trivial case: no interleave (actually interleave of disk size).
620 * Each table entry represents a single component in its entirety.
621 *
622 * An interleave of 0 may not be used with a mirror or parity setup.
623 */
624 if (cs->sc_ileave == 0) {
625 bn = 0;
626 ii = cs->sc_itable;
627
628 for (ix = 0; ix < cs->sc_nccdisks; ix++) {
629 /* Allocate space for ii_index. */
630 ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK);
631 ii->ii_ndisk = 1;
632 ii->ii_startblk = bn;
633 ii->ii_startoff = 0;
634 ii->ii_index[0] = ix;
635 bn += cs->sc_cinfo[ix].ci_size;
636 ii++;
637 }
638 ii->ii_ndisk = 0;
639#ifdef DEBUG
640 if (ccddebug & CCDB_INIT)
641 printiinfo(cs->sc_itable);
642#endif
643 return;
644 }
645
646 /*
647 * The following isn't fast or pretty; it doesn't have to be.
648 */
649 size = 0;
650 bn = lbn = 0;
651 for (ii = cs->sc_itable; ; ii++) {
652 /*
653 * Allocate space for ii_index. We might allocate more then
654 * we use.
655 */
656 ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks),
657 M_DEVBUF, M_WAITOK);
658
659 /*
660 * Locate the smallest of the remaining components
661 */
662 smallci = NULL;
663 for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks];
664 ci++) {
665 if (ci->ci_size > size &&
666 (smallci == NULL ||
667 ci->ci_size < smallci->ci_size)) {
668 smallci = ci;
669 }
670 }
671
672 /*
673 * Nobody left, all done
674 */
675 if (smallci == NULL) {
676 ii->ii_ndisk = 0;
677 break;
678 }
679
680 /*
681 * Record starting logical block using an sc_ileave blocksize.
682 */
683 ii->ii_startblk = bn / cs->sc_ileave;
684
685 /*
686 * Record starting comopnent block using an sc_ileave
687 * blocksize. This value is relative to the beginning of
688 * a component disk.
689 */
690 ii->ii_startoff = lbn;
691
692 /*
693 * Determine how many disks take part in this interleave
694 * and record their indices.
695 */
696 ix = 0;
697 for (ci = cs->sc_cinfo;
698 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
699 if (ci->ci_size >= smallci->ci_size) {
700 ii->ii_index[ix++] = ci - cs->sc_cinfo;
701 }
702 }
703 ii->ii_ndisk = ix;
704 bn += ix * (smallci->ci_size - size);
705 lbn = smallci->ci_size / cs->sc_ileave;
706 size = smallci->ci_size;
707 }
708#ifdef DEBUG
709 if (ccddebug & CCDB_INIT)
710 printiinfo(cs->sc_itable);
711#endif
712}
713
714/* ARGSUSED */
715static int
716ccdopen(dev_t dev, int flags, int fmt, struct thread *td)
717{
718 int unit = ccdunit(dev);
719 struct ccd_s *cs;
720 struct disklabel *lp;
721 int error = 0, part, pmask;
722
723#ifdef DEBUG
724 if (ccddebug & CCDB_FOLLOW)
725 printf("ccdopen(%p, %x)\n", dev, flags);
726#endif
727
728 cs = IS_ALLOCATED(unit) ? ccdfind(unit) : ccdnew(unit);
729
730 if ((error = ccdlock(cs)) != 0)
731 return (error);
732
733 lp = &cs->sc_label;
734
735 part = ccdpart(dev);
736 pmask = (1 << part);
737
738 /*
739 * If we're initialized, check to see if there are any other
740 * open partitions. If not, then it's safe to update
741 * the in-core disklabel.
742 */
743 if (IS_INITED(cs) && (cs->sc_openmask == 0))
744 ccdgetdisklabel(dev);
745
746 /* Check that the partition exists. */
747 if (part != RAW_PART && ((part >= lp->d_npartitions) ||
748 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
749 error = ENXIO;
750 goto done;
751 }
752
753 cs->sc_openmask |= pmask;
754 done:
755 ccdunlock(cs);
756 return (0);
757}
758
759/* ARGSUSED */
760static int
761ccdclose(dev_t dev, int flags, int fmt, struct thread *td)
762{
763 int unit = ccdunit(dev);
764 struct ccd_s *cs;
765 int error = 0, part;
766
767#ifdef DEBUG
768 if (ccddebug & CCDB_FOLLOW)
769 printf("ccdclose(%p, %x)\n", dev, flags);
770#endif
771
772 if (!IS_ALLOCATED(unit))
773 return (ENXIO);
774 cs = ccdfind(unit);
775
776 if ((error = ccdlock(cs)) != 0)
777 return (error);
778
779 part = ccdpart(dev);
780
781 /* ...that much closer to allowing unconfiguration... */
782 cs->sc_openmask &= ~(1 << part);
783 /* collect "garbage" if possible */
784 if (!IS_INITED(cs) && (cs->sc_flags & CCDF_WANTED) == 0)
785 ccddestroy(cs, td->td_proc);
786 else
787 ccdunlock(cs);
788 return (0);
789}
790
791static void
792ccdstrategy(struct bio *bp)
793{
794 int unit = ccdunit(bp->bio_dev);
795 struct ccd_s *cs = ccdfind(unit);
796 int s;
797 int wlabel;
798 struct disklabel *lp;
799
800#ifdef DEBUG
801 if (ccddebug & CCDB_FOLLOW)
802 printf("ccdstrategy(%p): unit %d\n", bp, unit);
803#endif
804 if (!IS_INITED(cs)) {
805 biofinish(bp, NULL, ENXIO);
806 return;
807 }
808
809 /* If it's a nil transfer, wake up the top half now. */
810 if (bp->bio_bcount == 0) {
811 biodone(bp);
812 return;
813 }
814
815 lp = &cs->sc_label;
816
817 /*
818 * Do bounds checking and adjust transfer. If there's an
819 * error, the bounds check will flag that for us.
820 */
821 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING);
822 if (ccdpart(bp->bio_dev) != RAW_PART) {
823 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
824 biodone(bp);
825 return;
826 }
827 } else {
828 int pbn; /* in sc_secsize chunks */
829 long sz; /* in sc_secsize chunks */
830
831 pbn = bp->bio_blkno / (cs->sc_geom.ccg_secsize / DEV_BSIZE);
832 sz = howmany(bp->bio_bcount, cs->sc_geom.ccg_secsize);
833
834 /*
835 * If out of bounds return an error. If at the EOF point,
836 * simply read or write less.
837 */
838
839 if (pbn < 0 || pbn >= cs->sc_size) {
840 bp->bio_resid = bp->bio_bcount;
841 if (pbn != cs->sc_size)
842 biofinish(bp, NULL, EINVAL);
843 else
844 biodone(bp);
845 return;
846 }
847
848 /*
849 * If the request crosses EOF, truncate the request.
850 */
851 if (pbn + sz > cs->sc_size) {
852 bp->bio_bcount = (cs->sc_size - pbn) *
853 cs->sc_geom.ccg_secsize;
854 }
855 }
856
857 bp->bio_resid = bp->bio_bcount;
858
859 /*
860 * "Start" the unit.
861 */
862 s = splbio();
863 ccdstart(cs, bp);
864 splx(s);
865 return;
866}
867
868static void
869ccdstart(struct ccd_s *cs, struct bio *bp)
870{
871 long bcount, rcount;
872 struct ccdbuf *cbp[4];
873 /* XXX! : 2 reads and 2 writes for RAID 4/5 */
874 caddr_t addr;
875 daddr_t bn;
876 struct partition *pp;
877
878#ifdef DEBUG
879 if (ccddebug & CCDB_FOLLOW)
880 printf("ccdstart(%p, %p)\n", cs, bp);
881#endif
882
883 /* Record the transaction start */
884 devstat_start_transaction(&cs->device_stats);
885
886 /*
887 * Translate the partition-relative block number to an absolute.
888 */
889 bn = bp->bio_blkno;
890 if (ccdpart(bp->bio_dev) != RAW_PART) {
891 pp = &cs->sc_label.d_partitions[ccdpart(bp->bio_dev)];
892 bn += pp->p_offset;
893 }
894
895 /*
896 * Allocate component buffers and fire off the requests
897 */
898 addr = bp->bio_data;
899 for (bcount = bp->bio_bcount; bcount > 0; bcount -= rcount) {
900 ccdbuffer(cbp, cs, bp, bn, addr, bcount);
901 rcount = cbp[0]->cb_buf.bio_bcount;
902
903 if (cs->sc_cflags & CCDF_MIRROR) {
904 /*
905 * Mirroring. Writes go to both disks, reads are
906 * taken from whichever disk seems most appropriate.
907 *
908 * We attempt to localize reads to the disk whos arm
909 * is nearest the read request. We ignore seeks due
910 * to writes when making this determination and we
911 * also try to avoid hogging.
912 */
913 if (cbp[0]->cb_buf.bio_cmd == BIO_WRITE) {
914 BIO_STRATEGY(&cbp[0]->cb_buf, 0);
915 BIO_STRATEGY(&cbp[1]->cb_buf, 0);
916 } else {
917 int pick = cs->sc_pick;
918 daddr_t range = cs->sc_size / 16;
919
920 if (bn < cs->sc_blk[pick] - range ||
921 bn > cs->sc_blk[pick] + range
922 ) {
923 cs->sc_pick = pick = 1 - pick;
924 }
925 cs->sc_blk[pick] = bn + btodb(rcount);
926 BIO_STRATEGY(&cbp[pick]->cb_buf, 0);
927 }
928 } else {
929 /*
930 * Not mirroring
931 */
932 BIO_STRATEGY(&cbp[0]->cb_buf, 0);
933 }
934 bn += btodb(rcount);
935 addr += rcount;
936 }
937}
938
939/*
940 * Build a component buffer header.
941 */
942static void
943ccdbuffer(struct ccdbuf **cb, struct ccd_s *cs, struct bio *bp, daddr_t bn, caddr_t addr, long bcount)
944{
945 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */
946 struct ccdbuf *cbp;
947 daddr_t cbn, cboff;
948 off_t cbc;
949
950#ifdef DEBUG
951 if (ccddebug & CCDB_IO)
952 printf("ccdbuffer(%p, %p, %lld, %p, %ld)\n",
953 (void *)cs, (void *)bp, (long long)bn, (void *)addr,
954 bcount);
955#endif
956 /*
957 * Determine which component bn falls in.
958 */
959 cbn = bn;
960 cboff = 0;
961
962 if (cs->sc_ileave == 0) {
963 /*
964 * Serially concatenated and neither a mirror nor a parity
965 * config. This is a special case.
966 */
967 daddr_t sblk;
968
969 sblk = 0;
970 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++)
971 sblk += ci->ci_size;
972 cbn -= sblk;
973 } else {
974 struct ccdiinfo *ii;
975 int ccdisk, off;
976
977 /*
978 * Calculate cbn, the logical superblock (sc_ileave chunks),
979 * and cboff, a normal block offset (DEV_BSIZE chunks) relative
980 * to cbn.
981 */
982 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */
983 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */
984
985 /*
986 * Figure out which interleave table to use.
987 */
988 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) {
989 if (ii->ii_startblk > cbn)
990 break;
991 }
992 ii--;
993
994 /*
995 * off is the logical superblock relative to the beginning
996 * of this interleave block.
997 */
998 off = cbn - ii->ii_startblk;
999
1000 /*
1001 * We must calculate which disk component to use (ccdisk),
1002 * and recalculate cbn to be the superblock relative to
1003 * the beginning of the component. This is typically done by
1004 * adding 'off' and ii->ii_startoff together. However, 'off'
1005 * must typically be divided by the number of components in
1006 * this interleave array to be properly convert it from a
1007 * CCD-relative logical superblock number to a
1008 * component-relative superblock number.
1009 */
1010 if (ii->ii_ndisk == 1) {
1011 /*
1012 * When we have just one disk, it can't be a mirror
1013 * or a parity config.
1014 */
1015 ccdisk = ii->ii_index[0];
1016 cbn = ii->ii_startoff + off;
1017 } else {
1018 if (cs->sc_cflags & CCDF_MIRROR) {
1019 /*
1020 * We have forced a uniform mapping, resulting
1021 * in a single interleave array. We double
1022 * up on the first half of the available
1023 * components and our mirror is in the second
1024 * half. This only works with a single
1025 * interleave array because doubling up
1026 * doubles the number of sectors, so there
1027 * cannot be another interleave array because
1028 * the next interleave array's calculations
1029 * would be off.
1030 */
1031 int ndisk2 = ii->ii_ndisk / 2;
1032 ccdisk = ii->ii_index[off % ndisk2];
1033 cbn = ii->ii_startoff + off / ndisk2;
1034 ci2 = &cs->sc_cinfo[ccdisk + ndisk2];
1035 } else if (cs->sc_cflags & CCDF_PARITY) {
1036 /*
1037 * XXX not implemented yet
1038 */
1039 int ndisk2 = ii->ii_ndisk - 1;
1040 ccdisk = ii->ii_index[off % ndisk2];
1041 cbn = ii->ii_startoff + off / ndisk2;
1042 if (cbn % ii->ii_ndisk <= ccdisk)
1043 ccdisk++;
1044 } else {
1045 ccdisk = ii->ii_index[off % ii->ii_ndisk];
1046 cbn = ii->ii_startoff + off / ii->ii_ndisk;
1047 }
1048 }
1049
1050 ci = &cs->sc_cinfo[ccdisk];
1051
1052 /*
1053 * Convert cbn from a superblock to a normal block so it
1054 * can be used to calculate (along with cboff) the normal
1055 * block index into this particular disk.
1056 */
1057 cbn *= cs->sc_ileave;
1058 }
1059
1060 /*
1061 * Fill in the component buf structure.
1062 */
1063 cbp = getccdbuf(NULL);
1064 cbp->cb_buf.bio_cmd = bp->bio_cmd;
1065 cbp->cb_buf.bio_done = ccdiodone;
1066 cbp->cb_buf.bio_dev = ci->ci_dev; /* XXX */
1067 cbp->cb_buf.bio_blkno = cbn + cboff + CCD_OFFSET;
1068 cbp->cb_buf.bio_offset = dbtob(cbn + cboff + CCD_OFFSET);
1069 cbp->cb_buf.bio_data = addr;
1070 if (cs->sc_ileave == 0)
1071 cbc = dbtob((off_t)(ci->ci_size - cbn));
1072 else
1073 cbc = dbtob((off_t)(cs->sc_ileave - cboff));
1074 cbp->cb_buf.bio_bcount = (cbc < bcount) ? cbc : bcount;
1075 cbp->cb_buf.bio_caller1 = (void*)cbp->cb_buf.bio_bcount;
1076
1077 /*
1078 * context for ccdiodone
1079 */
1080 cbp->cb_obp = bp;
1081 cbp->cb_unit = cs->sc_unit;
1082 cbp->cb_comp = ci - cs->sc_cinfo;
1083
1084#ifdef DEBUG
1085 if (ccddebug & CCDB_IO)
1086 printf(" dev %p(u%ld): cbp %p bn %jd addr %p bcnt %ld\n",
1087 ci->ci_dev, (unsigned long)(ci-cs->sc_cinfo), cbp,
1088 (intmax_t)cbp->cb_buf.bio_blkno, cbp->cb_buf.bio_data,
1089 cbp->cb_buf.bio_bcount);
1090#endif
1091 cb[0] = cbp;
1092
1093 /*
1094 * Note: both I/O's setup when reading from mirror, but only one
1095 * will be executed.
1096 */
1097 if (cs->sc_cflags & CCDF_MIRROR) {
1098 /* mirror, setup second I/O */
1099 cbp = getccdbuf(cb[0]);
1100 cbp->cb_buf.bio_dev = ci2->ci_dev;
1101 cbp->cb_comp = ci2 - cs->sc_cinfo;
1102 cb[1] = cbp;
1103 /* link together the ccdbuf's and clear "mirror done" flag */
1104 cb[0]->cb_mirror = cb[1];
1105 cb[1]->cb_mirror = cb[0];
1106 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE;
1107 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE;
1108 }
1109}
1110
1111static void
1112ccdintr(struct ccd_s *cs, struct bio *bp)
1113{
1114#ifdef DEBUG
1115 if (ccddebug & CCDB_FOLLOW)
1116 printf("ccdintr(%p, %p)\n", cs, bp);
1117#endif
1118 /*
1119 * Request is done for better or worse, wakeup the top half.
1120 */
1121 if (bp->bio_flags & BIO_ERROR)
1122 bp->bio_resid = bp->bio_bcount;
1123 biofinish(bp, &cs->device_stats, 0);
1124}
1125
1126/*
1127 * Called at interrupt time.
1128 * Mark the component as done and if all components are done,
1129 * take a ccd interrupt.
1130 */
1131static void
1132ccdiodone(struct bio *ibp)
1133{
1134 struct ccdbuf *cbp = (struct ccdbuf *)ibp;
1135 struct bio *bp = cbp->cb_obp;
1136 int unit = cbp->cb_unit;
1137 int count, s;
1138
1139 s = splbio();
1140#ifdef DEBUG
1141 if (ccddebug & CCDB_FOLLOW)
1142 printf("ccdiodone(%p)\n", cbp);
1143 if (ccddebug & CCDB_IO) {
1144 printf("ccdiodone: bp %p bcount %ld resid %ld\n",
1145 bp, bp->bio_bcount, bp->bio_resid);
1146 printf(" dev %p(u%d), cbp %p bn %jd addr %p bcnt %ld\n",
1147 cbp->cb_buf.bio_dev, cbp->cb_comp, cbp,
1148 (intmax_t)cbp->cb_buf.bio_blkno, cbp->cb_buf.bio_data,
1149 cbp->cb_buf.bio_bcount);
1150 }
1151#endif
1152 /*
1153 * If an error occured, report it. If this is a mirrored
1154 * configuration and the first of two possible reads, do not
1155 * set the error in the bp yet because the second read may
1156 * succeed.
1157 */
1158
1159 if (cbp->cb_buf.bio_flags & BIO_ERROR) {
1160 const char *msg = "";
1161
1162 if ((ccdfind(unit)->sc_cflags & CCDF_MIRROR) &&
1163 (cbp->cb_buf.bio_cmd == BIO_READ) &&
1164 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1165 /*
1166 * We will try our read on the other disk down
1167 * below, also reverse the default pick so if we
1168 * are doing a scan we do not keep hitting the
1169 * bad disk first.
1170 */
1171 struct ccd_s *cs = ccdfind(unit);
1172
1173 msg = ", trying other disk";
1174 cs->sc_pick = 1 - cs->sc_pick;
1175 cs->sc_blk[cs->sc_pick] = bp->bio_blkno;
1176 } else {
1177 bp->bio_flags |= BIO_ERROR;
1178 bp->bio_error = cbp->cb_buf.bio_error ?
1179 cbp->cb_buf.bio_error : EIO;
1180 }
1181 printf("ccd%d: error %d on component %d block %jd "
1182 "(ccd block %jd)%s\n", unit, bp->bio_error, cbp->cb_comp,
1183 (intmax_t)cbp->cb_buf.bio_blkno, (intmax_t)bp->bio_blkno,
1184 msg);
1185 }
1186
1187 /*
1188 * Process mirror. If we are writing, I/O has been initiated on both
1189 * buffers and we fall through only after both are finished.
1190 *
1191 * If we are reading only one I/O is initiated at a time. If an
1192 * error occurs we initiate the second I/O and return, otherwise
1193 * we free the second I/O without initiating it.
1194 */
1195
1196 if (ccdfind(unit)->sc_cflags & CCDF_MIRROR) {
1197 if (cbp->cb_buf.bio_cmd == BIO_WRITE) {
1198 /*
1199 * When writing, handshake with the second buffer
1200 * to determine when both are done. If both are not
1201 * done, return here.
1202 */
1203 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1204 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE;
1205 putccdbuf(cbp);
1206 splx(s);
1207 return;
1208 }
1209 } else {
1210 /*
1211 * When reading, either dispose of the second buffer
1212 * or initiate I/O on the second buffer if an error
1213 * occured with this one.
1214 */
1215 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1216 if (cbp->cb_buf.bio_flags & BIO_ERROR) {
1217 cbp->cb_mirror->cb_pflags |=
1218 CCDPF_MIRROR_DONE;
1219 BIO_STRATEGY(&cbp->cb_mirror->cb_buf, 0);
1220 putccdbuf(cbp);
1221 splx(s);
1222 return;
1223 } else {
1224 putccdbuf(cbp->cb_mirror);
1225 /* fall through */
1226 }
1227 }
1228 }
1229 }
1230
1231 /*
1232 * use bio_caller1 to determine how big the original request was rather
1233 * then bio_bcount, because bio_bcount may have been truncated for EOF.
1234 *
1235 * XXX We check for an error, but we do not test the resid for an
1236 * aligned EOF condition. This may result in character & block
1237 * device access not recognizing EOF properly when read or written
1238 * sequentially, but will not effect filesystems.
1239 */
1240 count = (long)cbp->cb_buf.bio_caller1;
1241 putccdbuf(cbp);
1242
1243 /*
1244 * If all done, "interrupt".
1245 */
1246 bp->bio_resid -= count;
1247 if (bp->bio_resid < 0)
1248 panic("ccdiodone: count");
1249 if (bp->bio_resid == 0)
1250 ccdintr(ccdfind(unit), bp);
1251 splx(s);
1252}
1253
1254static int
1255ccdioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct thread *td)
1256{
1257 int unit = ccdunit(dev);
1258 int i, j, lookedup = 0, error = 0;
1259 int part, pmask, s;
1260 struct ccd_s *cs;
1261 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data;
1262 char **cpp;
1263 struct vnode **vpp;
1264
1265 if (!IS_ALLOCATED(unit))
1266 return (ENXIO);
1267 cs = ccdfind(unit);
1268
1269 switch (cmd) {
1270 case CCDIOCSET:
1271 if (IS_INITED(cs))
1272 return (EBUSY);
1273
1274 if ((flag & FWRITE) == 0)
1275 return (EBADF);
1276
1277 if ((error = ccdlock(cs)) != 0)
1278 return (error);
1279
1280 if (ccio->ccio_ndisks > CCD_MAXNDISKS)
1281 return (EINVAL);
1282
1283 /* Fill in some important bits. */
1284 cs->sc_ileave = ccio->ccio_ileave;
1285 if (cs->sc_ileave == 0 &&
1286 ((ccio->ccio_flags & CCDF_MIRROR) ||
1287 (ccio->ccio_flags & CCDF_PARITY))) {
1288 printf("ccd%d: disabling mirror/parity, interleave is 0\n", unit);
1289 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY);
1290 }
1291 if ((ccio->ccio_flags & CCDF_MIRROR) &&
1292 (ccio->ccio_flags & CCDF_PARITY)) {
1293 printf("ccd%d: can't specify both mirror and parity, using mirror\n", unit);
1294 ccio->ccio_flags &= ~CCDF_PARITY;
1295 }
1296 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) &&
1297 !(ccio->ccio_flags & CCDF_UNIFORM)) {
1298 printf("ccd%d: mirror/parity forces uniform flag\n",
1299 unit);
1300 ccio->ccio_flags |= CCDF_UNIFORM;
1301 }
1302 cs->sc_flags = ccio->ccio_flags & CCDF_USERMASK;
1303
1304 /*
1305 * Allocate space for and copy in the array of
1306 * componet pathnames and device numbers.
1307 */
1308 cpp = malloc(ccio->ccio_ndisks * sizeof(char *),
1309 M_DEVBUF, M_WAITOK);
1310 vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *),
1311 M_DEVBUF, M_WAITOK);
1312
1313 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp,
1314 ccio->ccio_ndisks * sizeof(char **));
1315 if (error) {
1316 free(vpp, M_DEVBUF);
1317 free(cpp, M_DEVBUF);
1318 ccdunlock(cs);
1319 return (error);
1320 }
1321
1322#ifdef DEBUG
1323 if (ccddebug & CCDB_INIT)
1324 for (i = 0; i < ccio->ccio_ndisks; ++i)
1325 printf("ccdioctl: component %d: %p\n",
1326 i, cpp[i]);
1327#endif
1328
1329 for (i = 0; i < ccio->ccio_ndisks; ++i) {
1330#ifdef DEBUG
1331 if (ccddebug & CCDB_INIT)
1332 printf("ccdioctl: lookedup = %d\n", lookedup);
1333#endif
1334 if ((error = ccdlookup(cpp[i], td, &vpp[i])) != 0) {
1335 for (j = 0; j < lookedup; ++j)
1336 (void)vn_close(vpp[j], FREAD|FWRITE,
1337 td->td_ucred, td);
1338 free(vpp, M_DEVBUF);
1339 free(cpp, M_DEVBUF);
1340 ccdunlock(cs);
1341 return (error);
1342 }
1343 ++lookedup;
1344 }
1345 cs->sc_vpp = vpp;
1346 cs->sc_nccdisks = ccio->ccio_ndisks;
1347
1348 /*
1349 * Initialize the ccd. Fills in the softc for us.
1350 */
1351 if ((error = ccdinit(cs, cpp, td)) != 0) {
1352 for (j = 0; j < lookedup; ++j)
1353 (void)vn_close(vpp[j], FREAD|FWRITE,
1354 td->td_ucred, td);
1355 /*
1356 * We can't ccddestroy() cs just yet, because nothing
1357 * prevents user-level app to do another ioctl()
1358 * without closing the device first, therefore
1359 * declare unit null and void and let ccdclose()
1360 * destroy it when it is safe to do so.
1361 */
1362 cs->sc_flags &= (CCDF_WANTED | CCDF_LOCKED);
1363 free(vpp, M_DEVBUF);
1364 free(cpp, M_DEVBUF);
1365 ccdunlock(cs);
1366 return (error);
1367 }
1368
1369 /*
1370 * The ccd has been successfully initialized, so
1371 * we can place it into the array and read the disklabel.
1372 */
1373 ccio->ccio_unit = unit;
1374 ccio->ccio_size = cs->sc_size;
1375 ccdgetdisklabel(dev);
1376
1377 ccdunlock(cs);
1378
1379 break;
1380
1381 case CCDIOCCLR:
1382 if (!IS_INITED(cs))
1383 return (ENXIO);
1384
1385 if ((flag & FWRITE) == 0)
1386 return (EBADF);
1387
1388 if ((error = ccdlock(cs)) != 0)
1389 return (error);
1390
1391 /* Don't unconfigure if any other partitions are open */
1392 part = ccdpart(dev);
1393 pmask = (1 << part);
1394 if ((cs->sc_openmask & ~pmask)) {
1395 ccdunlock(cs);
1396 return (EBUSY);
1397 }
1398
1399 /* Declare unit null and void (reset all flags) */
1400 cs->sc_flags &= (CCDF_WANTED | CCDF_LOCKED);
1401
1402 /* Close the components and free their pathnames. */
1403 for (i = 0; i < cs->sc_nccdisks; ++i) {
1404 /*
1405 * XXX: this close could potentially fail and
1406 * cause Bad Things. Maybe we need to force
1407 * the close to happen?
1408 */
1409#ifdef DEBUG
1410 if (ccddebug & CCDB_VNODE)
1411 vprint("CCDIOCCLR: vnode info",
1412 cs->sc_cinfo[i].ci_vp);
1413#endif
1414 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE,
1415 td->td_ucred, td);
1416 free(cs->sc_cinfo[i].ci_path, M_DEVBUF);
1417 }
1418
1419 /* Free interleave index. */
1420 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i)
1421 free(cs->sc_itable[i].ii_index, M_DEVBUF);
1422
1423 /* Free component info and interleave table. */
1424 free(cs->sc_cinfo, M_DEVBUF);
1425 free(cs->sc_itable, M_DEVBUF);
1426 free(cs->sc_vpp, M_DEVBUF);
1427
1428 /* And remove the devstat entry. */
1429 devstat_remove_entry(&cs->device_stats);
1430
1431 /* This must be atomic. */
1432 s = splhigh();
1433 ccdunlock(cs);
1434 splx(s);
1435
1436 break;
1437
1438 case CCDCONFINFO:
1439 {
1440 int ninit = 0;
1441 struct ccdconf *conf = (struct ccdconf *)data;
1442 struct ccd_s *tmpcs;
1443 struct ccd_s *ubuf = conf->buffer;
1444
1445 /* XXX: LOCK(unique unit numbers) */
1446 LIST_FOREACH(tmpcs, &ccd_softc_list, list)
1447 if (IS_INITED(tmpcs))
1448 ninit++;
1449
1450 if (conf->size == 0) {
1451 conf->size = sizeof(struct ccd_s) * ninit;
1452 break;
1453 } else if ((conf->size / sizeof(struct ccd_s) != ninit) ||
1454 (conf->size % sizeof(struct ccd_s) != 0)) {
1455 /* XXX: UNLOCK(unique unit numbers) */
1456 return (EINVAL);
1457 }
1458
1459 ubuf += ninit;
1460 LIST_FOREACH(tmpcs, &ccd_softc_list, list) {
1461 if (!IS_INITED(tmpcs))
1462 continue;
1463 error = copyout(tmpcs, --ubuf,
1464 sizeof(struct ccd_s));
1465 if (error != 0)
1466 /* XXX: UNLOCK(unique unit numbers) */
1467 return (error);
1468 }
1469 /* XXX: UNLOCK(unique unit numbers) */
1470 }
1471 break;
1472
1473 case CCDCPPINFO:
1474 if (!IS_INITED(cs))
1475 return (ENXIO);
1476
1477 {
1478 int len = 0;
1479 struct ccdcpps *cpps = (struct ccdcpps *)data;
1480 char *ubuf = cpps->buffer;
1481
1482
1483 for (i = 0; i < cs->sc_nccdisks; ++i)
1484 len += cs->sc_cinfo[i].ci_pathlen;
1485
1486 if (cpps->size == 0) {
1487 cpps->size = len;
1488 break;
1489 } else if (cpps->size != len) {
1490 return (EINVAL);
1491 }
1492
1493 for (i = 0; i < cs->sc_nccdisks; ++i) {
1494 len = cs->sc_cinfo[i].ci_pathlen;
1495 error = copyout(cs->sc_cinfo[i].ci_path, ubuf,
1496 len);
1497 if (error != 0)
1498 return (error);
1499 ubuf += len;
1500 }
1501 }
1502 break;
1503
1504 case DIOCGDINFO:
1505 if (!IS_INITED(cs))
1506 return (ENXIO);
1507
1508 *(struct disklabel *)data = cs->sc_label;
1509 break;
1510
1511 case DIOCWDINFO:
1512 case DIOCSDINFO:
1513 if (!IS_INITED(cs))
1514 return (ENXIO);
1515
1516 if ((flag & FWRITE) == 0)
1517 return (EBADF);
1518
1519 if ((error = ccdlock(cs)) != 0)
1520 return (error);
1521
1522 cs->sc_flags |= CCDF_LABELLING;
1523
1524 error = setdisklabel(&cs->sc_label,
1525 (struct disklabel *)data, 0);
1526 if (error == 0) {
1527 if (cmd == DIOCWDINFO)
1528 error = writedisklabel(CCDLABELDEV(dev),
1529 &cs->sc_label);
1530 }
1531
1532 cs->sc_flags &= ~CCDF_LABELLING;
1533
1534 ccdunlock(cs);
1535
1536 if (error)
1537 return (error);
1538 break;
1539
1540 case DIOCWLABEL:
1541 if (!IS_INITED(cs))
1542 return (ENXIO);
1543
1544 if ((flag & FWRITE) == 0)
1545 return (EBADF);
1546 if (*(int *)data != 0)
1547 cs->sc_flags |= CCDF_WLABEL;
1548 else
1549 cs->sc_flags &= ~CCDF_WLABEL;
1550 break;
1551
1552 default:
1553 return (ENOTTY);
1554 }
1555
1556 return (0);
1557}
1558
1559static int
1560ccdsize(dev_t dev)
1561{
1562 struct ccd_s *cs;
1563 int part, size;
1564
1565 if (ccdopen(dev, 0, S_IFCHR, curthread))
1566 return (-1);
1567
1568 cs = ccdfind(ccdunit(dev));
1569 part = ccdpart(dev);
1570
1571 if (!IS_INITED(cs))
1572 return (-1);
1573
1574 if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP)
1575 size = -1;
1576 else
1577 size = cs->sc_label.d_partitions[part].p_size;
1578
1579 if (ccdclose(dev, 0, S_IFCHR, curthread))
1580 return (-1);
1581
1582 return (size);
1583}
1584
1585/*
1586 * Lookup the provided name in the filesystem. If the file exists,
1587 * is a valid block device, and isn't being used by anyone else,
1588 * set *vpp to the file's vnode.
1589 */
1590static int
1591ccdlookup(char *path, struct thread *td, struct vnode **vpp)
1592{
1593 struct nameidata nd;
1594 struct vnode *vp;
1595 int error, flags;
1596
1597 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, path, td);
1598 flags = FREAD | FWRITE;
1599 if ((error = vn_open(&nd, &flags, 0)) != 0) {
1600#ifdef DEBUG
1601 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
1602 printf("ccdlookup: vn_open error = %d\n", error);
1603#endif
1604 return (error);
1605 }
1606 vp = nd.ni_vp;
1607
1608 if (vrefcnt(vp) > 1) {
1609 error = EBUSY;
1610 goto bad;
1611 }
1612
1613 if (!vn_isdisk(vp, &error))
1614 goto bad;
1615
1616#ifdef DEBUG
1617 if (ccddebug & CCDB_VNODE)
1618 vprint("ccdlookup: vnode info", vp);
1619#endif
1620
1621 VOP_UNLOCK(vp, 0, td);
1622 NDFREE(&nd, NDF_ONLY_PNBUF);
1623 *vpp = vp;
1624 return (0);
1625bad:
1626 VOP_UNLOCK(vp, 0, td);
1627 NDFREE(&nd, NDF_ONLY_PNBUF);
1628 /* vn_close does vrele() for vp */
1629 (void)vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
1630 return (error);
1631}
1632
1633/*
1634 * Read the disklabel from the ccd. If one is not present, fake one
1635 * up.
1636 */
1637static void
1638ccdgetdisklabel(dev_t dev)
1639{
1640 int unit = ccdunit(dev);
1641 struct ccd_s *cs = ccdfind(unit);
1642 char *errstring;
1643 struct disklabel *lp = &cs->sc_label;
1644 struct ccdgeom *ccg = &cs->sc_geom;
1645
1646 bzero(lp, sizeof(*lp));
1647
1648 lp->d_secperunit = cs->sc_size;
1649 lp->d_secsize = ccg->ccg_secsize;
1650 lp->d_nsectors = ccg->ccg_nsectors;
1651 lp->d_ntracks = ccg->ccg_ntracks;
1652 lp->d_ncylinders = ccg->ccg_ncylinders;
1653 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1654
1655 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename));
1656 lp->d_type = DTYPE_CCD;
1657 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1658 lp->d_rpm = 3600;
1659 lp->d_interleave = 1;
1660 lp->d_flags = 0;
1661
1662 lp->d_partitions[RAW_PART].p_offset = 0;
1663 lp->d_partitions[RAW_PART].p_size = cs->sc_size;
1664 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1665 lp->d_npartitions = RAW_PART + 1;
1666
1667 lp->d_bbsize = BBSIZE; /* XXX */
1668 lp->d_sbsize = 0;
1669
1670 lp->d_magic = DISKMAGIC;
1671 lp->d_magic2 = DISKMAGIC;
1672 lp->d_checksum = dkcksum(&cs->sc_label);
1673
1674 /*
1675 * Call the generic disklabel extraction routine.
1676 */
1677 errstring = readdisklabel(CCDLABELDEV(dev), &cs->sc_label);
1678 if (errstring != NULL)
1679 ccdmakedisklabel(cs);
1680
1681#ifdef DEBUG
1682 /* It's actually extremely common to have unlabeled ccds. */
1683 if (ccddebug & CCDB_LABEL)
1684 if (errstring != NULL)
1685 printf("ccd%d: %s\n", unit, errstring);
1686#endif
1687}
1688
1689/*
1690 * Take care of things one might want to take care of in the event
1691 * that a disklabel isn't present.
1692 */
1693static void
1694ccdmakedisklabel(struct ccd_s *cs)
1695{
1696 struct disklabel *lp = &cs->sc_label;
1697
1698 /*
1699 * For historical reasons, if there's no disklabel present
1700 * the raw partition must be marked FS_BSDFFS.
1701 */
1702 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1703
1704 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1705}
1706
1707/*
1708 * Wait interruptibly for an exclusive lock.
1709 *
1710 * XXX
1711 * Several drivers do this; it should be abstracted and made MP-safe.
1712 */
1713static int
1714ccdlock(struct ccd_s *cs)
1715{
1716 int error;
1717
1718 while ((cs->sc_flags & CCDF_LOCKED) != 0) {
1719 cs->sc_flags |= CCDF_WANTED;
1720 if ((error = tsleep(cs, PRIBIO | PCATCH, "ccdlck", 0)) != 0)
1721 return (error);
1722 }
1723 cs->sc_flags |= CCDF_LOCKED;
1724 return (0);
1725}
1726
1727/*
1728 * Unlock and wake up any waiters.
1729 */
1730static void
1731ccdunlock(struct ccd_s *cs)
1732{
1733
1734 cs->sc_flags &= ~CCDF_LOCKED;
1735 if ((cs->sc_flags & CCDF_WANTED) != 0) {
1736 cs->sc_flags &= ~CCDF_WANTED;
1737 wakeup(cs);
1738 }
1739}
1740
1741#ifdef DEBUG
1742static void
1743printiinfo(struct ccdiinfo *ii)
1744{
1745 int ix, i;
1746
1747 for (ix = 0; ii->ii_ndisk; ix++, ii++) {
1748 printf(" itab[%d]: #dk %d sblk %lld soff %lld",
1749 ix, ii->ii_ndisk, (long long)ii->ii_startblk,
1750 (long long)ii->ii_startoff);
1751 for (i = 0; i < ii->ii_ndisk; i++)
1752 printf(" %d", ii->ii_index[i]);
1753 printf("\n");
1754 }
1755}
1756#endif
573 */
574 devstat_add_entry(&cs->device_stats, "ccd", cs->sc_unit,
575 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED,
576 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER,
577 DEVSTAT_PRIORITY_ARRAY);
578
579 cs->sc_flags |= CCDF_INITED;
580 cs->sc_cflags = cs->sc_flags; /* So we can find out later... */
581 return (0);
582fail:
583 while (ci > cs->sc_cinfo) {
584 ci--;
585 free(ci->ci_path, M_DEVBUF);
586 }
587 if (tmppath != NULL)
588 free(tmppath, M_DEVBUF);
589 free(cs->sc_cinfo, M_DEVBUF);
590 return (error);
591}
592
593static void
594ccdinterleave(struct ccd_s *cs, int unit)
595{
596 struct ccdcinfo *ci, *smallci;
597 struct ccdiinfo *ii;
598 daddr_t bn, lbn;
599 int ix;
600 u_long size;
601
602#ifdef DEBUG
603 if (ccddebug & CCDB_INIT)
604 printf("ccdinterleave(%p): ileave %d\n", cs, cs->sc_ileave);
605#endif
606
607 /*
608 * Allocate an interleave table. The worst case occurs when each
609 * of N disks is of a different size, resulting in N interleave
610 * tables.
611 *
612 * Chances are this is too big, but we don't care.
613 */
614 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo);
615 cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF,
616 M_WAITOK | M_ZERO);
617
618 /*
619 * Trivial case: no interleave (actually interleave of disk size).
620 * Each table entry represents a single component in its entirety.
621 *
622 * An interleave of 0 may not be used with a mirror or parity setup.
623 */
624 if (cs->sc_ileave == 0) {
625 bn = 0;
626 ii = cs->sc_itable;
627
628 for (ix = 0; ix < cs->sc_nccdisks; ix++) {
629 /* Allocate space for ii_index. */
630 ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK);
631 ii->ii_ndisk = 1;
632 ii->ii_startblk = bn;
633 ii->ii_startoff = 0;
634 ii->ii_index[0] = ix;
635 bn += cs->sc_cinfo[ix].ci_size;
636 ii++;
637 }
638 ii->ii_ndisk = 0;
639#ifdef DEBUG
640 if (ccddebug & CCDB_INIT)
641 printiinfo(cs->sc_itable);
642#endif
643 return;
644 }
645
646 /*
647 * The following isn't fast or pretty; it doesn't have to be.
648 */
649 size = 0;
650 bn = lbn = 0;
651 for (ii = cs->sc_itable; ; ii++) {
652 /*
653 * Allocate space for ii_index. We might allocate more then
654 * we use.
655 */
656 ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks),
657 M_DEVBUF, M_WAITOK);
658
659 /*
660 * Locate the smallest of the remaining components
661 */
662 smallci = NULL;
663 for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks];
664 ci++) {
665 if (ci->ci_size > size &&
666 (smallci == NULL ||
667 ci->ci_size < smallci->ci_size)) {
668 smallci = ci;
669 }
670 }
671
672 /*
673 * Nobody left, all done
674 */
675 if (smallci == NULL) {
676 ii->ii_ndisk = 0;
677 break;
678 }
679
680 /*
681 * Record starting logical block using an sc_ileave blocksize.
682 */
683 ii->ii_startblk = bn / cs->sc_ileave;
684
685 /*
686 * Record starting comopnent block using an sc_ileave
687 * blocksize. This value is relative to the beginning of
688 * a component disk.
689 */
690 ii->ii_startoff = lbn;
691
692 /*
693 * Determine how many disks take part in this interleave
694 * and record their indices.
695 */
696 ix = 0;
697 for (ci = cs->sc_cinfo;
698 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
699 if (ci->ci_size >= smallci->ci_size) {
700 ii->ii_index[ix++] = ci - cs->sc_cinfo;
701 }
702 }
703 ii->ii_ndisk = ix;
704 bn += ix * (smallci->ci_size - size);
705 lbn = smallci->ci_size / cs->sc_ileave;
706 size = smallci->ci_size;
707 }
708#ifdef DEBUG
709 if (ccddebug & CCDB_INIT)
710 printiinfo(cs->sc_itable);
711#endif
712}
713
714/* ARGSUSED */
715static int
716ccdopen(dev_t dev, int flags, int fmt, struct thread *td)
717{
718 int unit = ccdunit(dev);
719 struct ccd_s *cs;
720 struct disklabel *lp;
721 int error = 0, part, pmask;
722
723#ifdef DEBUG
724 if (ccddebug & CCDB_FOLLOW)
725 printf("ccdopen(%p, %x)\n", dev, flags);
726#endif
727
728 cs = IS_ALLOCATED(unit) ? ccdfind(unit) : ccdnew(unit);
729
730 if ((error = ccdlock(cs)) != 0)
731 return (error);
732
733 lp = &cs->sc_label;
734
735 part = ccdpart(dev);
736 pmask = (1 << part);
737
738 /*
739 * If we're initialized, check to see if there are any other
740 * open partitions. If not, then it's safe to update
741 * the in-core disklabel.
742 */
743 if (IS_INITED(cs) && (cs->sc_openmask == 0))
744 ccdgetdisklabel(dev);
745
746 /* Check that the partition exists. */
747 if (part != RAW_PART && ((part >= lp->d_npartitions) ||
748 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
749 error = ENXIO;
750 goto done;
751 }
752
753 cs->sc_openmask |= pmask;
754 done:
755 ccdunlock(cs);
756 return (0);
757}
758
759/* ARGSUSED */
760static int
761ccdclose(dev_t dev, int flags, int fmt, struct thread *td)
762{
763 int unit = ccdunit(dev);
764 struct ccd_s *cs;
765 int error = 0, part;
766
767#ifdef DEBUG
768 if (ccddebug & CCDB_FOLLOW)
769 printf("ccdclose(%p, %x)\n", dev, flags);
770#endif
771
772 if (!IS_ALLOCATED(unit))
773 return (ENXIO);
774 cs = ccdfind(unit);
775
776 if ((error = ccdlock(cs)) != 0)
777 return (error);
778
779 part = ccdpart(dev);
780
781 /* ...that much closer to allowing unconfiguration... */
782 cs->sc_openmask &= ~(1 << part);
783 /* collect "garbage" if possible */
784 if (!IS_INITED(cs) && (cs->sc_flags & CCDF_WANTED) == 0)
785 ccddestroy(cs, td->td_proc);
786 else
787 ccdunlock(cs);
788 return (0);
789}
790
791static void
792ccdstrategy(struct bio *bp)
793{
794 int unit = ccdunit(bp->bio_dev);
795 struct ccd_s *cs = ccdfind(unit);
796 int s;
797 int wlabel;
798 struct disklabel *lp;
799
800#ifdef DEBUG
801 if (ccddebug & CCDB_FOLLOW)
802 printf("ccdstrategy(%p): unit %d\n", bp, unit);
803#endif
804 if (!IS_INITED(cs)) {
805 biofinish(bp, NULL, ENXIO);
806 return;
807 }
808
809 /* If it's a nil transfer, wake up the top half now. */
810 if (bp->bio_bcount == 0) {
811 biodone(bp);
812 return;
813 }
814
815 lp = &cs->sc_label;
816
817 /*
818 * Do bounds checking and adjust transfer. If there's an
819 * error, the bounds check will flag that for us.
820 */
821 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING);
822 if (ccdpart(bp->bio_dev) != RAW_PART) {
823 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
824 biodone(bp);
825 return;
826 }
827 } else {
828 int pbn; /* in sc_secsize chunks */
829 long sz; /* in sc_secsize chunks */
830
831 pbn = bp->bio_blkno / (cs->sc_geom.ccg_secsize / DEV_BSIZE);
832 sz = howmany(bp->bio_bcount, cs->sc_geom.ccg_secsize);
833
834 /*
835 * If out of bounds return an error. If at the EOF point,
836 * simply read or write less.
837 */
838
839 if (pbn < 0 || pbn >= cs->sc_size) {
840 bp->bio_resid = bp->bio_bcount;
841 if (pbn != cs->sc_size)
842 biofinish(bp, NULL, EINVAL);
843 else
844 biodone(bp);
845 return;
846 }
847
848 /*
849 * If the request crosses EOF, truncate the request.
850 */
851 if (pbn + sz > cs->sc_size) {
852 bp->bio_bcount = (cs->sc_size - pbn) *
853 cs->sc_geom.ccg_secsize;
854 }
855 }
856
857 bp->bio_resid = bp->bio_bcount;
858
859 /*
860 * "Start" the unit.
861 */
862 s = splbio();
863 ccdstart(cs, bp);
864 splx(s);
865 return;
866}
867
868static void
869ccdstart(struct ccd_s *cs, struct bio *bp)
870{
871 long bcount, rcount;
872 struct ccdbuf *cbp[4];
873 /* XXX! : 2 reads and 2 writes for RAID 4/5 */
874 caddr_t addr;
875 daddr_t bn;
876 struct partition *pp;
877
878#ifdef DEBUG
879 if (ccddebug & CCDB_FOLLOW)
880 printf("ccdstart(%p, %p)\n", cs, bp);
881#endif
882
883 /* Record the transaction start */
884 devstat_start_transaction(&cs->device_stats);
885
886 /*
887 * Translate the partition-relative block number to an absolute.
888 */
889 bn = bp->bio_blkno;
890 if (ccdpart(bp->bio_dev) != RAW_PART) {
891 pp = &cs->sc_label.d_partitions[ccdpart(bp->bio_dev)];
892 bn += pp->p_offset;
893 }
894
895 /*
896 * Allocate component buffers and fire off the requests
897 */
898 addr = bp->bio_data;
899 for (bcount = bp->bio_bcount; bcount > 0; bcount -= rcount) {
900 ccdbuffer(cbp, cs, bp, bn, addr, bcount);
901 rcount = cbp[0]->cb_buf.bio_bcount;
902
903 if (cs->sc_cflags & CCDF_MIRROR) {
904 /*
905 * Mirroring. Writes go to both disks, reads are
906 * taken from whichever disk seems most appropriate.
907 *
908 * We attempt to localize reads to the disk whos arm
909 * is nearest the read request. We ignore seeks due
910 * to writes when making this determination and we
911 * also try to avoid hogging.
912 */
913 if (cbp[0]->cb_buf.bio_cmd == BIO_WRITE) {
914 BIO_STRATEGY(&cbp[0]->cb_buf, 0);
915 BIO_STRATEGY(&cbp[1]->cb_buf, 0);
916 } else {
917 int pick = cs->sc_pick;
918 daddr_t range = cs->sc_size / 16;
919
920 if (bn < cs->sc_blk[pick] - range ||
921 bn > cs->sc_blk[pick] + range
922 ) {
923 cs->sc_pick = pick = 1 - pick;
924 }
925 cs->sc_blk[pick] = bn + btodb(rcount);
926 BIO_STRATEGY(&cbp[pick]->cb_buf, 0);
927 }
928 } else {
929 /*
930 * Not mirroring
931 */
932 BIO_STRATEGY(&cbp[0]->cb_buf, 0);
933 }
934 bn += btodb(rcount);
935 addr += rcount;
936 }
937}
938
939/*
940 * Build a component buffer header.
941 */
942static void
943ccdbuffer(struct ccdbuf **cb, struct ccd_s *cs, struct bio *bp, daddr_t bn, caddr_t addr, long bcount)
944{
945 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */
946 struct ccdbuf *cbp;
947 daddr_t cbn, cboff;
948 off_t cbc;
949
950#ifdef DEBUG
951 if (ccddebug & CCDB_IO)
952 printf("ccdbuffer(%p, %p, %lld, %p, %ld)\n",
953 (void *)cs, (void *)bp, (long long)bn, (void *)addr,
954 bcount);
955#endif
956 /*
957 * Determine which component bn falls in.
958 */
959 cbn = bn;
960 cboff = 0;
961
962 if (cs->sc_ileave == 0) {
963 /*
964 * Serially concatenated and neither a mirror nor a parity
965 * config. This is a special case.
966 */
967 daddr_t sblk;
968
969 sblk = 0;
970 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++)
971 sblk += ci->ci_size;
972 cbn -= sblk;
973 } else {
974 struct ccdiinfo *ii;
975 int ccdisk, off;
976
977 /*
978 * Calculate cbn, the logical superblock (sc_ileave chunks),
979 * and cboff, a normal block offset (DEV_BSIZE chunks) relative
980 * to cbn.
981 */
982 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */
983 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */
984
985 /*
986 * Figure out which interleave table to use.
987 */
988 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) {
989 if (ii->ii_startblk > cbn)
990 break;
991 }
992 ii--;
993
994 /*
995 * off is the logical superblock relative to the beginning
996 * of this interleave block.
997 */
998 off = cbn - ii->ii_startblk;
999
1000 /*
1001 * We must calculate which disk component to use (ccdisk),
1002 * and recalculate cbn to be the superblock relative to
1003 * the beginning of the component. This is typically done by
1004 * adding 'off' and ii->ii_startoff together. However, 'off'
1005 * must typically be divided by the number of components in
1006 * this interleave array to be properly convert it from a
1007 * CCD-relative logical superblock number to a
1008 * component-relative superblock number.
1009 */
1010 if (ii->ii_ndisk == 1) {
1011 /*
1012 * When we have just one disk, it can't be a mirror
1013 * or a parity config.
1014 */
1015 ccdisk = ii->ii_index[0];
1016 cbn = ii->ii_startoff + off;
1017 } else {
1018 if (cs->sc_cflags & CCDF_MIRROR) {
1019 /*
1020 * We have forced a uniform mapping, resulting
1021 * in a single interleave array. We double
1022 * up on the first half of the available
1023 * components and our mirror is in the second
1024 * half. This only works with a single
1025 * interleave array because doubling up
1026 * doubles the number of sectors, so there
1027 * cannot be another interleave array because
1028 * the next interleave array's calculations
1029 * would be off.
1030 */
1031 int ndisk2 = ii->ii_ndisk / 2;
1032 ccdisk = ii->ii_index[off % ndisk2];
1033 cbn = ii->ii_startoff + off / ndisk2;
1034 ci2 = &cs->sc_cinfo[ccdisk + ndisk2];
1035 } else if (cs->sc_cflags & CCDF_PARITY) {
1036 /*
1037 * XXX not implemented yet
1038 */
1039 int ndisk2 = ii->ii_ndisk - 1;
1040 ccdisk = ii->ii_index[off % ndisk2];
1041 cbn = ii->ii_startoff + off / ndisk2;
1042 if (cbn % ii->ii_ndisk <= ccdisk)
1043 ccdisk++;
1044 } else {
1045 ccdisk = ii->ii_index[off % ii->ii_ndisk];
1046 cbn = ii->ii_startoff + off / ii->ii_ndisk;
1047 }
1048 }
1049
1050 ci = &cs->sc_cinfo[ccdisk];
1051
1052 /*
1053 * Convert cbn from a superblock to a normal block so it
1054 * can be used to calculate (along with cboff) the normal
1055 * block index into this particular disk.
1056 */
1057 cbn *= cs->sc_ileave;
1058 }
1059
1060 /*
1061 * Fill in the component buf structure.
1062 */
1063 cbp = getccdbuf(NULL);
1064 cbp->cb_buf.bio_cmd = bp->bio_cmd;
1065 cbp->cb_buf.bio_done = ccdiodone;
1066 cbp->cb_buf.bio_dev = ci->ci_dev; /* XXX */
1067 cbp->cb_buf.bio_blkno = cbn + cboff + CCD_OFFSET;
1068 cbp->cb_buf.bio_offset = dbtob(cbn + cboff + CCD_OFFSET);
1069 cbp->cb_buf.bio_data = addr;
1070 if (cs->sc_ileave == 0)
1071 cbc = dbtob((off_t)(ci->ci_size - cbn));
1072 else
1073 cbc = dbtob((off_t)(cs->sc_ileave - cboff));
1074 cbp->cb_buf.bio_bcount = (cbc < bcount) ? cbc : bcount;
1075 cbp->cb_buf.bio_caller1 = (void*)cbp->cb_buf.bio_bcount;
1076
1077 /*
1078 * context for ccdiodone
1079 */
1080 cbp->cb_obp = bp;
1081 cbp->cb_unit = cs->sc_unit;
1082 cbp->cb_comp = ci - cs->sc_cinfo;
1083
1084#ifdef DEBUG
1085 if (ccddebug & CCDB_IO)
1086 printf(" dev %p(u%ld): cbp %p bn %jd addr %p bcnt %ld\n",
1087 ci->ci_dev, (unsigned long)(ci-cs->sc_cinfo), cbp,
1088 (intmax_t)cbp->cb_buf.bio_blkno, cbp->cb_buf.bio_data,
1089 cbp->cb_buf.bio_bcount);
1090#endif
1091 cb[0] = cbp;
1092
1093 /*
1094 * Note: both I/O's setup when reading from mirror, but only one
1095 * will be executed.
1096 */
1097 if (cs->sc_cflags & CCDF_MIRROR) {
1098 /* mirror, setup second I/O */
1099 cbp = getccdbuf(cb[0]);
1100 cbp->cb_buf.bio_dev = ci2->ci_dev;
1101 cbp->cb_comp = ci2 - cs->sc_cinfo;
1102 cb[1] = cbp;
1103 /* link together the ccdbuf's and clear "mirror done" flag */
1104 cb[0]->cb_mirror = cb[1];
1105 cb[1]->cb_mirror = cb[0];
1106 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE;
1107 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE;
1108 }
1109}
1110
1111static void
1112ccdintr(struct ccd_s *cs, struct bio *bp)
1113{
1114#ifdef DEBUG
1115 if (ccddebug & CCDB_FOLLOW)
1116 printf("ccdintr(%p, %p)\n", cs, bp);
1117#endif
1118 /*
1119 * Request is done for better or worse, wakeup the top half.
1120 */
1121 if (bp->bio_flags & BIO_ERROR)
1122 bp->bio_resid = bp->bio_bcount;
1123 biofinish(bp, &cs->device_stats, 0);
1124}
1125
1126/*
1127 * Called at interrupt time.
1128 * Mark the component as done and if all components are done,
1129 * take a ccd interrupt.
1130 */
1131static void
1132ccdiodone(struct bio *ibp)
1133{
1134 struct ccdbuf *cbp = (struct ccdbuf *)ibp;
1135 struct bio *bp = cbp->cb_obp;
1136 int unit = cbp->cb_unit;
1137 int count, s;
1138
1139 s = splbio();
1140#ifdef DEBUG
1141 if (ccddebug & CCDB_FOLLOW)
1142 printf("ccdiodone(%p)\n", cbp);
1143 if (ccddebug & CCDB_IO) {
1144 printf("ccdiodone: bp %p bcount %ld resid %ld\n",
1145 bp, bp->bio_bcount, bp->bio_resid);
1146 printf(" dev %p(u%d), cbp %p bn %jd addr %p bcnt %ld\n",
1147 cbp->cb_buf.bio_dev, cbp->cb_comp, cbp,
1148 (intmax_t)cbp->cb_buf.bio_blkno, cbp->cb_buf.bio_data,
1149 cbp->cb_buf.bio_bcount);
1150 }
1151#endif
1152 /*
1153 * If an error occured, report it. If this is a mirrored
1154 * configuration and the first of two possible reads, do not
1155 * set the error in the bp yet because the second read may
1156 * succeed.
1157 */
1158
1159 if (cbp->cb_buf.bio_flags & BIO_ERROR) {
1160 const char *msg = "";
1161
1162 if ((ccdfind(unit)->sc_cflags & CCDF_MIRROR) &&
1163 (cbp->cb_buf.bio_cmd == BIO_READ) &&
1164 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1165 /*
1166 * We will try our read on the other disk down
1167 * below, also reverse the default pick so if we
1168 * are doing a scan we do not keep hitting the
1169 * bad disk first.
1170 */
1171 struct ccd_s *cs = ccdfind(unit);
1172
1173 msg = ", trying other disk";
1174 cs->sc_pick = 1 - cs->sc_pick;
1175 cs->sc_blk[cs->sc_pick] = bp->bio_blkno;
1176 } else {
1177 bp->bio_flags |= BIO_ERROR;
1178 bp->bio_error = cbp->cb_buf.bio_error ?
1179 cbp->cb_buf.bio_error : EIO;
1180 }
1181 printf("ccd%d: error %d on component %d block %jd "
1182 "(ccd block %jd)%s\n", unit, bp->bio_error, cbp->cb_comp,
1183 (intmax_t)cbp->cb_buf.bio_blkno, (intmax_t)bp->bio_blkno,
1184 msg);
1185 }
1186
1187 /*
1188 * Process mirror. If we are writing, I/O has been initiated on both
1189 * buffers and we fall through only after both are finished.
1190 *
1191 * If we are reading only one I/O is initiated at a time. If an
1192 * error occurs we initiate the second I/O and return, otherwise
1193 * we free the second I/O without initiating it.
1194 */
1195
1196 if (ccdfind(unit)->sc_cflags & CCDF_MIRROR) {
1197 if (cbp->cb_buf.bio_cmd == BIO_WRITE) {
1198 /*
1199 * When writing, handshake with the second buffer
1200 * to determine when both are done. If both are not
1201 * done, return here.
1202 */
1203 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1204 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE;
1205 putccdbuf(cbp);
1206 splx(s);
1207 return;
1208 }
1209 } else {
1210 /*
1211 * When reading, either dispose of the second buffer
1212 * or initiate I/O on the second buffer if an error
1213 * occured with this one.
1214 */
1215 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1216 if (cbp->cb_buf.bio_flags & BIO_ERROR) {
1217 cbp->cb_mirror->cb_pflags |=
1218 CCDPF_MIRROR_DONE;
1219 BIO_STRATEGY(&cbp->cb_mirror->cb_buf, 0);
1220 putccdbuf(cbp);
1221 splx(s);
1222 return;
1223 } else {
1224 putccdbuf(cbp->cb_mirror);
1225 /* fall through */
1226 }
1227 }
1228 }
1229 }
1230
1231 /*
1232 * use bio_caller1 to determine how big the original request was rather
1233 * then bio_bcount, because bio_bcount may have been truncated for EOF.
1234 *
1235 * XXX We check for an error, but we do not test the resid for an
1236 * aligned EOF condition. This may result in character & block
1237 * device access not recognizing EOF properly when read or written
1238 * sequentially, but will not effect filesystems.
1239 */
1240 count = (long)cbp->cb_buf.bio_caller1;
1241 putccdbuf(cbp);
1242
1243 /*
1244 * If all done, "interrupt".
1245 */
1246 bp->bio_resid -= count;
1247 if (bp->bio_resid < 0)
1248 panic("ccdiodone: count");
1249 if (bp->bio_resid == 0)
1250 ccdintr(ccdfind(unit), bp);
1251 splx(s);
1252}
1253
1254static int
1255ccdioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct thread *td)
1256{
1257 int unit = ccdunit(dev);
1258 int i, j, lookedup = 0, error = 0;
1259 int part, pmask, s;
1260 struct ccd_s *cs;
1261 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data;
1262 char **cpp;
1263 struct vnode **vpp;
1264
1265 if (!IS_ALLOCATED(unit))
1266 return (ENXIO);
1267 cs = ccdfind(unit);
1268
1269 switch (cmd) {
1270 case CCDIOCSET:
1271 if (IS_INITED(cs))
1272 return (EBUSY);
1273
1274 if ((flag & FWRITE) == 0)
1275 return (EBADF);
1276
1277 if ((error = ccdlock(cs)) != 0)
1278 return (error);
1279
1280 if (ccio->ccio_ndisks > CCD_MAXNDISKS)
1281 return (EINVAL);
1282
1283 /* Fill in some important bits. */
1284 cs->sc_ileave = ccio->ccio_ileave;
1285 if (cs->sc_ileave == 0 &&
1286 ((ccio->ccio_flags & CCDF_MIRROR) ||
1287 (ccio->ccio_flags & CCDF_PARITY))) {
1288 printf("ccd%d: disabling mirror/parity, interleave is 0\n", unit);
1289 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY);
1290 }
1291 if ((ccio->ccio_flags & CCDF_MIRROR) &&
1292 (ccio->ccio_flags & CCDF_PARITY)) {
1293 printf("ccd%d: can't specify both mirror and parity, using mirror\n", unit);
1294 ccio->ccio_flags &= ~CCDF_PARITY;
1295 }
1296 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) &&
1297 !(ccio->ccio_flags & CCDF_UNIFORM)) {
1298 printf("ccd%d: mirror/parity forces uniform flag\n",
1299 unit);
1300 ccio->ccio_flags |= CCDF_UNIFORM;
1301 }
1302 cs->sc_flags = ccio->ccio_flags & CCDF_USERMASK;
1303
1304 /*
1305 * Allocate space for and copy in the array of
1306 * componet pathnames and device numbers.
1307 */
1308 cpp = malloc(ccio->ccio_ndisks * sizeof(char *),
1309 M_DEVBUF, M_WAITOK);
1310 vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *),
1311 M_DEVBUF, M_WAITOK);
1312
1313 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp,
1314 ccio->ccio_ndisks * sizeof(char **));
1315 if (error) {
1316 free(vpp, M_DEVBUF);
1317 free(cpp, M_DEVBUF);
1318 ccdunlock(cs);
1319 return (error);
1320 }
1321
1322#ifdef DEBUG
1323 if (ccddebug & CCDB_INIT)
1324 for (i = 0; i < ccio->ccio_ndisks; ++i)
1325 printf("ccdioctl: component %d: %p\n",
1326 i, cpp[i]);
1327#endif
1328
1329 for (i = 0; i < ccio->ccio_ndisks; ++i) {
1330#ifdef DEBUG
1331 if (ccddebug & CCDB_INIT)
1332 printf("ccdioctl: lookedup = %d\n", lookedup);
1333#endif
1334 if ((error = ccdlookup(cpp[i], td, &vpp[i])) != 0) {
1335 for (j = 0; j < lookedup; ++j)
1336 (void)vn_close(vpp[j], FREAD|FWRITE,
1337 td->td_ucred, td);
1338 free(vpp, M_DEVBUF);
1339 free(cpp, M_DEVBUF);
1340 ccdunlock(cs);
1341 return (error);
1342 }
1343 ++lookedup;
1344 }
1345 cs->sc_vpp = vpp;
1346 cs->sc_nccdisks = ccio->ccio_ndisks;
1347
1348 /*
1349 * Initialize the ccd. Fills in the softc for us.
1350 */
1351 if ((error = ccdinit(cs, cpp, td)) != 0) {
1352 for (j = 0; j < lookedup; ++j)
1353 (void)vn_close(vpp[j], FREAD|FWRITE,
1354 td->td_ucred, td);
1355 /*
1356 * We can't ccddestroy() cs just yet, because nothing
1357 * prevents user-level app to do another ioctl()
1358 * without closing the device first, therefore
1359 * declare unit null and void and let ccdclose()
1360 * destroy it when it is safe to do so.
1361 */
1362 cs->sc_flags &= (CCDF_WANTED | CCDF_LOCKED);
1363 free(vpp, M_DEVBUF);
1364 free(cpp, M_DEVBUF);
1365 ccdunlock(cs);
1366 return (error);
1367 }
1368
1369 /*
1370 * The ccd has been successfully initialized, so
1371 * we can place it into the array and read the disklabel.
1372 */
1373 ccio->ccio_unit = unit;
1374 ccio->ccio_size = cs->sc_size;
1375 ccdgetdisklabel(dev);
1376
1377 ccdunlock(cs);
1378
1379 break;
1380
1381 case CCDIOCCLR:
1382 if (!IS_INITED(cs))
1383 return (ENXIO);
1384
1385 if ((flag & FWRITE) == 0)
1386 return (EBADF);
1387
1388 if ((error = ccdlock(cs)) != 0)
1389 return (error);
1390
1391 /* Don't unconfigure if any other partitions are open */
1392 part = ccdpart(dev);
1393 pmask = (1 << part);
1394 if ((cs->sc_openmask & ~pmask)) {
1395 ccdunlock(cs);
1396 return (EBUSY);
1397 }
1398
1399 /* Declare unit null and void (reset all flags) */
1400 cs->sc_flags &= (CCDF_WANTED | CCDF_LOCKED);
1401
1402 /* Close the components and free their pathnames. */
1403 for (i = 0; i < cs->sc_nccdisks; ++i) {
1404 /*
1405 * XXX: this close could potentially fail and
1406 * cause Bad Things. Maybe we need to force
1407 * the close to happen?
1408 */
1409#ifdef DEBUG
1410 if (ccddebug & CCDB_VNODE)
1411 vprint("CCDIOCCLR: vnode info",
1412 cs->sc_cinfo[i].ci_vp);
1413#endif
1414 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE,
1415 td->td_ucred, td);
1416 free(cs->sc_cinfo[i].ci_path, M_DEVBUF);
1417 }
1418
1419 /* Free interleave index. */
1420 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i)
1421 free(cs->sc_itable[i].ii_index, M_DEVBUF);
1422
1423 /* Free component info and interleave table. */
1424 free(cs->sc_cinfo, M_DEVBUF);
1425 free(cs->sc_itable, M_DEVBUF);
1426 free(cs->sc_vpp, M_DEVBUF);
1427
1428 /* And remove the devstat entry. */
1429 devstat_remove_entry(&cs->device_stats);
1430
1431 /* This must be atomic. */
1432 s = splhigh();
1433 ccdunlock(cs);
1434 splx(s);
1435
1436 break;
1437
1438 case CCDCONFINFO:
1439 {
1440 int ninit = 0;
1441 struct ccdconf *conf = (struct ccdconf *)data;
1442 struct ccd_s *tmpcs;
1443 struct ccd_s *ubuf = conf->buffer;
1444
1445 /* XXX: LOCK(unique unit numbers) */
1446 LIST_FOREACH(tmpcs, &ccd_softc_list, list)
1447 if (IS_INITED(tmpcs))
1448 ninit++;
1449
1450 if (conf->size == 0) {
1451 conf->size = sizeof(struct ccd_s) * ninit;
1452 break;
1453 } else if ((conf->size / sizeof(struct ccd_s) != ninit) ||
1454 (conf->size % sizeof(struct ccd_s) != 0)) {
1455 /* XXX: UNLOCK(unique unit numbers) */
1456 return (EINVAL);
1457 }
1458
1459 ubuf += ninit;
1460 LIST_FOREACH(tmpcs, &ccd_softc_list, list) {
1461 if (!IS_INITED(tmpcs))
1462 continue;
1463 error = copyout(tmpcs, --ubuf,
1464 sizeof(struct ccd_s));
1465 if (error != 0)
1466 /* XXX: UNLOCK(unique unit numbers) */
1467 return (error);
1468 }
1469 /* XXX: UNLOCK(unique unit numbers) */
1470 }
1471 break;
1472
1473 case CCDCPPINFO:
1474 if (!IS_INITED(cs))
1475 return (ENXIO);
1476
1477 {
1478 int len = 0;
1479 struct ccdcpps *cpps = (struct ccdcpps *)data;
1480 char *ubuf = cpps->buffer;
1481
1482
1483 for (i = 0; i < cs->sc_nccdisks; ++i)
1484 len += cs->sc_cinfo[i].ci_pathlen;
1485
1486 if (cpps->size == 0) {
1487 cpps->size = len;
1488 break;
1489 } else if (cpps->size != len) {
1490 return (EINVAL);
1491 }
1492
1493 for (i = 0; i < cs->sc_nccdisks; ++i) {
1494 len = cs->sc_cinfo[i].ci_pathlen;
1495 error = copyout(cs->sc_cinfo[i].ci_path, ubuf,
1496 len);
1497 if (error != 0)
1498 return (error);
1499 ubuf += len;
1500 }
1501 }
1502 break;
1503
1504 case DIOCGDINFO:
1505 if (!IS_INITED(cs))
1506 return (ENXIO);
1507
1508 *(struct disklabel *)data = cs->sc_label;
1509 break;
1510
1511 case DIOCWDINFO:
1512 case DIOCSDINFO:
1513 if (!IS_INITED(cs))
1514 return (ENXIO);
1515
1516 if ((flag & FWRITE) == 0)
1517 return (EBADF);
1518
1519 if ((error = ccdlock(cs)) != 0)
1520 return (error);
1521
1522 cs->sc_flags |= CCDF_LABELLING;
1523
1524 error = setdisklabel(&cs->sc_label,
1525 (struct disklabel *)data, 0);
1526 if (error == 0) {
1527 if (cmd == DIOCWDINFO)
1528 error = writedisklabel(CCDLABELDEV(dev),
1529 &cs->sc_label);
1530 }
1531
1532 cs->sc_flags &= ~CCDF_LABELLING;
1533
1534 ccdunlock(cs);
1535
1536 if (error)
1537 return (error);
1538 break;
1539
1540 case DIOCWLABEL:
1541 if (!IS_INITED(cs))
1542 return (ENXIO);
1543
1544 if ((flag & FWRITE) == 0)
1545 return (EBADF);
1546 if (*(int *)data != 0)
1547 cs->sc_flags |= CCDF_WLABEL;
1548 else
1549 cs->sc_flags &= ~CCDF_WLABEL;
1550 break;
1551
1552 default:
1553 return (ENOTTY);
1554 }
1555
1556 return (0);
1557}
1558
1559static int
1560ccdsize(dev_t dev)
1561{
1562 struct ccd_s *cs;
1563 int part, size;
1564
1565 if (ccdopen(dev, 0, S_IFCHR, curthread))
1566 return (-1);
1567
1568 cs = ccdfind(ccdunit(dev));
1569 part = ccdpart(dev);
1570
1571 if (!IS_INITED(cs))
1572 return (-1);
1573
1574 if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP)
1575 size = -1;
1576 else
1577 size = cs->sc_label.d_partitions[part].p_size;
1578
1579 if (ccdclose(dev, 0, S_IFCHR, curthread))
1580 return (-1);
1581
1582 return (size);
1583}
1584
1585/*
1586 * Lookup the provided name in the filesystem. If the file exists,
1587 * is a valid block device, and isn't being used by anyone else,
1588 * set *vpp to the file's vnode.
1589 */
1590static int
1591ccdlookup(char *path, struct thread *td, struct vnode **vpp)
1592{
1593 struct nameidata nd;
1594 struct vnode *vp;
1595 int error, flags;
1596
1597 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, path, td);
1598 flags = FREAD | FWRITE;
1599 if ((error = vn_open(&nd, &flags, 0)) != 0) {
1600#ifdef DEBUG
1601 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
1602 printf("ccdlookup: vn_open error = %d\n", error);
1603#endif
1604 return (error);
1605 }
1606 vp = nd.ni_vp;
1607
1608 if (vrefcnt(vp) > 1) {
1609 error = EBUSY;
1610 goto bad;
1611 }
1612
1613 if (!vn_isdisk(vp, &error))
1614 goto bad;
1615
1616#ifdef DEBUG
1617 if (ccddebug & CCDB_VNODE)
1618 vprint("ccdlookup: vnode info", vp);
1619#endif
1620
1621 VOP_UNLOCK(vp, 0, td);
1622 NDFREE(&nd, NDF_ONLY_PNBUF);
1623 *vpp = vp;
1624 return (0);
1625bad:
1626 VOP_UNLOCK(vp, 0, td);
1627 NDFREE(&nd, NDF_ONLY_PNBUF);
1628 /* vn_close does vrele() for vp */
1629 (void)vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
1630 return (error);
1631}
1632
1633/*
1634 * Read the disklabel from the ccd. If one is not present, fake one
1635 * up.
1636 */
1637static void
1638ccdgetdisklabel(dev_t dev)
1639{
1640 int unit = ccdunit(dev);
1641 struct ccd_s *cs = ccdfind(unit);
1642 char *errstring;
1643 struct disklabel *lp = &cs->sc_label;
1644 struct ccdgeom *ccg = &cs->sc_geom;
1645
1646 bzero(lp, sizeof(*lp));
1647
1648 lp->d_secperunit = cs->sc_size;
1649 lp->d_secsize = ccg->ccg_secsize;
1650 lp->d_nsectors = ccg->ccg_nsectors;
1651 lp->d_ntracks = ccg->ccg_ntracks;
1652 lp->d_ncylinders = ccg->ccg_ncylinders;
1653 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1654
1655 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename));
1656 lp->d_type = DTYPE_CCD;
1657 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1658 lp->d_rpm = 3600;
1659 lp->d_interleave = 1;
1660 lp->d_flags = 0;
1661
1662 lp->d_partitions[RAW_PART].p_offset = 0;
1663 lp->d_partitions[RAW_PART].p_size = cs->sc_size;
1664 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1665 lp->d_npartitions = RAW_PART + 1;
1666
1667 lp->d_bbsize = BBSIZE; /* XXX */
1668 lp->d_sbsize = 0;
1669
1670 lp->d_magic = DISKMAGIC;
1671 lp->d_magic2 = DISKMAGIC;
1672 lp->d_checksum = dkcksum(&cs->sc_label);
1673
1674 /*
1675 * Call the generic disklabel extraction routine.
1676 */
1677 errstring = readdisklabel(CCDLABELDEV(dev), &cs->sc_label);
1678 if (errstring != NULL)
1679 ccdmakedisklabel(cs);
1680
1681#ifdef DEBUG
1682 /* It's actually extremely common to have unlabeled ccds. */
1683 if (ccddebug & CCDB_LABEL)
1684 if (errstring != NULL)
1685 printf("ccd%d: %s\n", unit, errstring);
1686#endif
1687}
1688
1689/*
1690 * Take care of things one might want to take care of in the event
1691 * that a disklabel isn't present.
1692 */
1693static void
1694ccdmakedisklabel(struct ccd_s *cs)
1695{
1696 struct disklabel *lp = &cs->sc_label;
1697
1698 /*
1699 * For historical reasons, if there's no disklabel present
1700 * the raw partition must be marked FS_BSDFFS.
1701 */
1702 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1703
1704 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1705}
1706
1707/*
1708 * Wait interruptibly for an exclusive lock.
1709 *
1710 * XXX
1711 * Several drivers do this; it should be abstracted and made MP-safe.
1712 */
1713static int
1714ccdlock(struct ccd_s *cs)
1715{
1716 int error;
1717
1718 while ((cs->sc_flags & CCDF_LOCKED) != 0) {
1719 cs->sc_flags |= CCDF_WANTED;
1720 if ((error = tsleep(cs, PRIBIO | PCATCH, "ccdlck", 0)) != 0)
1721 return (error);
1722 }
1723 cs->sc_flags |= CCDF_LOCKED;
1724 return (0);
1725}
1726
1727/*
1728 * Unlock and wake up any waiters.
1729 */
1730static void
1731ccdunlock(struct ccd_s *cs)
1732{
1733
1734 cs->sc_flags &= ~CCDF_LOCKED;
1735 if ((cs->sc_flags & CCDF_WANTED) != 0) {
1736 cs->sc_flags &= ~CCDF_WANTED;
1737 wakeup(cs);
1738 }
1739}
1740
1741#ifdef DEBUG
1742static void
1743printiinfo(struct ccdiinfo *ii)
1744{
1745 int ix, i;
1746
1747 for (ix = 0; ii->ii_ndisk; ix++, ii++) {
1748 printf(" itab[%d]: #dk %d sblk %lld soff %lld",
1749 ix, ii->ii_ndisk, (long long)ii->ii_startblk,
1750 (long long)ii->ii_startoff);
1751 for (i = 0; i < ii->ii_ndisk; i++)
1752 printf(" %d", ii->ii_index[i]);
1753 printf("\n");
1754 }
1755}
1756#endif