geom_ccd.c revision 58349
1132718Skan/* $FreeBSD: head/sys/geom/geom_ccd.c 58349 2000-03-20 11:29:10Z phk $ */ 2169689Skan 3169689Skan/* $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ */ 4132718Skan 5132718Skan/* 6132718Skan * Copyright (c) 1995 Jason R. Thorpe. 7132718Skan * All rights reserved. 8132718Skan * 9132718Skan * Redistribution and use in source and binary forms, with or without 10132718Skan * modification, are permitted provided that the following conditions 11132718Skan * are met: 12132718Skan * 1. Redistributions of source code must retain the above copyright 13132718Skan * notice, this list of conditions and the following disclaimer. 14132718Skan * 2. Redistributions in binary form must reproduce the above copyright 15132718Skan * notice, this list of conditions and the following disclaimer in the 16132718Skan * documentation and/or other materials provided with the distribution. 17132718Skan * 3. All advertising materials mentioning features or use of this software 18132718Skan * must display the following acknowledgement: 19132718Skan * This product includes software developed for the NetBSD Project 20132718Skan * by Jason R. Thorpe. 21169689Skan * 4. The name of the author may not be used to endorse or promote products 22169689Skan * derived from this software without specific prior written permission. 23132718Skan * 24132718Skan * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 25132718Skan * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26132718Skan * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27132718Skan * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 28132718Skan * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 29132718Skan * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 30132718Skan * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 31132718Skan * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 32132718Skan * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33132718Skan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34132718Skan * SUCH DAMAGE. 35132718Skan */ 36132718Skan 37132718Skan/* 38132718Skan * Copyright (c) 1988 University of Utah. 39132718Skan * Copyright (c) 1990, 1993 40132718Skan * The Regents of the University of California. All rights reserved. 41132718Skan * 42132718Skan * This code is derived from software contributed to Berkeley by 43132718Skan * the Systems Programming Group of the University of Utah Computer 44132718Skan * Science Department. 45132718Skan * 46132718Skan * Redistribution and use in source and binary forms, with or without 47132718Skan * modification, are permitted provided that the following conditions 48132718Skan * are met: 49132718Skan * 1. Redistributions of source code must retain the above copyright 50132718Skan * notice, this list of conditions and the following disclaimer. 51132718Skan * 2. Redistributions in binary form must reproduce the above copyright 52132718Skan * notice, this list of conditions and the following disclaimer in the 53132718Skan * documentation and/or other materials provided with the distribution. 54132718Skan * 3. All advertising materials mentioning features or use of this software 55132718Skan * must display the following acknowledgement: 56132718Skan * This product includes software developed by the University of 57132718Skan * California, Berkeley and its contributors. 58132718Skan * 4. Neither the name of the University nor the names of its contributors 59132718Skan * may be used to endorse or promote products derived from this software 60132718Skan * without specific prior written permission. 61132718Skan * 62132718Skan * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 63132718Skan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 64132718Skan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 65132718Skan * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 66132718Skan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 67132718Skan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 68132718Skan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 69132718Skan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 70132718Skan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 71132718Skan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 72132718Skan * SUCH DAMAGE. 73132718Skan * 74132718Skan * from: Utah $Hdr: cd.c 1.6 90/11/28$ 75132718Skan * 76132718Skan * @(#)cd.c 8.2 (Berkeley) 11/16/93 77169689Skan */ 78132718Skan 79132718Skan/* 80132718Skan * "Concatenated" disk driver. 81132718Skan * 82132718Skan * Dynamic configuration and disklabel support by: 83132718Skan * Jason R. Thorpe <thorpej@nas.nasa.gov> 84132718Skan * Numerical Aerodynamic Simulation Facility 85132718Skan * Mail Stop 258-6 86132718Skan * NASA Ames Research Center 87132718Skan * Moffett Field, CA 94035 88132718Skan */ 89132718Skan 90132718Skan#include "ccd.h" 91132718Skan 92132718Skan#include <sys/param.h> 93132718Skan#include <sys/systm.h> 94132718Skan#include <sys/kernel.h> 95132718Skan#include <sys/module.h> 96132718Skan#include <sys/proc.h> 97132718Skan#include <sys/buf.h> 98132718Skan#include <sys/malloc.h> 99132718Skan#include <sys/namei.h> 100132718Skan#include <sys/conf.h> 101132718Skan#include <sys/stat.h> 102132718Skan#include <sys/sysctl.h> 103132718Skan#include <sys/disklabel.h> 104132718Skan#include <ufs/ffs/fs.h> 105132718Skan#include <sys/devicestat.h> 106132718Skan#include <sys/fcntl.h> 107132718Skan#include <sys/vnode.h> 108132718Skan 109132718Skan#include <sys/ccdvar.h> 110132718Skan 111132718Skan#include <vm/vm_zone.h> 112132718Skan 113132718Skan#if defined(CCDDEBUG) && !defined(DEBUG) 114132718Skan#define DEBUG 115132718Skan#endif 116132718Skan 117132718Skan#ifdef DEBUG 118132718Skan#define CCDB_FOLLOW 0x01 119132718Skan#define CCDB_INIT 0x02 120132718Skan#define CCDB_IO 0x04 121132718Skan#define CCDB_LABEL 0x08 122132718Skan#define CCDB_VNODE 0x10 123132718Skanstatic int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL | 124132718Skan CCDB_VNODE; 125132718SkanSYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, ""); 126132718Skan#undef DEBUG 127132718Skan#endif 128132718Skan 129132718Skan#define ccdunit(x) dkunit(x) 130132718Skan#define ccdpart(x) dkpart(x) 131132718Skan 132132718Skan/* 133132718Skan This is how mirroring works (only writes are special): 134132718Skan 135132718Skan When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s 136132718Skan linked together by the cb_mirror field. "cb_pflags & 137132718Skan CCDPF_MIRROR_DONE" is set to 0 on both of them. 138132718Skan 139132718Skan When a component returns to ccdiodone(), it checks if "cb_pflags & 140132718Skan CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's 141132718Skan flag and returns. If it is, it means its partner has already 142132718Skan returned, so it will go to the regular cleanup. 143132718Skan 144132718Skan */ 145132718Skan 146132718Skanstruct ccdbuf { 147132718Skan struct buf cb_buf; /* new I/O buf */ 148132718Skan struct buf *cb_obp; /* ptr. to original I/O buf */ 149132718Skan struct ccdbuf *cb_freenext; /* free list link */ 150132718Skan int cb_unit; /* target unit */ 151132718Skan int cb_comp; /* target component */ 152132718Skan int cb_pflags; /* mirror/parity status flag */ 153132718Skan struct ccdbuf *cb_mirror; /* mirror counterpart */ 154132718Skan}; 155132718Skan 156132718Skan/* bits in cb_pflags */ 157132718Skan#define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */ 158132718Skan 159132718Skan#define CCDLABELDEV(dev) \ 160132718Skan (makedev(major((dev)), dkmakeminor(ccdunit((dev)), 0, RAW_PART))) 161132718Skan 162132718Skanstatic d_open_t ccdopen; 163132718Skanstatic d_close_t ccdclose; 164132718Skanstatic d_strategy_t ccdstrategy; 165132718Skanstatic d_ioctl_t ccdioctl; 166132718Skanstatic d_dump_t ccddump; 167132718Skanstatic d_psize_t ccdsize; 168132718Skan 169132718Skan#define NCCDFREEHIWAT 16 170132718Skan 171132718Skan#define CDEV_MAJOR 74 172132718Skan#define BDEV_MAJOR 21 173132718Skan 174132718Skanstatic struct cdevsw ccd_cdevsw = { 175132718Skan /* open */ ccdopen, 176132718Skan /* close */ ccdclose, 177132718Skan /* read */ physread, 178132718Skan /* write */ physwrite, 179132718Skan /* ioctl */ ccdioctl, 180132718Skan /* poll */ nopoll, 181132718Skan /* mmap */ nommap, 182132718Skan /* strategy */ ccdstrategy, 183132718Skan /* name */ "ccd", 184132718Skan /* maj */ CDEV_MAJOR, 185132718Skan /* dump */ ccddump, 186132718Skan /* psize */ ccdsize, 187132718Skan /* flags */ D_DISK, 188132718Skan /* bmaj */ BDEV_MAJOR 189132718Skan}; 190132718Skan 191132718Skan/* called during module initialization */ 192132718Skanstatic void ccdattach __P((void)); 193132718Skanstatic int ccd_modevent __P((module_t, int, void *)); 194132718Skan 195132718Skan/* called by biodone() at interrupt time */ 196132718Skanstatic void ccdiodone __P((struct buf *bp)); 197132718Skan 198132718Skanstatic void ccdstart __P((struct ccd_softc *, struct buf *)); 199132718Skanstatic void ccdinterleave __P((struct ccd_softc *, int)); 200132718Skanstatic void ccdintr __P((struct ccd_softc *, struct buf *)); 201132718Skanstatic int ccdinit __P((struct ccddevice *, char **, struct proc *)); 202132718Skanstatic int ccdlookup __P((char *, struct proc *p, struct vnode **)); 203132718Skanstatic void ccdbuffer __P((struct ccdbuf **ret, struct ccd_softc *, 204132718Skan struct buf *, daddr_t, caddr_t, long)); 205132718Skanstatic void ccdgetdisklabel __P((dev_t)); 206132718Skanstatic void ccdmakedisklabel __P((struct ccd_softc *)); 207132718Skanstatic int ccdlock __P((struct ccd_softc *)); 208132718Skanstatic void ccdunlock __P((struct ccd_softc *)); 209132718Skan 210132718Skan#ifdef DEBUG 211132718Skanstatic void printiinfo __P((struct ccdiinfo *)); 212132718Skan#endif 213132718Skan 214132718Skan/* Non-private for the benefit of libkvm. */ 215132718Skanstruct ccd_softc *ccd_softc; 216132718Skanstruct ccddevice *ccddevs; 217132718Skanstruct ccdbuf *ccdfreebufs; 218132718Skanstatic int numccdfreebufs; 219132718Skanstatic int numccd = 0; 220132718Skan 221132718Skan/* 222132718Skan * getccdbuf() - Allocate and zero a ccd buffer. 223132718Skan * 224169689Skan * This routine is called at splbio(). 225132718Skan */ 226132718Skan 227132718Skanstatic __inline 228132718Skanstruct ccdbuf * 229132718Skangetccdbuf(struct ccdbuf *cpy) 230132718Skan{ 231169689Skan struct ccdbuf *cbp; 232132718Skan 233132718Skan /* 234132718Skan * Allocate from freelist or malloc as necessary 235132718Skan */ 236132718Skan if ((cbp = ccdfreebufs) != NULL) { 237132718Skan ccdfreebufs = cbp->cb_freenext; 238132718Skan --numccdfreebufs; 239132718Skan } else { 240132718Skan cbp = malloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK); 241132718Skan } 242132718Skan 243132718Skan /* 244132718Skan * Used by mirroring code 245132718Skan */ 246132718Skan if (cpy) 247132718Skan bcopy(cpy, cbp, sizeof(struct ccdbuf)); 248132718Skan else 249132718Skan bzero(cbp, sizeof(struct ccdbuf)); 250132718Skan 251132718Skan /* 252132718Skan * independant struct buf initialization 253132718Skan */ 254132718Skan LIST_INIT(&cbp->cb_buf.b_dep); 255132718Skan BUF_LOCKINIT(&cbp->cb_buf); 256132718Skan BUF_LOCK(&cbp->cb_buf, LK_EXCLUSIVE); 257132718Skan BUF_KERNPROC(&cbp->cb_buf); 258132718Skan 259132718Skan return(cbp); 260132718Skan} 261132718Skan 262132718Skan/* 263132718Skan * putccdbuf() - Free a ccd buffer. 264132718Skan * 265132718Skan * This routine is called at splbio(). 266132718Skan */ 267132718Skan 268132718Skanstatic __inline 269132718Skanvoid 270132718Skanputccdbuf(struct ccdbuf *cbp) 271132718Skan{ 272132718Skan BUF_UNLOCK(&cbp->cb_buf); 273132718Skan BUF_LOCKFREE(&cbp->cb_buf); 274132718Skan 275132718Skan if (numccdfreebufs < NCCDFREEHIWAT) { 276132718Skan cbp->cb_freenext = ccdfreebufs; 277132718Skan ccdfreebufs = cbp; 278132718Skan ++numccdfreebufs; 279132718Skan } else { 280132718Skan free((caddr_t)cbp, M_DEVBUF); 281132718Skan } 282132718Skan} 283132718Skan 284132718Skan 285132718Skan/* 286132718Skan * Number of blocks to untouched in front of a component partition. 287132718Skan * This is to avoid violating its disklabel area when it starts at the 288132718Skan * beginning of the slice. 289132718Skan */ 290132718Skan#if !defined(CCD_OFFSET) 291132718Skan#define CCD_OFFSET 16 292132718Skan#endif 293132718Skan 294132718Skan/* 295132718Skan * Called by main() during pseudo-device attachment. All we need 296132718Skan * to do is allocate enough space for devices to be configured later, and 297132718Skan * add devsw entries. 298132718Skan */ 299132718Skanstatic void 300132718Skanccdattach() 301132718Skan{ 302132718Skan int i; 303132718Skan int num = NCCD; 304132718Skan 305132718Skan if (num > 1) 306132718Skan printf("ccd0-%d: Concatenated disk drivers\n", num-1); 307132718Skan else 308132718Skan printf("ccd0: Concatenated disk driver\n"); 309132718Skan 310132718Skan ccd_softc = (struct ccd_softc *)malloc(num * sizeof(struct ccd_softc), 311132718Skan M_DEVBUF, M_NOWAIT); 312132718Skan ccddevs = (struct ccddevice *)malloc(num * sizeof(struct ccddevice), 313132718Skan M_DEVBUF, M_NOWAIT); 314132718Skan if ((ccd_softc == NULL) || (ccddevs == NULL)) { 315132718Skan printf("WARNING: no memory for concatenated disks\n"); 316132718Skan if (ccd_softc != NULL) 317132718Skan free(ccd_softc, M_DEVBUF); 318132718Skan if (ccddevs != NULL) 319132718Skan free(ccddevs, M_DEVBUF); 320132718Skan return; 321132718Skan } 322132718Skan numccd = num; 323132718Skan bzero(ccd_softc, num * sizeof(struct ccd_softc)); 324132718Skan bzero(ccddevs, num * sizeof(struct ccddevice)); 325132718Skan 326169689Skan cdevsw_add(&ccd_cdevsw); 327169689Skan /* XXX: is this necessary? */ 328169689Skan for (i = 0; i < numccd; ++i) 329132718Skan ccddevs[i].ccd_dk = -1; 330132718Skan} 331132718Skan 332132718Skanstatic int 333132718Skanccd_modevent(mod, type, data) 334132718Skan module_t mod; 335132718Skan int type; 336132718Skan void *data; 337132718Skan{ 338132718Skan int error = 0; 339132718Skan 340132718Skan switch (type) { 341132718Skan case MOD_LOAD: 342132718Skan ccdattach(); 343132718Skan break; 344132718Skan 345132718Skan case MOD_UNLOAD: 346132718Skan printf("ccd0: Unload not supported!\n"); 347132718Skan error = EOPNOTSUPP; 348132718Skan break; 349132718Skan 350132718Skan default: /* MOD_SHUTDOWN etc */ 351132718Skan break; 352132718Skan } 353132718Skan return (error); 354132718Skan} 355132718Skan 356132718SkanDEV_MODULE(ccd, ccd_modevent, NULL); 357132718Skan 358132718Skanstatic int 359132718Skanccdinit(ccd, cpaths, p) 360132718Skan struct ccddevice *ccd; 361132718Skan char **cpaths; 362132718Skan struct proc *p; 363132718Skan{ 364132718Skan struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit]; 365132718Skan struct ccdcinfo *ci = NULL; /* XXX */ 366132718Skan size_t size; 367132718Skan int ix; 368132718Skan struct vnode *vp; 369132718Skan size_t minsize; 370132718Skan int maxsecsize; 371132718Skan struct partinfo dpart; 372132718Skan struct ccdgeom *ccg = &cs->sc_geom; 373132718Skan char tmppath[MAXPATHLEN]; 374132718Skan int error = 0; 375132718Skan 376132718Skan#ifdef DEBUG 377132718Skan if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 378132718Skan printf("ccdinit: unit %d\n", ccd->ccd_unit); 379132718Skan#endif 380132718Skan 381132718Skan cs->sc_size = 0; 382132718Skan cs->sc_ileave = ccd->ccd_interleave; 383169689Skan cs->sc_nccdisks = ccd->ccd_ndev; 384132718Skan 385132718Skan /* Allocate space for the component info. */ 386132718Skan cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo), 387132718Skan M_DEVBUF, M_WAITOK); 388132718Skan 389132718Skan /* 390169689Skan * Verify that each component piece exists and record 391132718Skan * relevant information about it. 392132718Skan */ 393132718Skan maxsecsize = 0; 394132718Skan minsize = 0; 395132718Skan for (ix = 0; ix < cs->sc_nccdisks; ix++) { 396132718Skan vp = ccd->ccd_vpp[ix]; 397132718Skan ci = &cs->sc_cinfo[ix]; 398132718Skan ci->ci_vp = vp; 399169689Skan 400132718Skan /* 401132718Skan * Copy in the pathname of the component. 402132718Skan */ 403132718Skan bzero(tmppath, sizeof(tmppath)); /* sanity */ 404132718Skan if ((error = copyinstr(cpaths[ix], tmppath, 405132718Skan MAXPATHLEN, &ci->ci_pathlen)) != 0) { 406132718Skan#ifdef DEBUG 407132718Skan if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 408132718Skan printf("ccd%d: can't copy path, error = %d\n", 409132718Skan ccd->ccd_unit, error); 410132718Skan#endif 411132718Skan goto fail; 412132718Skan } 413132718Skan ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK); 414132718Skan bcopy(tmppath, ci->ci_path, ci->ci_pathlen); 415132718Skan 416132718Skan ci->ci_dev = vn_todev(vp); 417132718Skan 418132718Skan /* 419132718Skan * Get partition information for the component. 420132718Skan */ 421132718Skan if ((error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart, 422132718Skan FREAD, p->p_ucred, p)) != 0) { 423132718Skan#ifdef DEBUG 424132718Skan if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 425132718Skan printf("ccd%d: %s: ioctl failed, error = %d\n", 426132718Skan ccd->ccd_unit, ci->ci_path, error); 427132718Skan#endif 428132718Skan goto fail; 429132718Skan } 430132718Skan if (dpart.part->p_fstype == FS_BSDFFS) { 431132718Skan maxsecsize = 432132718Skan ((dpart.disklab->d_secsize > maxsecsize) ? 433132718Skan dpart.disklab->d_secsize : maxsecsize); 434132718Skan size = dpart.part->p_size - CCD_OFFSET; 435132718Skan } else { 436132718Skan#ifdef DEBUG 437132718Skan if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 438132718Skan printf("ccd%d: %s: incorrect partition type\n", 439132718Skan ccd->ccd_unit, ci->ci_path); 440132718Skan#endif 441132718Skan error = EFTYPE; 442132718Skan goto fail; 443132718Skan } 444132718Skan 445132718Skan /* 446132718Skan * Calculate the size, truncating to an interleave 447132718Skan * boundary if necessary. 448132718Skan */ 449132718Skan 450132718Skan if (cs->sc_ileave > 1) 451132718Skan size -= size % cs->sc_ileave; 452169689Skan 453132718Skan if (size == 0) { 454132718Skan#ifdef DEBUG 455132718Skan if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 456132718Skan printf("ccd%d: %s: size == 0\n", 457132718Skan ccd->ccd_unit, ci->ci_path); 458132718Skan#endif 459132718Skan error = ENODEV; 460132718Skan goto fail; 461132718Skan } 462132718Skan 463132718Skan if (minsize == 0 || size < minsize) 464132718Skan minsize = size; 465132718Skan ci->ci_size = size; 466132718Skan cs->sc_size += size; 467132718Skan } 468132718Skan 469132718Skan /* 470132718Skan * Don't allow the interleave to be smaller than 471132718Skan * the biggest component sector. 472132718Skan */ 473132718Skan if ((cs->sc_ileave > 0) && 474132718Skan (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 475132718Skan#ifdef DEBUG 476132718Skan if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 477132718Skan printf("ccd%d: interleave must be at least %d\n", 478132718Skan ccd->ccd_unit, (maxsecsize / DEV_BSIZE)); 479132718Skan#endif 480132718Skan error = EINVAL; 481132718Skan goto fail; 482132718Skan } 483132718Skan 484132718Skan /* 485132718Skan * If uniform interleave is desired set all sizes to that of 486132718Skan * the smallest component. This will guarentee that a single 487132718Skan * interleave table is generated. 488132718Skan * 489132718Skan * Lost space must be taken into account when calculating the 490132718Skan * overall size. Half the space is lost when CCDF_MIRROR is 491132718Skan * specified. One disk is lost when CCDF_PARITY is specified. 492132718Skan */ 493132718Skan if (ccd->ccd_flags & CCDF_UNIFORM) { 494132718Skan for (ci = cs->sc_cinfo; 495132718Skan ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 496132718Skan ci->ci_size = minsize; 497169689Skan } 498132718Skan if (ccd->ccd_flags & CCDF_MIRROR) { 499132718Skan /* 500132718Skan * Check to see if an even number of components 501132718Skan * have been specified. The interleave must also 502132718Skan * be non-zero in order for us to be able to 503132718Skan * guarentee the topology. 504132718Skan */ 505132718Skan if (cs->sc_nccdisks % 2) { 506132718Skan printf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit ); 507132718Skan error = EINVAL; 508132718Skan goto fail; 509132718Skan } 510132718Skan if (cs->sc_ileave == 0) { 511169689Skan printf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit); 512132718Skan error = EINVAL; 513132718Skan goto fail; 514132718Skan } 515132718Skan cs->sc_size = (cs->sc_nccdisks/2) * minsize; 516169689Skan } else if (ccd->ccd_flags & CCDF_PARITY) { 517132718Skan cs->sc_size = (cs->sc_nccdisks-1) * minsize; 518132718Skan } else { 519169689Skan if (cs->sc_ileave == 0) { 520132718Skan printf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit); 521132718Skan error = EINVAL; 522132718Skan goto fail; 523132718Skan } 524132718Skan cs->sc_size = cs->sc_nccdisks * minsize; 525132718Skan } 526132718Skan } 527132718Skan 528132718Skan /* 529132718Skan * Construct the interleave table. 530132718Skan */ 531132718Skan ccdinterleave(cs, ccd->ccd_unit); 532132718Skan 533132718Skan /* 534132718Skan * Create pseudo-geometry based on 1MB cylinders. It's 535132718Skan * pretty close. 536132718Skan */ 537132718Skan ccg->ccg_secsize = maxsecsize; 538 ccg->ccg_ntracks = 1; 539 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize; 540 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 541 542 /* 543 * Add an devstat entry for this device. 544 */ 545 devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit, 546 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED, 547 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER, 548 DEVSTAT_PRIORITY_ARRAY); 549 550 cs->sc_flags |= CCDF_INITED; 551 cs->sc_cflags = ccd->ccd_flags; /* So we can find out later... */ 552 cs->sc_unit = ccd->ccd_unit; 553 return (0); 554fail: 555 while (ci > cs->sc_cinfo) { 556 ci--; 557 free(ci->ci_path, M_DEVBUF); 558 } 559 free(cs->sc_cinfo, M_DEVBUF); 560 return (error); 561} 562 563static void 564ccdinterleave(cs, unit) 565 struct ccd_softc *cs; 566 int unit; 567{ 568 struct ccdcinfo *ci, *smallci; 569 struct ccdiinfo *ii; 570 daddr_t bn, lbn; 571 int ix; 572 u_long size; 573 574#ifdef DEBUG 575 if (ccddebug & CCDB_INIT) 576 printf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave); 577#endif 578 579 /* 580 * Allocate an interleave table. The worst case occurs when each 581 * of N disks is of a different size, resulting in N interleave 582 * tables. 583 * 584 * Chances are this is too big, but we don't care. 585 */ 586 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo); 587 cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF, M_WAITOK); 588 bzero((caddr_t)cs->sc_itable, size); 589 590 /* 591 * Trivial case: no interleave (actually interleave of disk size). 592 * Each table entry represents a single component in its entirety. 593 * 594 * An interleave of 0 may not be used with a mirror or parity setup. 595 */ 596 if (cs->sc_ileave == 0) { 597 bn = 0; 598 ii = cs->sc_itable; 599 600 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 601 /* Allocate space for ii_index. */ 602 ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK); 603 ii->ii_ndisk = 1; 604 ii->ii_startblk = bn; 605 ii->ii_startoff = 0; 606 ii->ii_index[0] = ix; 607 bn += cs->sc_cinfo[ix].ci_size; 608 ii++; 609 } 610 ii->ii_ndisk = 0; 611#ifdef DEBUG 612 if (ccddebug & CCDB_INIT) 613 printiinfo(cs->sc_itable); 614#endif 615 return; 616 } 617 618 /* 619 * The following isn't fast or pretty; it doesn't have to be. 620 */ 621 size = 0; 622 bn = lbn = 0; 623 for (ii = cs->sc_itable; ; ii++) { 624 /* 625 * Allocate space for ii_index. We might allocate more then 626 * we use. 627 */ 628 ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks), 629 M_DEVBUF, M_WAITOK); 630 631 /* 632 * Locate the smallest of the remaining components 633 */ 634 smallci = NULL; 635 for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks]; 636 ci++) { 637 if (ci->ci_size > size && 638 (smallci == NULL || 639 ci->ci_size < smallci->ci_size)) { 640 smallci = ci; 641 } 642 } 643 644 /* 645 * Nobody left, all done 646 */ 647 if (smallci == NULL) { 648 ii->ii_ndisk = 0; 649 break; 650 } 651 652 /* 653 * Record starting logical block using an sc_ileave blocksize. 654 */ 655 ii->ii_startblk = bn / cs->sc_ileave; 656 657 /* 658 * Record starting comopnent block using an sc_ileave 659 * blocksize. This value is relative to the beginning of 660 * a component disk. 661 */ 662 ii->ii_startoff = lbn; 663 664 /* 665 * Determine how many disks take part in this interleave 666 * and record their indices. 667 */ 668 ix = 0; 669 for (ci = cs->sc_cinfo; 670 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 671 if (ci->ci_size >= smallci->ci_size) { 672 ii->ii_index[ix++] = ci - cs->sc_cinfo; 673 } 674 } 675 ii->ii_ndisk = ix; 676 bn += ix * (smallci->ci_size - size); 677 lbn = smallci->ci_size / cs->sc_ileave; 678 size = smallci->ci_size; 679 } 680#ifdef DEBUG 681 if (ccddebug & CCDB_INIT) 682 printiinfo(cs->sc_itable); 683#endif 684} 685 686/* ARGSUSED */ 687static int 688ccdopen(dev, flags, fmt, p) 689 dev_t dev; 690 int flags, fmt; 691 struct proc *p; 692{ 693 int unit = ccdunit(dev); 694 struct ccd_softc *cs; 695 struct disklabel *lp; 696 int error = 0, part, pmask; 697 698#ifdef DEBUG 699 if (ccddebug & CCDB_FOLLOW) 700 printf("ccdopen(%x, %x)\n", dev, flags); 701#endif 702 if (unit >= numccd) 703 return (ENXIO); 704 cs = &ccd_softc[unit]; 705 706 if ((error = ccdlock(cs)) != 0) 707 return (error); 708 709 lp = &cs->sc_label; 710 711 part = ccdpart(dev); 712 pmask = (1 << part); 713 714 /* 715 * If we're initialized, check to see if there are any other 716 * open partitions. If not, then it's safe to update 717 * the in-core disklabel. 718 */ 719 if ((cs->sc_flags & CCDF_INITED) && (cs->sc_openmask == 0)) 720 ccdgetdisklabel(dev); 721 722 /* Check that the partition exists. */ 723 if (part != RAW_PART && ((part >= lp->d_npartitions) || 724 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 725 error = ENXIO; 726 goto done; 727 } 728 729 cs->sc_openmask |= pmask; 730 done: 731 ccdunlock(cs); 732 return (0); 733} 734 735/* ARGSUSED */ 736static int 737ccdclose(dev, flags, fmt, p) 738 dev_t dev; 739 int flags, fmt; 740 struct proc *p; 741{ 742 int unit = ccdunit(dev); 743 struct ccd_softc *cs; 744 int error = 0, part; 745 746#ifdef DEBUG 747 if (ccddebug & CCDB_FOLLOW) 748 printf("ccdclose(%x, %x)\n", dev, flags); 749#endif 750 751 if (unit >= numccd) 752 return (ENXIO); 753 cs = &ccd_softc[unit]; 754 755 if ((error = ccdlock(cs)) != 0) 756 return (error); 757 758 part = ccdpart(dev); 759 760 /* ...that much closer to allowing unconfiguration... */ 761 cs->sc_openmask &= ~(1 << part); 762 ccdunlock(cs); 763 return (0); 764} 765 766static void 767ccdstrategy(bp) 768 struct buf *bp; 769{ 770 int unit = ccdunit(bp->b_dev); 771 struct ccd_softc *cs = &ccd_softc[unit]; 772 int s; 773 int wlabel; 774 struct disklabel *lp; 775 776#ifdef DEBUG 777 if (ccddebug & CCDB_FOLLOW) 778 printf("ccdstrategy(%x): unit %d\n", bp, unit); 779#endif 780 if ((cs->sc_flags & CCDF_INITED) == 0) { 781 bp->b_error = ENXIO; 782 bp->b_flags |= B_ERROR; 783 goto done; 784 } 785 786 /* If it's a nil transfer, wake up the top half now. */ 787 if (bp->b_bcount == 0) 788 goto done; 789 790 lp = &cs->sc_label; 791 792 /* 793 * Do bounds checking and adjust transfer. If there's an 794 * error, the bounds check will flag that for us. 795 */ 796 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING); 797 if (ccdpart(bp->b_dev) != RAW_PART) { 798 if (bounds_check_with_label(bp, lp, wlabel) <= 0) 799 goto done; 800 } else { 801 int pbn; /* in sc_secsize chunks */ 802 long sz; /* in sc_secsize chunks */ 803 804 pbn = bp->b_blkno / (cs->sc_geom.ccg_secsize / DEV_BSIZE); 805 sz = howmany(bp->b_bcount, cs->sc_geom.ccg_secsize); 806 807 /* 808 * If out of bounds return an error. If at the EOF point, 809 * simply read or write less. 810 */ 811 812 if (pbn < 0 || pbn >= cs->sc_size) { 813 bp->b_resid = bp->b_bcount; 814 if (pbn != cs->sc_size) { 815 bp->b_error = EINVAL; 816 bp->b_flags |= B_ERROR | B_INVAL; 817 } 818 goto done; 819 } 820 821 /* 822 * If the request crosses EOF, truncate the request. 823 */ 824 if (pbn + sz > cs->sc_size) { 825 bp->b_bcount = (cs->sc_size - pbn) * 826 cs->sc_geom.ccg_secsize; 827 } 828 } 829 830 bp->b_resid = bp->b_bcount; 831 832 /* 833 * "Start" the unit. 834 */ 835 s = splbio(); 836 ccdstart(cs, bp); 837 splx(s); 838 return; 839done: 840 biodone(bp); 841} 842 843static void 844ccdstart(cs, bp) 845 struct ccd_softc *cs; 846 struct buf *bp; 847{ 848 long bcount, rcount; 849 struct ccdbuf *cbp[4]; 850 /* XXX! : 2 reads and 2 writes for RAID 4/5 */ 851 caddr_t addr; 852 daddr_t bn; 853 struct partition *pp; 854 855#ifdef DEBUG 856 if (ccddebug & CCDB_FOLLOW) 857 printf("ccdstart(%x, %x)\n", cs, bp); 858#endif 859 860 /* Record the transaction start */ 861 devstat_start_transaction(&cs->device_stats); 862 863 /* 864 * Translate the partition-relative block number to an absolute. 865 */ 866 bn = bp->b_blkno; 867 if (ccdpart(bp->b_dev) != RAW_PART) { 868 pp = &cs->sc_label.d_partitions[ccdpart(bp->b_dev)]; 869 bn += pp->p_offset; 870 } 871 872 /* 873 * Allocate component buffers and fire off the requests 874 */ 875 addr = bp->b_data; 876 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { 877 ccdbuffer(cbp, cs, bp, bn, addr, bcount); 878 rcount = cbp[0]->cb_buf.b_bcount; 879 880 if (cs->sc_cflags & CCDF_MIRROR) { 881 /* 882 * Mirroring. Writes go to both disks, reads are 883 * taken from whichever disk seems most appropriate. 884 * 885 * We attempt to localize reads to the disk whos arm 886 * is nearest the read request. We ignore seeks due 887 * to writes when making this determination and we 888 * also try to avoid hogging. 889 */ 890 if (cbp[0]->cb_buf.b_iocmd == BIO_WRITE) { 891 cbp[0]->cb_buf.b_vp->v_numoutput++; 892 cbp[1]->cb_buf.b_vp->v_numoutput++; 893 BUF_STRATEGY(&cbp[0]->cb_buf); 894 BUF_STRATEGY(&cbp[1]->cb_buf); 895 } else { 896 int pick = cs->sc_pick; 897 daddr_t range = cs->sc_size / 16; 898 899 if (bn < cs->sc_blk[pick] - range || 900 bn > cs->sc_blk[pick] + range 901 ) { 902 cs->sc_pick = pick = 1 - pick; 903 } 904 cs->sc_blk[pick] = bn + btodb(rcount); 905 BUF_STRATEGY(&cbp[pick]->cb_buf); 906 } 907 } else { 908 /* 909 * Not mirroring 910 */ 911 if (cbp[0]->cb_buf.b_iocmd == BIO_WRITE) 912 cbp[0]->cb_buf.b_vp->v_numoutput++; 913 BUF_STRATEGY(&cbp[0]->cb_buf); 914 } 915 bn += btodb(rcount); 916 addr += rcount; 917 } 918} 919 920/* 921 * Build a component buffer header. 922 */ 923static void 924ccdbuffer(cb, cs, bp, bn, addr, bcount) 925 struct ccdbuf **cb; 926 struct ccd_softc *cs; 927 struct buf *bp; 928 daddr_t bn; 929 caddr_t addr; 930 long bcount; 931{ 932 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */ 933 struct ccdbuf *cbp; 934 daddr_t cbn, cboff; 935 off_t cbc; 936 937#ifdef DEBUG 938 if (ccddebug & CCDB_IO) 939 printf("ccdbuffer(%x, %x, %d, %x, %d)\n", 940 cs, bp, bn, addr, bcount); 941#endif 942 /* 943 * Determine which component bn falls in. 944 */ 945 cbn = bn; 946 cboff = 0; 947 948 if (cs->sc_ileave == 0) { 949 /* 950 * Serially concatenated and neither a mirror nor a parity 951 * config. This is a special case. 952 */ 953 daddr_t sblk; 954 955 sblk = 0; 956 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++) 957 sblk += ci->ci_size; 958 cbn -= sblk; 959 } else { 960 struct ccdiinfo *ii; 961 int ccdisk, off; 962 963 /* 964 * Calculate cbn, the logical superblock (sc_ileave chunks), 965 * and cboff, a normal block offset (DEV_BSIZE chunks) relative 966 * to cbn. 967 */ 968 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */ 969 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */ 970 971 /* 972 * Figure out which interleave table to use. 973 */ 974 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) { 975 if (ii->ii_startblk > cbn) 976 break; 977 } 978 ii--; 979 980 /* 981 * off is the logical superblock relative to the beginning 982 * of this interleave block. 983 */ 984 off = cbn - ii->ii_startblk; 985 986 /* 987 * We must calculate which disk component to use (ccdisk), 988 * and recalculate cbn to be the superblock relative to 989 * the beginning of the component. This is typically done by 990 * adding 'off' and ii->ii_startoff together. However, 'off' 991 * must typically be divided by the number of components in 992 * this interleave array to be properly convert it from a 993 * CCD-relative logical superblock number to a 994 * component-relative superblock number. 995 */ 996 if (ii->ii_ndisk == 1) { 997 /* 998 * When we have just one disk, it can't be a mirror 999 * or a parity config. 1000 */ 1001 ccdisk = ii->ii_index[0]; 1002 cbn = ii->ii_startoff + off; 1003 } else { 1004 if (cs->sc_cflags & CCDF_MIRROR) { 1005 /* 1006 * We have forced a uniform mapping, resulting 1007 * in a single interleave array. We double 1008 * up on the first half of the available 1009 * components and our mirror is in the second 1010 * half. This only works with a single 1011 * interleave array because doubling up 1012 * doubles the number of sectors, so there 1013 * cannot be another interleave array because 1014 * the next interleave array's calculations 1015 * would be off. 1016 */ 1017 int ndisk2 = ii->ii_ndisk / 2; 1018 ccdisk = ii->ii_index[off % ndisk2]; 1019 cbn = ii->ii_startoff + off / ndisk2; 1020 ci2 = &cs->sc_cinfo[ccdisk + ndisk2]; 1021 } else if (cs->sc_cflags & CCDF_PARITY) { 1022 /* 1023 * XXX not implemented yet 1024 */ 1025 int ndisk2 = ii->ii_ndisk - 1; 1026 ccdisk = ii->ii_index[off % ndisk2]; 1027 cbn = ii->ii_startoff + off / ndisk2; 1028 if (cbn % ii->ii_ndisk <= ccdisk) 1029 ccdisk++; 1030 } else { 1031 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 1032 cbn = ii->ii_startoff + off / ii->ii_ndisk; 1033 } 1034 } 1035 1036 ci = &cs->sc_cinfo[ccdisk]; 1037 1038 /* 1039 * Convert cbn from a superblock to a normal block so it 1040 * can be used to calculate (along with cboff) the normal 1041 * block index into this particular disk. 1042 */ 1043 cbn *= cs->sc_ileave; 1044 } 1045 1046 /* 1047 * Fill in the component buf structure. 1048 */ 1049 cbp = getccdbuf(NULL); 1050 cbp->cb_buf.b_flags = bp->b_flags; 1051 cbp->cb_buf.b_iocmd = bp->b_iocmd; 1052 cbp->cb_buf.b_iodone = ccdiodone; 1053 cbp->cb_buf.b_dev = ci->ci_dev; /* XXX */ 1054 cbp->cb_buf.b_blkno = cbn + cboff + CCD_OFFSET; 1055 cbp->cb_buf.b_offset = dbtob(cbn + cboff + CCD_OFFSET); 1056 cbp->cb_buf.b_data = addr; 1057 cbp->cb_buf.b_vp = ci->ci_vp; 1058 if (cs->sc_ileave == 0) 1059 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1060 else 1061 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1062 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount; 1063 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount; 1064 1065 /* 1066 * context for ccdiodone 1067 */ 1068 cbp->cb_obp = bp; 1069 cbp->cb_unit = cs - ccd_softc; 1070 cbp->cb_comp = ci - cs->sc_cinfo; 1071 1072#ifdef DEBUG 1073 if (ccddebug & CCDB_IO) 1074 printf(" dev %x(u%d): cbp %x bn %d addr %x bcnt %d\n", 1075 ci->ci_dev, ci-cs->sc_cinfo, cbp, cbp->cb_buf.b_blkno, 1076 cbp->cb_buf.b_data, cbp->cb_buf.b_bcount); 1077#endif 1078 cb[0] = cbp; 1079 1080 /* 1081 * Note: both I/O's setup when reading from mirror, but only one 1082 * will be executed. 1083 */ 1084 if (cs->sc_cflags & CCDF_MIRROR) { 1085 /* mirror, setup second I/O */ 1086 cbp = getccdbuf(cb[0]); 1087 cbp->cb_buf.b_dev = ci2->ci_dev; 1088 cbp->cb_buf.b_vp = ci2->ci_vp; 1089 cbp->cb_comp = ci2 - cs->sc_cinfo; 1090 cb[1] = cbp; 1091 /* link together the ccdbuf's and clear "mirror done" flag */ 1092 cb[0]->cb_mirror = cb[1]; 1093 cb[1]->cb_mirror = cb[0]; 1094 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1095 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1096 } 1097} 1098 1099static void 1100ccdintr(cs, bp) 1101 struct ccd_softc *cs; 1102 struct buf *bp; 1103{ 1104#ifdef DEBUG 1105 if (ccddebug & CCDB_FOLLOW) 1106 printf("ccdintr(%x, %x)\n", cs, bp); 1107#endif 1108 /* 1109 * Request is done for better or worse, wakeup the top half. 1110 */ 1111 if (bp->b_flags & B_ERROR) 1112 bp->b_resid = bp->b_bcount; 1113 devstat_end_transaction_buf(&cs->device_stats, bp); 1114 biodone(bp); 1115} 1116 1117/* 1118 * Called at interrupt time. 1119 * Mark the component as done and if all components are done, 1120 * take a ccd interrupt. 1121 */ 1122static void 1123ccdiodone(ibp) 1124 struct buf *ibp; 1125{ 1126 struct ccdbuf *cbp = (struct ccdbuf *)ibp; 1127 struct buf *bp = cbp->cb_obp; 1128 int unit = cbp->cb_unit; 1129 int count, s; 1130 1131 s = splbio(); 1132#ifdef DEBUG 1133 if (ccddebug & CCDB_FOLLOW) 1134 printf("ccdiodone(%x)\n", cbp); 1135 if (ccddebug & CCDB_IO) { 1136 printf("ccdiodone: bp %x bcount %d resid %d\n", 1137 bp, bp->b_bcount, bp->b_resid); 1138 printf(" dev %x(u%d), cbp %x bn %d addr %x bcnt %d\n", 1139 cbp->cb_buf.b_dev, cbp->cb_comp, cbp, 1140 cbp->cb_buf.b_blkno, cbp->cb_buf.b_data, 1141 cbp->cb_buf.b_bcount); 1142 } 1143#endif 1144 /* 1145 * If an error occured, report it. If this is a mirrored 1146 * configuration and the first of two possible reads, do not 1147 * set the error in the bp yet because the second read may 1148 * succeed. 1149 */ 1150 1151 if (cbp->cb_buf.b_flags & B_ERROR) { 1152 const char *msg = ""; 1153 1154 if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) && 1155 (cbp->cb_buf.b_iocmd == BIO_READ) && 1156 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1157 /* 1158 * We will try our read on the other disk down 1159 * below, also reverse the default pick so if we 1160 * are doing a scan we do not keep hitting the 1161 * bad disk first. 1162 */ 1163 struct ccd_softc *cs = &ccd_softc[unit]; 1164 1165 msg = ", trying other disk"; 1166 cs->sc_pick = 1 - cs->sc_pick; 1167 cs->sc_blk[cs->sc_pick] = bp->b_blkno; 1168 } else { 1169 bp->b_flags |= B_ERROR; 1170 bp->b_error = cbp->cb_buf.b_error ? 1171 cbp->cb_buf.b_error : EIO; 1172 } 1173 printf("ccd%d: error %d on component %d block %d (ccd block %d)%s\n", 1174 unit, bp->b_error, cbp->cb_comp, 1175 (int)cbp->cb_buf.b_blkno, bp->b_blkno, msg); 1176 } 1177 1178 /* 1179 * Process mirror. If we are writing, I/O has been initiated on both 1180 * buffers and we fall through only after both are finished. 1181 * 1182 * If we are reading only one I/O is initiated at a time. If an 1183 * error occurs we initiate the second I/O and return, otherwise 1184 * we free the second I/O without initiating it. 1185 */ 1186 1187 if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) { 1188 if (cbp->cb_buf.b_iocmd == BIO_WRITE) { 1189 /* 1190 * When writing, handshake with the second buffer 1191 * to determine when both are done. If both are not 1192 * done, return here. 1193 */ 1194 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1195 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE; 1196 putccdbuf(cbp); 1197 splx(s); 1198 return; 1199 } 1200 } else { 1201 /* 1202 * When reading, either dispose of the second buffer 1203 * or initiate I/O on the second buffer if an error 1204 * occured with this one. 1205 */ 1206 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1207 if (cbp->cb_buf.b_flags & B_ERROR) { 1208 cbp->cb_mirror->cb_pflags |= 1209 CCDPF_MIRROR_DONE; 1210 BUF_STRATEGY(&cbp->cb_mirror->cb_buf); 1211 putccdbuf(cbp); 1212 splx(s); 1213 return; 1214 } else { 1215 putccdbuf(cbp->cb_mirror); 1216 /* fall through */ 1217 } 1218 } 1219 } 1220 } 1221 1222 /* 1223 * use b_bufsize to determine how big the original request was rather 1224 * then b_bcount, because b_bcount may have been truncated for EOF. 1225 * 1226 * XXX We check for an error, but we do not test the resid for an 1227 * aligned EOF condition. This may result in character & block 1228 * device access not recognizing EOF properly when read or written 1229 * sequentially, but will not effect filesystems. 1230 */ 1231 count = cbp->cb_buf.b_bufsize; 1232 putccdbuf(cbp); 1233 1234 /* 1235 * If all done, "interrupt". 1236 */ 1237 bp->b_resid -= count; 1238 if (bp->b_resid < 0) 1239 panic("ccdiodone: count"); 1240 if (bp->b_resid == 0) 1241 ccdintr(&ccd_softc[unit], bp); 1242 splx(s); 1243} 1244 1245static int 1246ccdioctl(dev, cmd, data, flag, p) 1247 dev_t dev; 1248 u_long cmd; 1249 caddr_t data; 1250 int flag; 1251 struct proc *p; 1252{ 1253 int unit = ccdunit(dev); 1254 int i, j, lookedup = 0, error = 0; 1255 int part, pmask, s; 1256 struct ccd_softc *cs; 1257 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data; 1258 struct ccddevice ccd; 1259 char **cpp; 1260 struct vnode **vpp; 1261 1262 if (unit >= numccd) 1263 return (ENXIO); 1264 cs = &ccd_softc[unit]; 1265 1266 bzero(&ccd, sizeof(ccd)); 1267 1268 switch (cmd) { 1269 case CCDIOCSET: 1270 if (cs->sc_flags & CCDF_INITED) 1271 return (EBUSY); 1272 1273 if ((flag & FWRITE) == 0) 1274 return (EBADF); 1275 1276 if ((error = ccdlock(cs)) != 0) 1277 return (error); 1278 1279 /* Fill in some important bits. */ 1280 ccd.ccd_unit = unit; 1281 ccd.ccd_interleave = ccio->ccio_ileave; 1282 if (ccd.ccd_interleave == 0 && 1283 ((ccio->ccio_flags & CCDF_MIRROR) || 1284 (ccio->ccio_flags & CCDF_PARITY))) { 1285 printf("ccd%d: disabling mirror/parity, interleave is 0\n", unit); 1286 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY); 1287 } 1288 if ((ccio->ccio_flags & CCDF_MIRROR) && 1289 (ccio->ccio_flags & CCDF_PARITY)) { 1290 printf("ccd%d: can't specify both mirror and parity, using mirror\n", unit); 1291 ccio->ccio_flags &= ~CCDF_PARITY; 1292 } 1293 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) && 1294 !(ccio->ccio_flags & CCDF_UNIFORM)) { 1295 printf("ccd%d: mirror/parity forces uniform flag\n", 1296 unit); 1297 ccio->ccio_flags |= CCDF_UNIFORM; 1298 } 1299 ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK; 1300 1301 /* 1302 * Allocate space for and copy in the array of 1303 * componet pathnames and device numbers. 1304 */ 1305 cpp = malloc(ccio->ccio_ndisks * sizeof(char *), 1306 M_DEVBUF, M_WAITOK); 1307 vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *), 1308 M_DEVBUF, M_WAITOK); 1309 1310 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp, 1311 ccio->ccio_ndisks * sizeof(char **)); 1312 if (error) { 1313 free(vpp, M_DEVBUF); 1314 free(cpp, M_DEVBUF); 1315 ccdunlock(cs); 1316 return (error); 1317 } 1318 1319#ifdef DEBUG 1320 if (ccddebug & CCDB_INIT) 1321 for (i = 0; i < ccio->ccio_ndisks; ++i) 1322 printf("ccdioctl: component %d: 0x%x\n", 1323 i, cpp[i]); 1324#endif 1325 1326 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1327#ifdef DEBUG 1328 if (ccddebug & CCDB_INIT) 1329 printf("ccdioctl: lookedup = %d\n", lookedup); 1330#endif 1331 if ((error = ccdlookup(cpp[i], p, &vpp[i])) != 0) { 1332 for (j = 0; j < lookedup; ++j) 1333 (void)vn_close(vpp[j], FREAD|FWRITE, 1334 p->p_ucred, p); 1335 free(vpp, M_DEVBUF); 1336 free(cpp, M_DEVBUF); 1337 ccdunlock(cs); 1338 return (error); 1339 } 1340 ++lookedup; 1341 } 1342 ccd.ccd_cpp = cpp; 1343 ccd.ccd_vpp = vpp; 1344 ccd.ccd_ndev = ccio->ccio_ndisks; 1345 1346 /* 1347 * Initialize the ccd. Fills in the softc for us. 1348 */ 1349 if ((error = ccdinit(&ccd, cpp, p)) != 0) { 1350 for (j = 0; j < lookedup; ++j) 1351 (void)vn_close(vpp[j], FREAD|FWRITE, 1352 p->p_ucred, p); 1353 bzero(&ccd_softc[unit], sizeof(struct ccd_softc)); 1354 free(vpp, M_DEVBUF); 1355 free(cpp, M_DEVBUF); 1356 ccdunlock(cs); 1357 return (error); 1358 } 1359 1360 /* 1361 * The ccd has been successfully initialized, so 1362 * we can place it into the array and read the disklabel. 1363 */ 1364 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1365 ccio->ccio_unit = unit; 1366 ccio->ccio_size = cs->sc_size; 1367 ccdgetdisklabel(dev); 1368 1369 ccdunlock(cs); 1370 1371 break; 1372 1373 case CCDIOCCLR: 1374 if ((cs->sc_flags & CCDF_INITED) == 0) 1375 return (ENXIO); 1376 1377 if ((flag & FWRITE) == 0) 1378 return (EBADF); 1379 1380 if ((error = ccdlock(cs)) != 0) 1381 return (error); 1382 1383 /* Don't unconfigure if any other partitions are open */ 1384 part = ccdpart(dev); 1385 pmask = (1 << part); 1386 if ((cs->sc_openmask & ~pmask)) { 1387 ccdunlock(cs); 1388 return (EBUSY); 1389 } 1390 1391 /* 1392 * Free ccd_softc information and clear entry. 1393 */ 1394 1395 /* Close the components and free their pathnames. */ 1396 for (i = 0; i < cs->sc_nccdisks; ++i) { 1397 /* 1398 * XXX: this close could potentially fail and 1399 * cause Bad Things. Maybe we need to force 1400 * the close to happen? 1401 */ 1402#ifdef DEBUG 1403 if (ccddebug & CCDB_VNODE) 1404 vprint("CCDIOCCLR: vnode info", 1405 cs->sc_cinfo[i].ci_vp); 1406#endif 1407 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, 1408 p->p_ucred, p); 1409 free(cs->sc_cinfo[i].ci_path, M_DEVBUF); 1410 } 1411 1412 /* Free interleave index. */ 1413 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) 1414 free(cs->sc_itable[i].ii_index, M_DEVBUF); 1415 1416 /* Free component info and interleave table. */ 1417 free(cs->sc_cinfo, M_DEVBUF); 1418 free(cs->sc_itable, M_DEVBUF); 1419 cs->sc_flags &= ~CCDF_INITED; 1420 1421 /* 1422 * Free ccddevice information and clear entry. 1423 */ 1424 free(ccddevs[unit].ccd_cpp, M_DEVBUF); 1425 free(ccddevs[unit].ccd_vpp, M_DEVBUF); 1426 ccd.ccd_dk = -1; 1427 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1428 1429 /* 1430 * And remove the devstat entry. 1431 */ 1432 devstat_remove_entry(&cs->device_stats); 1433 1434 /* This must be atomic. */ 1435 s = splhigh(); 1436 ccdunlock(cs); 1437 bzero(cs, sizeof(struct ccd_softc)); 1438 splx(s); 1439 1440 break; 1441 1442 case DIOCGDINFO: 1443 if ((cs->sc_flags & CCDF_INITED) == 0) 1444 return (ENXIO); 1445 1446 *(struct disklabel *)data = cs->sc_label; 1447 break; 1448 1449 case DIOCGPART: 1450 if ((cs->sc_flags & CCDF_INITED) == 0) 1451 return (ENXIO); 1452 1453 ((struct partinfo *)data)->disklab = &cs->sc_label; 1454 ((struct partinfo *)data)->part = 1455 &cs->sc_label.d_partitions[ccdpart(dev)]; 1456 break; 1457 1458 case DIOCWDINFO: 1459 case DIOCSDINFO: 1460 if ((cs->sc_flags & CCDF_INITED) == 0) 1461 return (ENXIO); 1462 1463 if ((flag & FWRITE) == 0) 1464 return (EBADF); 1465 1466 if ((error = ccdlock(cs)) != 0) 1467 return (error); 1468 1469 cs->sc_flags |= CCDF_LABELLING; 1470 1471 error = setdisklabel(&cs->sc_label, 1472 (struct disklabel *)data, 0); 1473 if (error == 0) { 1474 if (cmd == DIOCWDINFO) 1475 error = writedisklabel(CCDLABELDEV(dev), 1476 &cs->sc_label); 1477 } 1478 1479 cs->sc_flags &= ~CCDF_LABELLING; 1480 1481 ccdunlock(cs); 1482 1483 if (error) 1484 return (error); 1485 break; 1486 1487 case DIOCWLABEL: 1488 if ((cs->sc_flags & CCDF_INITED) == 0) 1489 return (ENXIO); 1490 1491 if ((flag & FWRITE) == 0) 1492 return (EBADF); 1493 if (*(int *)data != 0) 1494 cs->sc_flags |= CCDF_WLABEL; 1495 else 1496 cs->sc_flags &= ~CCDF_WLABEL; 1497 break; 1498 1499 default: 1500 return (ENOTTY); 1501 } 1502 1503 return (0); 1504} 1505 1506static int 1507ccdsize(dev) 1508 dev_t dev; 1509{ 1510 struct ccd_softc *cs; 1511 int part, size; 1512 1513 if (ccdopen(dev, 0, S_IFCHR, curproc)) 1514 return (-1); 1515 1516 cs = &ccd_softc[ccdunit(dev)]; 1517 part = ccdpart(dev); 1518 1519 if ((cs->sc_flags & CCDF_INITED) == 0) 1520 return (-1); 1521 1522 if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP) 1523 size = -1; 1524 else 1525 size = cs->sc_label.d_partitions[part].p_size; 1526 1527 if (ccdclose(dev, 0, S_IFCHR, curproc)) 1528 return (-1); 1529 1530 return (size); 1531} 1532 1533static int 1534ccddump(dev) 1535 dev_t dev; 1536{ 1537 1538 /* Not implemented. */ 1539 return ENXIO; 1540} 1541 1542/* 1543 * Lookup the provided name in the filesystem. If the file exists, 1544 * is a valid block device, and isn't being used by anyone else, 1545 * set *vpp to the file's vnode. 1546 */ 1547static int 1548ccdlookup(path, p, vpp) 1549 char *path; 1550 struct proc *p; 1551 struct vnode **vpp; /* result */ 1552{ 1553 struct nameidata nd; 1554 struct vnode *vp; 1555 int error; 1556 1557 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, path, p); 1558 if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) { 1559#ifdef DEBUG 1560 if (ccddebug & CCDB_FOLLOW|CCDB_INIT) 1561 printf("ccdlookup: vn_open error = %d\n", error); 1562#endif 1563 return (error); 1564 } 1565 vp = nd.ni_vp; 1566 1567 if (vp->v_usecount > 1) { 1568 error = EBUSY; 1569 goto bad; 1570 } 1571 1572 if (!vn_isdisk(vp, &error)) 1573 goto bad; 1574 1575#ifdef DEBUG 1576 if (ccddebug & CCDB_VNODE) 1577 vprint("ccdlookup: vnode info", vp); 1578#endif 1579 1580 VOP_UNLOCK(vp, 0, p); 1581 NDFREE(&nd, NDF_ONLY_PNBUF); 1582 *vpp = vp; 1583 return (0); 1584bad: 1585 VOP_UNLOCK(vp, 0, p); 1586 NDFREE(&nd, NDF_ONLY_PNBUF); 1587 /* vn_close does vrele() for vp */ 1588 (void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p); 1589 return (error); 1590} 1591 1592/* 1593 * Read the disklabel from the ccd. If one is not present, fake one 1594 * up. 1595 */ 1596static void 1597ccdgetdisklabel(dev) 1598 dev_t dev; 1599{ 1600 int unit = ccdunit(dev); 1601 struct ccd_softc *cs = &ccd_softc[unit]; 1602 char *errstring; 1603 struct disklabel *lp = &cs->sc_label; 1604 struct ccdgeom *ccg = &cs->sc_geom; 1605 1606 bzero(lp, sizeof(*lp)); 1607 1608 lp->d_secperunit = cs->sc_size; 1609 lp->d_secsize = ccg->ccg_secsize; 1610 lp->d_nsectors = ccg->ccg_nsectors; 1611 lp->d_ntracks = ccg->ccg_ntracks; 1612 lp->d_ncylinders = ccg->ccg_ncylinders; 1613 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1614 1615 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename)); 1616 lp->d_type = DTYPE_CCD; 1617 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1618 lp->d_rpm = 3600; 1619 lp->d_interleave = 1; 1620 lp->d_flags = 0; 1621 1622 lp->d_partitions[RAW_PART].p_offset = 0; 1623 lp->d_partitions[RAW_PART].p_size = cs->sc_size; 1624 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 1625 lp->d_npartitions = RAW_PART + 1; 1626 1627 lp->d_bbsize = BBSIZE; /* XXX */ 1628 lp->d_sbsize = SBSIZE; /* XXX */ 1629 1630 lp->d_magic = DISKMAGIC; 1631 lp->d_magic2 = DISKMAGIC; 1632 lp->d_checksum = dkcksum(&cs->sc_label); 1633 1634 /* 1635 * Call the generic disklabel extraction routine. 1636 */ 1637 errstring = readdisklabel(CCDLABELDEV(dev), &cs->sc_label); 1638 if (errstring != NULL) 1639 ccdmakedisklabel(cs); 1640 1641#ifdef DEBUG 1642 /* It's actually extremely common to have unlabeled ccds. */ 1643 if (ccddebug & CCDB_LABEL) 1644 if (errstring != NULL) 1645 printf("ccd%d: %s\n", unit, errstring); 1646#endif 1647} 1648 1649/* 1650 * Take care of things one might want to take care of in the event 1651 * that a disklabel isn't present. 1652 */ 1653static void 1654ccdmakedisklabel(cs) 1655 struct ccd_softc *cs; 1656{ 1657 struct disklabel *lp = &cs->sc_label; 1658 1659 /* 1660 * For historical reasons, if there's no disklabel present 1661 * the raw partition must be marked FS_BSDFFS. 1662 */ 1663 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 1664 1665 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 1666} 1667 1668/* 1669 * Wait interruptibly for an exclusive lock. 1670 * 1671 * XXX 1672 * Several drivers do this; it should be abstracted and made MP-safe. 1673 */ 1674static int 1675ccdlock(cs) 1676 struct ccd_softc *cs; 1677{ 1678 int error; 1679 1680 while ((cs->sc_flags & CCDF_LOCKED) != 0) { 1681 cs->sc_flags |= CCDF_WANTED; 1682 if ((error = tsleep(cs, PRIBIO | PCATCH, "ccdlck", 0)) != 0) 1683 return (error); 1684 } 1685 cs->sc_flags |= CCDF_LOCKED; 1686 return (0); 1687} 1688 1689/* 1690 * Unlock and wake up any waiters. 1691 */ 1692static void 1693ccdunlock(cs) 1694 struct ccd_softc *cs; 1695{ 1696 1697 cs->sc_flags &= ~CCDF_LOCKED; 1698 if ((cs->sc_flags & CCDF_WANTED) != 0) { 1699 cs->sc_flags &= ~CCDF_WANTED; 1700 wakeup(cs); 1701 } 1702} 1703 1704#ifdef DEBUG 1705static void 1706printiinfo(ii) 1707 struct ccdiinfo *ii; 1708{ 1709 int ix, i; 1710 1711 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1712 printf(" itab[%d]: #dk %d sblk %d soff %d", 1713 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1714 for (i = 0; i < ii->ii_ndisk; i++) 1715 printf(" %d", ii->ii_index[i]); 1716 printf("\n"); 1717 } 1718} 1719#endif 1720 1721 1722/* Local Variables: */ 1723/* c-argdecl-indent: 8 */ 1724/* c-continued-statement-offset: 8 */ 1725/* c-indent-level: 8 */ 1726/* End: */ 1727