geom_ccd.c revision 109534
1/* 2 * Copyright (c) 2003 Poul-Henning Kamp. 3 * Copyright (c) 1995 Jason R. Thorpe. 4 * Copyright (c) 1990, 1993 5 * The Regents of the University of California. All rights reserved. 6 * All rights reserved. 7 * Copyright (c) 1988 University of Utah. 8 * 9 * This code is derived from software contributed to Berkeley by 10 * the Systems Programming Group of the University of Utah Computer 11 * Science Department. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. All advertising materials mentioning features or use of this software 22 * must display the following acknowledgement: 23 * This product includes software developed for the NetBSD Project 24 * by Jason R. Thorpe. 25 * 4. The names of the authors may not be used to endorse or promote products 26 * derived from this software without specific prior written permission. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 29 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 30 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 31 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 35 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 36 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 38 * SUCH DAMAGE. 39 * 40 * Dynamic configuration and disklabel support by: 41 * Jason R. Thorpe <thorpej@nas.nasa.gov> 42 * Numerical Aerodynamic Simulation Facility 43 * Mail Stop 258-6 44 * NASA Ames Research Center 45 * Moffett Field, CA 94035 46 * 47 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 48 * 49 * @(#)cd.c 8.2 (Berkeley) 11/16/93 50 * 51 * $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ 52 * 53 * $FreeBSD: head/sys/geom/geom_ccd.c 109534 2003-01-19 14:35:38Z phk $ 54 */ 55 56#include <sys/param.h> 57#include <sys/systm.h> 58#include <sys/kernel.h> 59#include <sys/module.h> 60#include <sys/proc.h> 61#include <sys/bio.h> 62#include <sys/malloc.h> 63#include <sys/namei.h> 64#include <sys/conf.h> 65#include <sys/stat.h> 66#include <sys/stdint.h> 67#include <sys/sysctl.h> 68#include <sys/disk.h> 69#include <sys/disklabel.h> 70#include <sys/devicestat.h> 71#include <sys/fcntl.h> 72#include <sys/vnode.h> 73 74#include <sys/ccdvar.h> 75 76MALLOC_DEFINE(M_CCD, "CCD driver", "Concatenated Disk driver"); 77 78static u_int 79ccdunit(dev_t dev) 80{ 81 return (((minor(dev) >> 16) & 0x1e0) | ((minor(dev) >> 3) & 0x1f)); 82} 83 84#define ccdpart(x) (minor(x) & 7) 85 86/* 87 This is how mirroring works (only writes are special): 88 89 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s 90 linked together by the cb_mirror field. "cb_pflags & 91 CCDPF_MIRROR_DONE" is set to 0 on both of them. 92 93 When a component returns to ccdiodone(), it checks if "cb_pflags & 94 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's 95 flag and returns. If it is, it means its partner has already 96 returned, so it will go to the regular cleanup. 97 98 */ 99 100struct ccdbuf { 101 struct bio cb_buf; /* new I/O buf */ 102 struct bio *cb_obp; /* ptr. to original I/O buf */ 103 struct ccdbuf *cb_freenext; /* free list link */ 104 int cb_unit; /* target unit */ 105 int cb_comp; /* target component */ 106 int cb_pflags; /* mirror/parity status flag */ 107 struct ccdbuf *cb_mirror; /* mirror counterpart */ 108}; 109 110/* bits in cb_pflags */ 111#define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */ 112 113#define CCDLABELDEV(dev) \ 114 (makedev(major((dev)), dkmakeminor(ccdunit((dev)), 0, RAW_PART))) 115 116/* convinient macros for often-used statements */ 117#define IS_ALLOCATED(unit) (ccdfind(unit) != NULL) 118#define IS_INITED(cs) (((cs)->sc_flags & CCDF_INITED) != 0) 119 120 121static dev_t ccdctldev; 122 123 124static d_open_t ccdopen; 125static d_close_t ccdclose; 126static d_strategy_t ccdstrategy; 127static d_ioctl_t ccdioctl; 128static d_ioctl_t ccdioctltoo; 129static d_psize_t ccdsize; 130 131#define NCCDFREEHIWAT 16 132 133#define CDEV_MAJOR 74 134 135static struct cdevsw ccd_cdevsw = { 136 /* open */ ccdopen, 137 /* close */ ccdclose, 138 /* read */ physread, 139 /* write */ physwrite, 140 /* ioctl */ ccdioctl, 141 /* poll */ nopoll, 142 /* mmap */ nommap, 143 /* strategy */ ccdstrategy, 144 /* name */ "ccd", 145 /* maj */ CDEV_MAJOR, 146 /* dump */ nodump, 147 /* psize */ ccdsize, 148 /* flags */ D_DISK, 149}; 150static LIST_HEAD(, ccd_s) ccd_softc_list = LIST_HEAD_INITIALIZER(&ccd_softc_list); 151 152static struct ccd_s *ccdfind(int); 153static struct ccd_s *ccdnew(int); 154static int ccddestroy(struct ccd_s *, struct proc *); 155 156/* called during module initialization */ 157static void ccdattach(void); 158static int ccd_modevent(module_t, int, void *); 159 160/* called by biodone() at interrupt time */ 161static void ccdiodone(struct bio *bp); 162 163static void ccdstart(struct ccd_s *, struct bio *); 164static void ccdinterleave(struct ccd_s *, int); 165static int ccdinit(struct ccd_s *, char **, struct thread *); 166static int ccdlookup(char *, struct thread *p, struct vnode **); 167static void ccdbuffer(struct ccdbuf **ret, struct ccd_s *, 168 struct bio *, daddr_t, caddr_t, long); 169static void ccdgetdisklabel(dev_t); 170static void ccdmakedisklabel(struct ccd_s *); 171static int ccdlock(struct ccd_s *); 172static void ccdunlock(struct ccd_s *); 173 174 175/* 176 * Number of blocks to untouched in front of a component partition. 177 * This is to avoid violating its disklabel area when it starts at the 178 * beginning of the slice. 179 */ 180#if !defined(CCD_OFFSET) 181#define CCD_OFFSET 16 182#endif 183 184static struct ccd_s * 185ccdfind(int unit) 186{ 187 struct ccd_s *sc = NULL; 188 189 /* XXX: LOCK(unique unit numbers) */ 190 LIST_FOREACH(sc, &ccd_softc_list, list) { 191 if (sc->sc_unit == unit) 192 break; 193 } 194 /* XXX: UNLOCK(unique unit numbers) */ 195 return ((sc == NULL) || (sc->sc_unit != unit) ? NULL : sc); 196} 197 198static struct ccd_s * 199ccdnew(int unit) 200{ 201 struct ccd_s *sc; 202 203 /* XXX: LOCK(unique unit numbers) */ 204 if (IS_ALLOCATED(unit) || unit > DKMAXUNIT) 205 return (NULL); 206 207 MALLOC(sc, struct ccd_s *, sizeof(*sc), M_CCD, M_WAITOK | M_ZERO); 208 sc->sc_unit = unit; 209 LIST_INSERT_HEAD(&ccd_softc_list, sc, list); 210 /* XXX: UNLOCK(unique unit numbers) */ 211 return (sc); 212} 213 214static int 215ccddestroy(struct ccd_s *sc, struct proc *p) 216{ 217 218 /* XXX: LOCK(unique unit numbers) */ 219 LIST_REMOVE(sc, list); 220 /* XXX: UNLOCK(unique unit numbers) */ 221 FREE(sc, M_CCD); 222 return (0); 223} 224 225static void 226ccd_clone(void *arg, char *name, int namelen, dev_t *dev) 227{ 228 int i, u; 229 char *s; 230 231 if (*dev != NODEV) 232 return; 233 i = dev_stdclone(name, &s, "ccd", &u); 234 if (i != 2) 235 return; 236 if (*s < 'a' || *s > 'h') 237 return; 238 if (s[1] != '\0') 239 return; 240 *dev = make_dev(&ccd_cdevsw, u * 8 + *s - 'a', 241 UID_ROOT, GID_OPERATOR, 0640, name); 242} 243 244/* 245 * Called by main() during pseudo-device attachment. All we need 246 * to do is to add devsw entries. 247 */ 248static void 249ccdattach() 250{ 251 252 ccdctldev = make_dev(&ccd_cdevsw, 0xffff00ff, 253 UID_ROOT, GID_OPERATOR, 0640, "ccd.ctl"); 254 ccdctldev->si_drv1 = ccdctldev; 255 EVENTHANDLER_REGISTER(dev_clone, ccd_clone, 0, 1000); 256} 257 258static int 259ccd_modevent(module_t mod, int type, void *data) 260{ 261 int error = 0; 262 263 switch (type) { 264 case MOD_LOAD: 265 ccdattach(); 266 break; 267 268 case MOD_UNLOAD: 269 printf("ccd0: Unload not supported!\n"); 270 error = EOPNOTSUPP; 271 break; 272 273 case MOD_SHUTDOWN: 274 break; 275 276 default: 277 error = EOPNOTSUPP; 278 } 279 return (error); 280} 281 282DEV_MODULE(ccd, ccd_modevent, NULL); 283 284static int 285ccdinit(struct ccd_s *cs, char **cpaths, struct thread *td) 286{ 287 struct ccdcinfo *ci = NULL; /* XXX */ 288 size_t size; 289 int ix; 290 struct vnode *vp; 291 size_t minsize; 292 int maxsecsize; 293 struct ccdgeom *ccg = &cs->sc_geom; 294 char *tmppath = NULL; 295 int error = 0; 296 off_t mediasize; 297 u_int sectorsize; 298 299 300 cs->sc_size = 0; 301 302 /* Allocate space for the component info. */ 303 cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo), 304 M_CCD, M_WAITOK); 305 306 /* 307 * Verify that each component piece exists and record 308 * relevant information about it. 309 */ 310 maxsecsize = 0; 311 minsize = 0; 312 tmppath = malloc(MAXPATHLEN, M_CCD, M_WAITOK); 313 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 314 vp = cs->sc_vpp[ix]; 315 ci = &cs->sc_cinfo[ix]; 316 ci->ci_vp = vp; 317 318 /* 319 * Copy in the pathname of the component. 320 */ 321 if ((error = copyinstr(cpaths[ix], tmppath, 322 MAXPATHLEN, &ci->ci_pathlen)) != 0) { 323 goto fail; 324 } 325 ci->ci_path = malloc(ci->ci_pathlen, M_CCD, M_WAITOK); 326 bcopy(tmppath, ci->ci_path, ci->ci_pathlen); 327 328 ci->ci_dev = vn_todev(vp); 329 330 /* 331 * Get partition information for the component. 332 */ 333 error = VOP_IOCTL(vp, DIOCGMEDIASIZE, (caddr_t)&mediasize, 334 FREAD, td->td_ucred, td); 335 if (error != 0) { 336 goto fail; 337 } 338 /* 339 * Get partition information for the component. 340 */ 341 error = VOP_IOCTL(vp, DIOCGSECTORSIZE, (caddr_t)§orsize, 342 FREAD, td->td_ucred, td); 343 if (error != 0) { 344 goto fail; 345 } 346 if (sectorsize > maxsecsize) 347 maxsecsize = sectorsize; 348 size = mediasize / DEV_BSIZE - CCD_OFFSET; 349 350 /* 351 * Calculate the size, truncating to an interleave 352 * boundary if necessary. 353 */ 354 355 if (cs->sc_ileave > 1) 356 size -= size % cs->sc_ileave; 357 358 if (size == 0) { 359 error = ENODEV; 360 goto fail; 361 } 362 363 if (minsize == 0 || size < minsize) 364 minsize = size; 365 ci->ci_size = size; 366 cs->sc_size += size; 367 } 368 369 free(tmppath, M_CCD); 370 tmppath = NULL; 371 372 /* 373 * Don't allow the interleave to be smaller than 374 * the biggest component sector. 375 */ 376 if ((cs->sc_ileave > 0) && 377 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 378 error = EINVAL; 379 goto fail; 380 } 381 382 /* 383 * If uniform interleave is desired set all sizes to that of 384 * the smallest component. This will guarentee that a single 385 * interleave table is generated. 386 * 387 * Lost space must be taken into account when calculating the 388 * overall size. Half the space is lost when CCDF_MIRROR is 389 * specified. 390 */ 391 if (cs->sc_flags & CCDF_UNIFORM) { 392 for (ci = cs->sc_cinfo; 393 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 394 ci->ci_size = minsize; 395 } 396 if (cs->sc_flags & CCDF_MIRROR) { 397 /* 398 * Check to see if an even number of components 399 * have been specified. The interleave must also 400 * be non-zero in order for us to be able to 401 * guarentee the topology. 402 */ 403 if (cs->sc_nccdisks % 2) { 404 printf("ccd%d: mirroring requires an even number of disks\n", cs->sc_unit ); 405 error = EINVAL; 406 goto fail; 407 } 408 if (cs->sc_ileave == 0) { 409 printf("ccd%d: an interleave must be specified when mirroring\n", cs->sc_unit); 410 error = EINVAL; 411 goto fail; 412 } 413 cs->sc_size = (cs->sc_nccdisks/2) * minsize; 414 } else { 415 if (cs->sc_ileave == 0) { 416 printf("ccd%d: an interleave must be specified when using parity\n", cs->sc_unit); 417 error = EINVAL; 418 goto fail; 419 } 420 cs->sc_size = cs->sc_nccdisks * minsize; 421 } 422 } 423 424 /* 425 * Construct the interleave table. 426 */ 427 ccdinterleave(cs, cs->sc_unit); 428 429 /* 430 * Create pseudo-geometry based on 1MB cylinders. It's 431 * pretty close. 432 */ 433 ccg->ccg_secsize = maxsecsize; 434 ccg->ccg_ntracks = 1; 435 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize; 436 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 437 438 /* 439 * Add a devstat entry for this device. 440 */ 441 devstat_add_entry(&cs->device_stats, "ccd", cs->sc_unit, 442 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED, 443 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER, 444 DEVSTAT_PRIORITY_ARRAY); 445 446 cs->sc_flags |= CCDF_INITED; 447 cs->sc_cflags = cs->sc_flags; /* So we can find out later... */ 448 return (0); 449fail: 450 while (ci > cs->sc_cinfo) { 451 ci--; 452 free(ci->ci_path, M_CCD); 453 } 454 if (tmppath != NULL) 455 free(tmppath, M_CCD); 456 free(cs->sc_cinfo, M_CCD); 457 return (error); 458} 459 460static void 461ccdinterleave(struct ccd_s *cs, int unit) 462{ 463 struct ccdcinfo *ci, *smallci; 464 struct ccdiinfo *ii; 465 daddr_t bn, lbn; 466 int ix; 467 u_long size; 468 469 470 /* 471 * Allocate an interleave table. The worst case occurs when each 472 * of N disks is of a different size, resulting in N interleave 473 * tables. 474 * 475 * Chances are this is too big, but we don't care. 476 */ 477 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo); 478 cs->sc_itable = (struct ccdiinfo *)malloc(size, M_CCD, 479 M_WAITOK | M_ZERO); 480 481 /* 482 * Trivial case: no interleave (actually interleave of disk size). 483 * Each table entry represents a single component in its entirety. 484 * 485 * An interleave of 0 may not be used with a mirror setup. 486 */ 487 if (cs->sc_ileave == 0) { 488 bn = 0; 489 ii = cs->sc_itable; 490 491 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 492 /* Allocate space for ii_index. */ 493 ii->ii_index = malloc(sizeof(int), M_CCD, M_WAITOK); 494 ii->ii_ndisk = 1; 495 ii->ii_startblk = bn; 496 ii->ii_startoff = 0; 497 ii->ii_index[0] = ix; 498 bn += cs->sc_cinfo[ix].ci_size; 499 ii++; 500 } 501 ii->ii_ndisk = 0; 502 return; 503 } 504 505 /* 506 * The following isn't fast or pretty; it doesn't have to be. 507 */ 508 size = 0; 509 bn = lbn = 0; 510 for (ii = cs->sc_itable; ; ii++) { 511 /* 512 * Allocate space for ii_index. We might allocate more then 513 * we use. 514 */ 515 ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks), 516 M_CCD, M_WAITOK); 517 518 /* 519 * Locate the smallest of the remaining components 520 */ 521 smallci = NULL; 522 for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks]; 523 ci++) { 524 if (ci->ci_size > size && 525 (smallci == NULL || 526 ci->ci_size < smallci->ci_size)) { 527 smallci = ci; 528 } 529 } 530 531 /* 532 * Nobody left, all done 533 */ 534 if (smallci == NULL) { 535 ii->ii_ndisk = 0; 536 free(ii->ii_index, M_CCD); 537 break; 538 } 539 540 /* 541 * Record starting logical block using an sc_ileave blocksize. 542 */ 543 ii->ii_startblk = bn / cs->sc_ileave; 544 545 /* 546 * Record starting comopnent block using an sc_ileave 547 * blocksize. This value is relative to the beginning of 548 * a component disk. 549 */ 550 ii->ii_startoff = lbn; 551 552 /* 553 * Determine how many disks take part in this interleave 554 * and record their indices. 555 */ 556 ix = 0; 557 for (ci = cs->sc_cinfo; 558 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 559 if (ci->ci_size >= smallci->ci_size) { 560 ii->ii_index[ix++] = ci - cs->sc_cinfo; 561 } 562 } 563 ii->ii_ndisk = ix; 564 bn += ix * (smallci->ci_size - size); 565 lbn = smallci->ci_size / cs->sc_ileave; 566 size = smallci->ci_size; 567 } 568} 569 570/* ARGSUSED */ 571static int 572ccdopen(dev_t dev, int flags, int fmt, struct thread *td) 573{ 574 int unit = ccdunit(dev); 575 struct ccd_s *cs; 576 struct disklabel *lp; 577 int error = 0, part, pmask; 578 579 if (dev->si_drv1 == dev) 580 return (0); 581 582 cs = IS_ALLOCATED(unit) ? ccdfind(unit) : ccdnew(unit); 583 584 if ((error = ccdlock(cs)) != 0) 585 return (error); 586 587 lp = &cs->sc_label; 588 589 part = ccdpart(dev); 590 pmask = (1 << part); 591 592 /* 593 * If we're initialized, check to see if there are any other 594 * open partitions. If not, then it's safe to update 595 * the in-core disklabel. 596 */ 597 if (IS_INITED(cs) && (cs->sc_openmask == 0)) 598 ccdgetdisklabel(dev); 599 600 /* Check that the partition exists. */ 601 if (part != RAW_PART && ((part >= lp->d_npartitions) || 602 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 603 error = ENXIO; 604 goto done; 605 } 606 607 cs->sc_openmask |= pmask; 608 done: 609 ccdunlock(cs); 610 return (0); 611} 612 613/* ARGSUSED */ 614static int 615ccdclose(dev_t dev, int flags, int fmt, struct thread *td) 616{ 617 int unit = ccdunit(dev); 618 struct ccd_s *cs; 619 int error = 0, part; 620 621 if (dev->si_drv1 == dev) 622 return (0); 623 624 if (!IS_ALLOCATED(unit)) 625 return (ENXIO); 626 cs = ccdfind(unit); 627 628 if ((error = ccdlock(cs)) != 0) 629 return (error); 630 631 part = ccdpart(dev); 632 633 /* ...that much closer to allowing unconfiguration... */ 634 cs->sc_openmask &= ~(1 << part); 635 /* collect "garbage" if possible */ 636 if (!IS_INITED(cs) && (cs->sc_flags & CCDF_WANTED) == 0) 637 ccddestroy(cs, td->td_proc); 638 else 639 ccdunlock(cs); 640 return (0); 641} 642 643static void 644ccdstrategy(struct bio *bp) 645{ 646 int unit = ccdunit(bp->bio_dev); 647 struct ccd_s *cs = ccdfind(unit); 648 int s; 649 int wlabel; 650 struct disklabel *lp; 651 652 if (bp->bio_dev->si_drv1 == bp->bio_dev) { 653 biofinish(bp, NULL, ENXIO); 654 return; 655 } 656 if (!IS_INITED(cs)) { 657 biofinish(bp, NULL, ENXIO); 658 return; 659 } 660 661 /* If it's a nil transfer, wake up the top half now. */ 662 if (bp->bio_bcount == 0) { 663 biodone(bp); 664 return; 665 } 666 667 lp = &cs->sc_label; 668 669 /* 670 * Do bounds checking and adjust transfer. If there's an 671 * error, the bounds check will flag that for us. 672 */ 673 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING); 674 if (ccdpart(bp->bio_dev) != RAW_PART) { 675 if (bounds_check_with_label(bp, lp, wlabel) <= 0) { 676 biodone(bp); 677 return; 678 } 679 } else { 680 int pbn; /* in sc_secsize chunks */ 681 long sz; /* in sc_secsize chunks */ 682 683 pbn = bp->bio_blkno / (cs->sc_geom.ccg_secsize / DEV_BSIZE); 684 sz = howmany(bp->bio_bcount, cs->sc_geom.ccg_secsize); 685 686 /* 687 * If out of bounds return an error. If at the EOF point, 688 * simply read or write less. 689 */ 690 691 if (pbn < 0 || pbn >= cs->sc_size) { 692 bp->bio_resid = bp->bio_bcount; 693 if (pbn != cs->sc_size) 694 biofinish(bp, NULL, EINVAL); 695 else 696 biodone(bp); 697 return; 698 } 699 700 /* 701 * If the request crosses EOF, truncate the request. 702 */ 703 if (pbn + sz > cs->sc_size) { 704 bp->bio_bcount = (cs->sc_size - pbn) * 705 cs->sc_geom.ccg_secsize; 706 } 707 } 708 709 bp->bio_resid = bp->bio_bcount; 710 711 /* 712 * "Start" the unit. 713 */ 714 s = splbio(); 715 ccdstart(cs, bp); 716 splx(s); 717 return; 718} 719 720static void 721ccdstart(struct ccd_s *cs, struct bio *bp) 722{ 723 long bcount, rcount; 724 struct ccdbuf *cbp[4]; 725 /* XXX! : 2 reads and 2 writes for RAID 4/5 */ 726 caddr_t addr; 727 daddr_t bn; 728 struct partition *pp; 729 730 731 /* Record the transaction start */ 732 devstat_start_transaction(&cs->device_stats); 733 734 /* 735 * Translate the partition-relative block number to an absolute. 736 */ 737 bn = bp->bio_blkno; 738 if (ccdpart(bp->bio_dev) != RAW_PART) { 739 pp = &cs->sc_label.d_partitions[ccdpart(bp->bio_dev)]; 740 bn += pp->p_offset; 741 } 742 743 /* 744 * Allocate component buffers and fire off the requests 745 */ 746 addr = bp->bio_data; 747 for (bcount = bp->bio_bcount; bcount > 0; bcount -= rcount) { 748 ccdbuffer(cbp, cs, bp, bn, addr, bcount); 749 rcount = cbp[0]->cb_buf.bio_bcount; 750 751 if (cs->sc_cflags & CCDF_MIRROR) { 752 /* 753 * Mirroring. Writes go to both disks, reads are 754 * taken from whichever disk seems most appropriate. 755 * 756 * We attempt to localize reads to the disk whos arm 757 * is nearest the read request. We ignore seeks due 758 * to writes when making this determination and we 759 * also try to avoid hogging. 760 */ 761 if (cbp[0]->cb_buf.bio_cmd == BIO_WRITE) { 762 BIO_STRATEGY(&cbp[0]->cb_buf); 763 BIO_STRATEGY(&cbp[1]->cb_buf); 764 } else { 765 int pick = cs->sc_pick; 766 daddr_t range = cs->sc_size / 16; 767 768 if (bn < cs->sc_blk[pick] - range || 769 bn > cs->sc_blk[pick] + range 770 ) { 771 cs->sc_pick = pick = 1 - pick; 772 } 773 cs->sc_blk[pick] = bn + btodb(rcount); 774 BIO_STRATEGY(&cbp[pick]->cb_buf); 775 } 776 } else { 777 /* 778 * Not mirroring 779 */ 780 BIO_STRATEGY(&cbp[0]->cb_buf); 781 } 782 bn += btodb(rcount); 783 addr += rcount; 784 } 785} 786 787/* 788 * Build a component buffer header. 789 */ 790static void 791ccdbuffer(struct ccdbuf **cb, struct ccd_s *cs, struct bio *bp, daddr_t bn, caddr_t addr, long bcount) 792{ 793 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */ 794 struct ccdbuf *cbp; 795 daddr_t cbn, cboff; 796 off_t cbc; 797 798 /* 799 * Determine which component bn falls in. 800 */ 801 cbn = bn; 802 cboff = 0; 803 804 if (cs->sc_ileave == 0) { 805 /* 806 * Serially concatenated and neither a mirror nor a parity 807 * config. This is a special case. 808 */ 809 daddr_t sblk; 810 811 sblk = 0; 812 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++) 813 sblk += ci->ci_size; 814 cbn -= sblk; 815 } else { 816 struct ccdiinfo *ii; 817 int ccdisk, off; 818 819 /* 820 * Calculate cbn, the logical superblock (sc_ileave chunks), 821 * and cboff, a normal block offset (DEV_BSIZE chunks) relative 822 * to cbn. 823 */ 824 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */ 825 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */ 826 827 /* 828 * Figure out which interleave table to use. 829 */ 830 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) { 831 if (ii->ii_startblk > cbn) 832 break; 833 } 834 ii--; 835 836 /* 837 * off is the logical superblock relative to the beginning 838 * of this interleave block. 839 */ 840 off = cbn - ii->ii_startblk; 841 842 /* 843 * We must calculate which disk component to use (ccdisk), 844 * and recalculate cbn to be the superblock relative to 845 * the beginning of the component. This is typically done by 846 * adding 'off' and ii->ii_startoff together. However, 'off' 847 * must typically be divided by the number of components in 848 * this interleave array to be properly convert it from a 849 * CCD-relative logical superblock number to a 850 * component-relative superblock number. 851 */ 852 if (ii->ii_ndisk == 1) { 853 /* 854 * When we have just one disk, it can't be a mirror 855 * or a parity config. 856 */ 857 ccdisk = ii->ii_index[0]; 858 cbn = ii->ii_startoff + off; 859 } else { 860 if (cs->sc_cflags & CCDF_MIRROR) { 861 /* 862 * We have forced a uniform mapping, resulting 863 * in a single interleave array. We double 864 * up on the first half of the available 865 * components and our mirror is in the second 866 * half. This only works with a single 867 * interleave array because doubling up 868 * doubles the number of sectors, so there 869 * cannot be another interleave array because 870 * the next interleave array's calculations 871 * would be off. 872 */ 873 int ndisk2 = ii->ii_ndisk / 2; 874 ccdisk = ii->ii_index[off % ndisk2]; 875 cbn = ii->ii_startoff + off / ndisk2; 876 ci2 = &cs->sc_cinfo[ccdisk + ndisk2]; 877 } else { 878 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 879 cbn = ii->ii_startoff + off / ii->ii_ndisk; 880 } 881 } 882 883 ci = &cs->sc_cinfo[ccdisk]; 884 885 /* 886 * Convert cbn from a superblock to a normal block so it 887 * can be used to calculate (along with cboff) the normal 888 * block index into this particular disk. 889 */ 890 cbn *= cs->sc_ileave; 891 } 892 893 /* 894 * Fill in the component buf structure. 895 */ 896 cbp = malloc(sizeof(struct ccdbuf), M_CCD, M_WAITOK | M_ZERO); 897 cbp->cb_buf.bio_cmd = bp->bio_cmd; 898 cbp->cb_buf.bio_done = ccdiodone; 899 cbp->cb_buf.bio_dev = ci->ci_dev; /* XXX */ 900 cbp->cb_buf.bio_blkno = cbn + cboff + CCD_OFFSET; 901 cbp->cb_buf.bio_offset = dbtob(cbn + cboff + CCD_OFFSET); 902 cbp->cb_buf.bio_data = addr; 903 if (cs->sc_ileave == 0) 904 cbc = dbtob((off_t)(ci->ci_size - cbn)); 905 else 906 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 907 cbp->cb_buf.bio_bcount = (cbc < bcount) ? cbc : bcount; 908 cbp->cb_buf.bio_caller1 = (void*)cbp->cb_buf.bio_bcount; 909 910 /* 911 * context for ccdiodone 912 */ 913 cbp->cb_obp = bp; 914 cbp->cb_unit = cs->sc_unit; 915 cbp->cb_comp = ci - cs->sc_cinfo; 916 917 cb[0] = cbp; 918 919 /* 920 * Note: both I/O's setup when reading from mirror, but only one 921 * will be executed. 922 */ 923 if (cs->sc_cflags & CCDF_MIRROR) { 924 /* mirror, setup second I/O */ 925 cbp = malloc(sizeof(struct ccdbuf), M_CCD, M_WAITOK); 926 bcopy(cb[0], cbp, sizeof(struct ccdbuf)); 927 cbp->cb_buf.bio_dev = ci2->ci_dev; 928 cbp->cb_comp = ci2 - cs->sc_cinfo; 929 cb[1] = cbp; 930 /* link together the ccdbuf's and clear "mirror done" flag */ 931 cb[0]->cb_mirror = cb[1]; 932 cb[1]->cb_mirror = cb[0]; 933 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE; 934 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE; 935 } 936} 937 938/* 939 * Called at interrupt time. 940 * Mark the component as done and if all components are done, 941 * take a ccd interrupt. 942 */ 943static void 944ccdiodone(struct bio *ibp) 945{ 946 struct ccdbuf *cbp = (struct ccdbuf *)ibp; 947 struct bio *bp = cbp->cb_obp; 948 int unit = cbp->cb_unit; 949 struct ccd_s *cs; 950 int count, s; 951 952 cs = ccdfind(unit); 953 s = splbio(); 954 /* 955 * If an error occured, report it. If this is a mirrored 956 * configuration and the first of two possible reads, do not 957 * set the error in the bp yet because the second read may 958 * succeed. 959 */ 960 961 if (cbp->cb_buf.bio_flags & BIO_ERROR) { 962 const char *msg = ""; 963 964 if ((cs->sc_cflags & CCDF_MIRROR) && 965 (cbp->cb_buf.bio_cmd == BIO_READ) && 966 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 967 /* 968 * We will try our read on the other disk down 969 * below, also reverse the default pick so if we 970 * are doing a scan we do not keep hitting the 971 * bad disk first. 972 */ 973 974 msg = ", trying other disk"; 975 cs->sc_pick = 1 - cs->sc_pick; 976 cs->sc_blk[cs->sc_pick] = bp->bio_blkno; 977 } else { 978 bp->bio_flags |= BIO_ERROR; 979 bp->bio_error = cbp->cb_buf.bio_error ? 980 cbp->cb_buf.bio_error : EIO; 981 } 982 printf("ccd%d: error %d on component %d block %jd " 983 "(ccd block %jd)%s\n", unit, bp->bio_error, cbp->cb_comp, 984 (intmax_t)cbp->cb_buf.bio_blkno, (intmax_t)bp->bio_blkno, 985 msg); 986 } 987 988 /* 989 * Process mirror. If we are writing, I/O has been initiated on both 990 * buffers and we fall through only after both are finished. 991 * 992 * If we are reading only one I/O is initiated at a time. If an 993 * error occurs we initiate the second I/O and return, otherwise 994 * we free the second I/O without initiating it. 995 */ 996 997 if (cs->sc_cflags & CCDF_MIRROR) { 998 if (cbp->cb_buf.bio_cmd == BIO_WRITE) { 999 /* 1000 * When writing, handshake with the second buffer 1001 * to determine when both are done. If both are not 1002 * done, return here. 1003 */ 1004 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1005 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE; 1006 free(cbp, M_CCD); 1007 splx(s); 1008 return; 1009 } 1010 } else { 1011 /* 1012 * When reading, either dispose of the second buffer 1013 * or initiate I/O on the second buffer if an error 1014 * occured with this one. 1015 */ 1016 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1017 if (cbp->cb_buf.bio_flags & BIO_ERROR) { 1018 cbp->cb_mirror->cb_pflags |= 1019 CCDPF_MIRROR_DONE; 1020 BIO_STRATEGY(&cbp->cb_mirror->cb_buf); 1021 free(cbp, M_CCD); 1022 splx(s); 1023 return; 1024 } else { 1025 free(cbp->cb_mirror, M_CCD); 1026 } 1027 } 1028 } 1029 } 1030 1031 /* 1032 * use bio_caller1 to determine how big the original request was rather 1033 * then bio_bcount, because bio_bcount may have been truncated for EOF. 1034 * 1035 * XXX We check for an error, but we do not test the resid for an 1036 * aligned EOF condition. This may result in character & block 1037 * device access not recognizing EOF properly when read or written 1038 * sequentially, but will not effect filesystems. 1039 */ 1040 count = (long)cbp->cb_buf.bio_caller1; 1041 free(cbp, M_CCD); 1042 1043 /* 1044 * If all done, "interrupt". 1045 */ 1046 bp->bio_resid -= count; 1047 if (bp->bio_resid < 0) 1048 panic("ccdiodone: count"); 1049 if (bp->bio_resid == 0) { 1050 if (bp->bio_flags & BIO_ERROR) 1051 bp->bio_resid = bp->bio_bcount; 1052 biofinish(bp, &cs->device_stats, 0); 1053 } 1054 splx(s); 1055} 1056 1057static int 1058ccdioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct thread *td) 1059{ 1060 struct ccd_ioctl *ccio; 1061 u_int unit; 1062 dev_t dev2; 1063 int error; 1064 1065 if (dev->si_drv1 != dev) { 1066 switch (cmd) { 1067 case CCDIOCSET: 1068 case CCDIOCCLR: 1069 case CCDCONFINFO: 1070 case CCDCPPINFO: 1071 printf("*** WARNING: upgrade your ccdconfig(8) binary\n"); 1072 printf("*** WARNING: continuing in 30 seconds\n"); 1073 tsleep(dev, PRIBIO, "ccdbug", hz * 30); 1074 break; 1075 } 1076 return ccdioctltoo(dev, cmd, data, flag, td); 1077 } 1078 switch (cmd) { 1079 case CCDIOCSET: 1080 case CCDIOCCLR: 1081 ccio = (struct ccd_ioctl *)data; 1082 unit = ccio->ccio_size; 1083 dev2 = makedev(CDEV_MAJOR, unit * 8 + 2); 1084 if (!(dev2->si_flags & SI_NAMED)) { 1085 dev2 = make_dev(&ccd_cdevsw, unit * 8 + 2, 1086 UID_ROOT, GID_OPERATOR, 0640, "ccd%dc", unit); 1087 ccdnew(unit); 1088 } 1089 return (ccdioctltoo(dev2, cmd, data, flag, td)); 1090 case CCDCONFINFO: 1091 { 1092 int ninit = 0; 1093 struct ccdconf *conf = (struct ccdconf *)data; 1094 struct ccd_s *tmpcs; 1095 struct ccd_s *ubuf = conf->buffer; 1096 1097 /* XXX: LOCK(unique unit numbers) */ 1098 LIST_FOREACH(tmpcs, &ccd_softc_list, list) 1099 if (IS_INITED(tmpcs)) 1100 ninit++; 1101 1102 if (conf->size == 0) { 1103 conf->size = sizeof(struct ccd_s) * ninit; 1104 return (0); 1105 } else if ((conf->size / sizeof(struct ccd_s) != ninit) || 1106 (conf->size % sizeof(struct ccd_s) != 0)) { 1107 /* XXX: UNLOCK(unique unit numbers) */ 1108 return (EINVAL); 1109 } 1110 1111 ubuf += ninit; 1112 LIST_FOREACH(tmpcs, &ccd_softc_list, list) { 1113 if (!IS_INITED(tmpcs)) 1114 continue; 1115 error = copyout(tmpcs, --ubuf, 1116 sizeof(struct ccd_s)); 1117 if (error != 0) 1118 /* XXX: UNLOCK(unique unit numbers) */ 1119 return (error); 1120 } 1121 /* XXX: UNLOCK(unique unit numbers) */ 1122 return (0); 1123 } 1124 1125 case CCDCPPINFO: 1126 { 1127 struct ccdcpps *cpps = (struct ccdcpps *)data; 1128 char *ubuf = cpps->buffer; 1129 1130 1131 error = copyin(ubuf, &unit, sizeof (unit)); 1132 if (error) 1133 return (error); 1134 1135 if (!IS_ALLOCATED(unit)) 1136 return (ENXIO); 1137 dev2 = makedev(CDEV_MAJOR, unit * 8 + 2); 1138 return (ccdioctltoo(dev2, cmd, data, flag, td)); 1139 } 1140 1141 default: 1142 return (ENXIO); 1143 } 1144} 1145 1146static int 1147ccdioctltoo(dev_t dev, u_long cmd, caddr_t data, int flag, struct thread *td) 1148{ 1149 int unit; 1150 int i, j, lookedup = 0, error = 0; 1151 int part, pmask, s; 1152 struct ccd_s *cs; 1153 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data; 1154 char **cpp; 1155 struct vnode **vpp; 1156 1157 unit = ccdunit(dev); 1158 if (!IS_ALLOCATED(unit)) 1159 return (ENXIO); 1160 cs = ccdfind(unit); 1161 1162 switch (cmd) { 1163 case CCDIOCSET: 1164 if (IS_INITED(cs)) 1165 return (EBUSY); 1166 1167 if ((flag & FWRITE) == 0) 1168 return (EBADF); 1169 1170 if ((error = ccdlock(cs)) != 0) 1171 return (error); 1172 1173 if (ccio->ccio_ndisks > CCD_MAXNDISKS) 1174 return (EINVAL); 1175 1176 /* Fill in some important bits. */ 1177 cs->sc_ileave = ccio->ccio_ileave; 1178 if (cs->sc_ileave == 0 && (ccio->ccio_flags & CCDF_MIRROR)) { 1179 printf("ccd%d: disabling mirror, interleave is 0\n", 1180 unit); 1181 ccio->ccio_flags &= ~(CCDF_MIRROR); 1182 } 1183 if ((ccio->ccio_flags & CCDF_MIRROR) && 1184 !(ccio->ccio_flags & CCDF_UNIFORM)) { 1185 printf("ccd%d: mirror/parity forces uniform flag\n", 1186 unit); 1187 ccio->ccio_flags |= CCDF_UNIFORM; 1188 } 1189 cs->sc_flags = ccio->ccio_flags & CCDF_USERMASK; 1190 1191 /* 1192 * Allocate space for and copy in the array of 1193 * componet pathnames and device numbers. 1194 */ 1195 cpp = malloc(ccio->ccio_ndisks * sizeof(char *), 1196 M_CCD, M_WAITOK); 1197 vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *), 1198 M_CCD, M_WAITOK); 1199 1200 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp, 1201 ccio->ccio_ndisks * sizeof(char **)); 1202 if (error) { 1203 free(vpp, M_CCD); 1204 free(cpp, M_CCD); 1205 ccdunlock(cs); 1206 return (error); 1207 } 1208 1209 1210 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1211 if ((error = ccdlookup(cpp[i], td, &vpp[i])) != 0) { 1212 for (j = 0; j < lookedup; ++j) 1213 (void)vn_close(vpp[j], FREAD|FWRITE, 1214 td->td_ucred, td); 1215 free(vpp, M_CCD); 1216 free(cpp, M_CCD); 1217 ccdunlock(cs); 1218 return (error); 1219 } 1220 ++lookedup; 1221 } 1222 cs->sc_vpp = vpp; 1223 cs->sc_nccdisks = ccio->ccio_ndisks; 1224 1225 /* 1226 * Initialize the ccd. Fills in the softc for us. 1227 */ 1228 if ((error = ccdinit(cs, cpp, td)) != 0) { 1229 for (j = 0; j < lookedup; ++j) 1230 (void)vn_close(vpp[j], FREAD|FWRITE, 1231 td->td_ucred, td); 1232 /* 1233 * We can't ccddestroy() cs just yet, because nothing 1234 * prevents user-level app to do another ioctl() 1235 * without closing the device first, therefore 1236 * declare unit null and void and let ccdclose() 1237 * destroy it when it is safe to do so. 1238 */ 1239 cs->sc_flags &= (CCDF_WANTED | CCDF_LOCKED); 1240 free(vpp, M_CCD); 1241 free(cpp, M_CCD); 1242 ccdunlock(cs); 1243 return (error); 1244 } 1245 free(cpp, M_CCD); 1246 1247 /* 1248 * The ccd has been successfully initialized, so 1249 * we can place it into the array and read the disklabel. 1250 */ 1251 ccio->ccio_unit = unit; 1252 ccio->ccio_size = cs->sc_size; 1253 ccdgetdisklabel(dev); 1254 1255 ccdunlock(cs); 1256 1257 break; 1258 1259 case CCDIOCCLR: 1260 if (!IS_INITED(cs)) 1261 return (ENXIO); 1262 1263 if ((flag & FWRITE) == 0) 1264 return (EBADF); 1265 1266 if ((error = ccdlock(cs)) != 0) 1267 return (error); 1268 1269 /* Don't unconfigure if any other partitions are open */ 1270 part = ccdpart(dev); 1271 pmask = (1 << part); 1272 if ((cs->sc_openmask & ~pmask)) { 1273 ccdunlock(cs); 1274 return (EBUSY); 1275 } 1276 1277 /* Declare unit null and void (reset all flags) */ 1278 cs->sc_flags &= (CCDF_WANTED | CCDF_LOCKED); 1279 1280 /* Close the components and free their pathnames. */ 1281 for (i = 0; i < cs->sc_nccdisks; ++i) { 1282 /* 1283 * XXX: this close could potentially fail and 1284 * cause Bad Things. Maybe we need to force 1285 * the close to happen? 1286 */ 1287 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, 1288 td->td_ucred, td); 1289 free(cs->sc_cinfo[i].ci_path, M_CCD); 1290 } 1291 1292 /* Free interleave index. */ 1293 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) 1294 free(cs->sc_itable[i].ii_index, M_CCD); 1295 1296 /* Free component info and interleave table. */ 1297 free(cs->sc_cinfo, M_CCD); 1298 free(cs->sc_itable, M_CCD); 1299 free(cs->sc_vpp, M_CCD); 1300 1301 /* And remove the devstat entry. */ 1302 devstat_remove_entry(&cs->device_stats); 1303 1304 /* This must be atomic. */ 1305 s = splhigh(); 1306 ccdunlock(cs); 1307 splx(s); 1308 1309 break; 1310 1311 case CCDCONFINFO: 1312 { 1313 int ninit = 0; 1314 struct ccdconf *conf = (struct ccdconf *)data; 1315 struct ccd_s *tmpcs; 1316 struct ccd_s *ubuf = conf->buffer; 1317 1318 /* XXX: LOCK(unique unit numbers) */ 1319 LIST_FOREACH(tmpcs, &ccd_softc_list, list) 1320 if (IS_INITED(tmpcs)) 1321 ninit++; 1322 1323 if (conf->size == 0) { 1324 conf->size = sizeof(struct ccd_s) * ninit; 1325 break; 1326 } else if ((conf->size / sizeof(struct ccd_s) != ninit) || 1327 (conf->size % sizeof(struct ccd_s) != 0)) { 1328 /* XXX: UNLOCK(unique unit numbers) */ 1329 return (EINVAL); 1330 } 1331 1332 ubuf += ninit; 1333 LIST_FOREACH(tmpcs, &ccd_softc_list, list) { 1334 if (!IS_INITED(tmpcs)) 1335 continue; 1336 error = copyout(tmpcs, --ubuf, 1337 sizeof(struct ccd_s)); 1338 if (error != 0) 1339 /* XXX: UNLOCK(unique unit numbers) */ 1340 return (error); 1341 } 1342 /* XXX: UNLOCK(unique unit numbers) */ 1343 } 1344 break; 1345 1346 case CCDCPPINFO: 1347 if (!IS_INITED(cs)) 1348 return (ENXIO); 1349 1350 { 1351 int len = 0; 1352 struct ccdcpps *cpps = (struct ccdcpps *)data; 1353 char *ubuf = cpps->buffer; 1354 1355 1356 for (i = 0; i < cs->sc_nccdisks; ++i) 1357 len += cs->sc_cinfo[i].ci_pathlen; 1358 1359 if (cpps->size == 0) { 1360 cpps->size = len; 1361 break; 1362 } else if (cpps->size < len) { 1363 return (ENOMEM); 1364 } 1365 1366 for (i = 0; i < cs->sc_nccdisks; ++i) { 1367 len = cs->sc_cinfo[i].ci_pathlen; 1368 error = copyout(cs->sc_cinfo[i].ci_path, ubuf, 1369 len); 1370 if (error != 0) 1371 return (error); 1372 ubuf += len; 1373 } 1374 return(copyout("", ubuf, 1)); 1375 } 1376 break; 1377 1378 case DIOCGDINFO: 1379 if (!IS_INITED(cs)) 1380 return (ENXIO); 1381 1382 *(struct disklabel *)data = cs->sc_label; 1383 break; 1384 1385 case DIOCWDINFO: 1386 case DIOCSDINFO: 1387 if (!IS_INITED(cs)) 1388 return (ENXIO); 1389 1390 if ((flag & FWRITE) == 0) 1391 return (EBADF); 1392 1393 if ((error = ccdlock(cs)) != 0) 1394 return (error); 1395 1396 cs->sc_flags |= CCDF_LABELLING; 1397 1398 error = setdisklabel(&cs->sc_label, 1399 (struct disklabel *)data, 0); 1400 if (error == 0) { 1401 if (cmd == DIOCWDINFO) 1402 error = writedisklabel(CCDLABELDEV(dev), 1403 &cs->sc_label); 1404 } 1405 1406 cs->sc_flags &= ~CCDF_LABELLING; 1407 1408 ccdunlock(cs); 1409 1410 if (error) 1411 return (error); 1412 break; 1413 1414 case DIOCWLABEL: 1415 if (!IS_INITED(cs)) 1416 return (ENXIO); 1417 1418 if ((flag & FWRITE) == 0) 1419 return (EBADF); 1420 if (*(int *)data != 0) 1421 cs->sc_flags |= CCDF_WLABEL; 1422 else 1423 cs->sc_flags &= ~CCDF_WLABEL; 1424 break; 1425 1426 default: 1427 return (ENOTTY); 1428 } 1429 1430 return (0); 1431} 1432 1433static int 1434ccdsize(dev_t dev) 1435{ 1436 struct ccd_s *cs; 1437 int part, size; 1438 1439 if (dev->si_drv1 == dev) 1440 return (-1); 1441 1442 if (ccdopen(dev, 0, S_IFCHR, curthread)) 1443 return (-1); 1444 1445 cs = ccdfind(ccdunit(dev)); 1446 part = ccdpart(dev); 1447 1448 if (!IS_INITED(cs)) 1449 return (-1); 1450 1451 if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP) 1452 size = -1; 1453 else 1454 size = cs->sc_label.d_partitions[part].p_size; 1455 1456 if (ccdclose(dev, 0, S_IFCHR, curthread)) 1457 return (-1); 1458 1459 return (size); 1460} 1461 1462/* 1463 * Lookup the provided name in the filesystem. If the file exists, 1464 * is a valid block device, and isn't being used by anyone else, 1465 * set *vpp to the file's vnode. 1466 */ 1467static int 1468ccdlookup(char *path, struct thread *td, struct vnode **vpp) 1469{ 1470 struct nameidata nd; 1471 struct vnode *vp; 1472 int error, flags; 1473 1474 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, path, td); 1475 flags = FREAD | FWRITE; 1476 if ((error = vn_open(&nd, &flags, 0)) != 0) { 1477 return (error); 1478 } 1479 vp = nd.ni_vp; 1480 1481 if (vrefcnt(vp) > 1) { 1482 error = EBUSY; 1483 goto bad; 1484 } 1485 1486 if (!vn_isdisk(vp, &error)) 1487 goto bad; 1488 1489 1490 VOP_UNLOCK(vp, 0, td); 1491 NDFREE(&nd, NDF_ONLY_PNBUF); 1492 *vpp = vp; 1493 return (0); 1494bad: 1495 VOP_UNLOCK(vp, 0, td); 1496 NDFREE(&nd, NDF_ONLY_PNBUF); 1497 /* vn_close does vrele() for vp */ 1498 (void)vn_close(vp, FREAD|FWRITE, td->td_ucred, td); 1499 return (error); 1500} 1501 1502/* 1503 * Read the disklabel from the ccd. If one is not present, fake one 1504 * up. 1505 */ 1506static void 1507ccdgetdisklabel(dev_t dev) 1508{ 1509 int unit = ccdunit(dev); 1510 struct ccd_s *cs = ccdfind(unit); 1511 char *errstring; 1512 struct disklabel *lp = &cs->sc_label; 1513 struct ccdgeom *ccg = &cs->sc_geom; 1514 1515 bzero(lp, sizeof(*lp)); 1516 1517 lp->d_secperunit = cs->sc_size; 1518 lp->d_secsize = ccg->ccg_secsize; 1519 lp->d_nsectors = ccg->ccg_nsectors; 1520 lp->d_ntracks = ccg->ccg_ntracks; 1521 lp->d_ncylinders = ccg->ccg_ncylinders; 1522 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1523 1524 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename)); 1525 lp->d_type = DTYPE_CCD; 1526 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1527 lp->d_rpm = 3600; 1528 lp->d_interleave = 1; 1529 lp->d_flags = 0; 1530 1531 lp->d_partitions[RAW_PART].p_offset = 0; 1532 lp->d_partitions[RAW_PART].p_size = cs->sc_size; 1533 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 1534 lp->d_npartitions = RAW_PART + 1; 1535 1536 lp->d_bbsize = BBSIZE; /* XXX */ 1537 lp->d_sbsize = 0; 1538 1539 lp->d_magic = DISKMAGIC; 1540 lp->d_magic2 = DISKMAGIC; 1541 lp->d_checksum = dkcksum(&cs->sc_label); 1542 1543 /* 1544 * Call the generic disklabel extraction routine. 1545 */ 1546 errstring = readdisklabel(CCDLABELDEV(dev), &cs->sc_label); 1547 if (errstring != NULL) 1548 ccdmakedisklabel(cs); 1549 1550} 1551 1552/* 1553 * Take care of things one might want to take care of in the event 1554 * that a disklabel isn't present. 1555 */ 1556static void 1557ccdmakedisklabel(struct ccd_s *cs) 1558{ 1559 struct disklabel *lp = &cs->sc_label; 1560 1561 /* 1562 * For historical reasons, if there's no disklabel present 1563 * the raw partition must be marked FS_BSDFFS. 1564 */ 1565 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 1566 1567 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 1568} 1569 1570/* 1571 * Wait interruptibly for an exclusive lock. 1572 * 1573 * XXX 1574 * Several drivers do this; it should be abstracted and made MP-safe. 1575 */ 1576static int 1577ccdlock(struct ccd_s *cs) 1578{ 1579 int error; 1580 1581 while ((cs->sc_flags & CCDF_LOCKED) != 0) { 1582 cs->sc_flags |= CCDF_WANTED; 1583 if ((error = tsleep(cs, PRIBIO | PCATCH, "ccdlck", 0)) != 0) 1584 return (error); 1585 } 1586 cs->sc_flags |= CCDF_LOCKED; 1587 return (0); 1588} 1589 1590/* 1591 * Unlock and wake up any waiters. 1592 */ 1593static void 1594ccdunlock(struct ccd_s *cs) 1595{ 1596 1597 cs->sc_flags &= ~CCDF_LOCKED; 1598 if ((cs->sc_flags & CCDF_WANTED) != 0) { 1599 cs->sc_flags &= ~CCDF_WANTED; 1600 wakeup(cs); 1601 } 1602} 1603 1604