131}; 132 133static LIST_HEAD(, ccd_s) ccd_softc_list = 134 LIST_HEAD_INITIALIZER(&ccd_softc_list); 135 136static struct ccd_s *ccdfind(int); 137static struct ccd_s *ccdnew(int); 138static int ccddestroy(struct ccd_s *); 139 140/* called during module initialization */ 141static void ccdattach(void); 142static int ccd_modevent(module_t, int, void *); 143 144/* called by biodone() at interrupt time */ 145static void ccdiodone(struct bio *bp); 146 147static void ccdstart(struct ccd_s *, struct bio *); 148static void ccdinterleave(struct ccd_s *, int); 149static int ccdinit(struct ccd_s *, char **, struct thread *); 150static int ccdlookup(char *, struct thread *p, struct vnode **); 151static int ccdbuffer(struct ccdbuf **ret, struct ccd_s *, 152 struct bio *, daddr_t, caddr_t, long); 153static int ccdlock(struct ccd_s *); 154static void ccdunlock(struct ccd_s *); 155 156 157/* 158 * Number of blocks to untouched in front of a component partition. 159 * This is to avoid violating its disklabel area when it starts at the 160 * beginning of the slice. 161 */ 162#if !defined(CCD_OFFSET) 163#define CCD_OFFSET 16 164#endif 165 166static struct ccd_s * 167ccdfind(int unit) 168{ 169 struct ccd_s *sc = NULL; 170 171 /* XXX: LOCK(unique unit numbers) */ 172 LIST_FOREACH(sc, &ccd_softc_list, list) { 173 if (sc->sc_unit == unit) 174 break; 175 } 176 /* XXX: UNLOCK(unique unit numbers) */ 177 return ((sc == NULL) || (sc->sc_unit != unit) ? 
NULL : sc); 178} 179 180static struct ccd_s * 181ccdnew(int unit) 182{ 183 struct ccd_s *sc; 184 185 /* XXX: LOCK(unique unit numbers) */ 186 if (IS_ALLOCATED(unit) || unit > 32) 187 return (NULL); 188 189 MALLOC(sc, struct ccd_s *, sizeof(*sc), M_CCD, M_WAITOK | M_ZERO); 190 sc->sc_unit = unit; 191 LIST_INSERT_HEAD(&ccd_softc_list, sc, list); 192 /* XXX: UNLOCK(unique unit numbers) */ 193 return (sc); 194} 195 196static int 197ccddestroy(struct ccd_s *sc) 198{ 199 200 /* XXX: LOCK(unique unit numbers) */ 201 LIST_REMOVE(sc, list); 202 /* XXX: UNLOCK(unique unit numbers) */ 203 FREE(sc, M_CCD); 204 return (0); 205} 206 207/* 208 * Called by main() during pseudo-device attachment. All we need 209 * to do is to add devsw entries. 210 */ 211static void 212ccdattach() 213{ 214 215 ccdctldev = make_dev(&ccdctl_cdevsw, 0xffff00ff, 216 UID_ROOT, GID_OPERATOR, 0640, "ccd.ctl"); 217 ccdctldev->si_drv1 = ccdctldev; 218} 219 220static int 221ccd_modevent(module_t mod, int type, void *data) 222{ 223 int error = 0; 224 225 switch (type) { 226 case MOD_LOAD: 227 ccdattach(); 228 break; 229 230 case MOD_UNLOAD: 231 printf("ccd0: Unload not supported!\n"); 232 error = EOPNOTSUPP; 233 break; 234 235 case MOD_SHUTDOWN: 236 break; 237 238 default: 239 error = EOPNOTSUPP; 240 } 241 return (error); 242} 243 244DEV_MODULE(ccd, ccd_modevent, NULL); 245 246static int 247ccdinit(struct ccd_s *cs, char **cpaths, struct thread *td) 248{ 249 struct ccdcinfo *ci = NULL; /* XXX */ 250 size_t size; 251 int ix; 252 struct vnode *vp; 253 size_t minsize; 254 int maxsecsize; 255 struct ccdgeom *ccg = &cs->sc_geom; 256 char *tmppath = NULL; 257 int error = 0; 258 off_t mediasize; 259 u_int sectorsize; 260 261 262 cs->sc_size = 0; 263 264 /* Allocate space for the component info. */ 265 cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo), 266 M_CCD, M_WAITOK); 267 268 /* 269 * Verify that each component piece exists and record 270 * relevant information about it. 
271 */ 272 maxsecsize = 0; 273 minsize = 0; 274 tmppath = malloc(MAXPATHLEN, M_CCD, M_WAITOK); 275 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 276 vp = cs->sc_vpp[ix]; 277 ci = &cs->sc_cinfo[ix]; 278 ci->ci_vp = vp; 279 280 /* 281 * Copy in the pathname of the component. 282 */ 283 if ((error = copyinstr(cpaths[ix], tmppath, 284 MAXPATHLEN, &ci->ci_pathlen)) != 0) { 285 goto fail; 286 } 287 ci->ci_path = malloc(ci->ci_pathlen, M_CCD, M_WAITOK); 288 bcopy(tmppath, ci->ci_path, ci->ci_pathlen); 289 290 ci->ci_dev = vn_todev(vp); 291 292 /* 293 * Get partition information for the component. 294 */ 295 error = VOP_IOCTL(vp, DIOCGMEDIASIZE, (caddr_t)&mediasize, 296 FREAD, td->td_ucred, td); 297 if (error != 0) { 298 goto fail; 299 } 300 /* 301 * Get partition information for the component. 302 */ 303 error = VOP_IOCTL(vp, DIOCGSECTORSIZE, (caddr_t)§orsize, 304 FREAD, td->td_ucred, td); 305 if (error != 0) { 306 goto fail; 307 } 308 if (sectorsize > maxsecsize) 309 maxsecsize = sectorsize; 310 size = mediasize / DEV_BSIZE - CCD_OFFSET; 311 312 /* 313 * Calculate the size, truncating to an interleave 314 * boundary if necessary. 315 */ 316 317 if (cs->sc_ileave > 1) 318 size -= size % cs->sc_ileave; 319 320 if (size == 0) { 321 error = ENODEV; 322 goto fail; 323 } 324 325 if (minsize == 0 || size < minsize) 326 minsize = size; 327 ci->ci_size = size; 328 cs->sc_size += size; 329 } 330 331 free(tmppath, M_CCD); 332 tmppath = NULL; 333 334 /* 335 * Don't allow the interleave to be smaller than 336 * the biggest component sector. 337 */ 338 if ((cs->sc_ileave > 0) && 339 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 340 error = EINVAL; 341 goto fail; 342 } 343 344 /* 345 * If uniform interleave is desired set all sizes to that of 346 * the smallest component. This will guarentee that a single 347 * interleave table is generated. 348 * 349 * Lost space must be taken into account when calculating the 350 * overall size. 
Half the space is lost when CCDF_MIRROR is 351 * specified. 352 */ 353 if (cs->sc_flags & CCDF_UNIFORM) { 354 for (ci = cs->sc_cinfo; 355 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 356 ci->ci_size = minsize; 357 } 358 if (cs->sc_flags & CCDF_MIRROR) { 359 /* 360 * Check to see if an even number of components 361 * have been specified. The interleave must also 362 * be non-zero in order for us to be able to 363 * guarentee the topology. 364 */ 365 if (cs->sc_nccdisks % 2) { 366 printf("ccd%d: mirroring requires an even number of disks\n", cs->sc_unit ); 367 error = EINVAL; 368 goto fail; 369 } 370 if (cs->sc_ileave == 0) { 371 printf("ccd%d: an interleave must be specified when mirroring\n", cs->sc_unit); 372 error = EINVAL; 373 goto fail; 374 } 375 cs->sc_size = (cs->sc_nccdisks/2) * minsize; 376 } else { 377 if (cs->sc_ileave == 0) { 378 printf("ccd%d: an interleave must be specified when using parity\n", cs->sc_unit); 379 error = EINVAL; 380 goto fail; 381 } 382 cs->sc_size = cs->sc_nccdisks * minsize; 383 } 384 } 385 386 /* 387 * Construct the interleave table. 388 */ 389 ccdinterleave(cs, cs->sc_unit); 390 391 /* 392 * Create pseudo-geometry based on 1MB cylinders. It's 393 * pretty close. 394 */ 395 ccg->ccg_secsize = maxsecsize; 396 ccg->ccg_ntracks = 1; 397 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize; 398 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 399 400 /* 401 * Add a devstat entry for this device. 402 */ 403 devstat_add_entry(&cs->device_stats, "ccd", cs->sc_unit, 404 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED, 405 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER, 406 DEVSTAT_PRIORITY_ARRAY); 407 408 cs->sc_flags |= CCDF_INITED; 409 cs->sc_cflags = cs->sc_flags; /* So we can find out later... 
*/ 410 return (0); 411fail: 412 while (ci > cs->sc_cinfo) { 413 ci--; 414 free(ci->ci_path, M_CCD); 415 } 416 if (tmppath != NULL) 417 free(tmppath, M_CCD); 418 free(cs->sc_cinfo, M_CCD); 419 ccddestroy(cs); 420 return (error); 421} 422 423static void 424ccdinterleave(struct ccd_s *cs, int unit) 425{ 426 struct ccdcinfo *ci, *smallci; 427 struct ccdiinfo *ii; 428 daddr_t bn, lbn; 429 int ix; 430 u_long size; 431 432 433 /* 434 * Allocate an interleave table. The worst case occurs when each 435 * of N disks is of a different size, resulting in N interleave 436 * tables. 437 * 438 * Chances are this is too big, but we don't care. 439 */ 440 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo); 441 cs->sc_itable = (struct ccdiinfo *)malloc(size, M_CCD, 442 M_WAITOK | M_ZERO); 443 444 /* 445 * Trivial case: no interleave (actually interleave of disk size). 446 * Each table entry represents a single component in its entirety. 447 * 448 * An interleave of 0 may not be used with a mirror setup. 449 */ 450 if (cs->sc_ileave == 0) { 451 bn = 0; 452 ii = cs->sc_itable; 453 454 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 455 /* Allocate space for ii_index. */ 456 ii->ii_index = malloc(sizeof(int), M_CCD, M_WAITOK); 457 ii->ii_ndisk = 1; 458 ii->ii_startblk = bn; 459 ii->ii_startoff = 0; 460 ii->ii_index[0] = ix; 461 bn += cs->sc_cinfo[ix].ci_size; 462 ii++; 463 } 464 ii->ii_ndisk = 0; 465 return; 466 } 467 468 /* 469 * The following isn't fast or pretty; it doesn't have to be. 470 */ 471 size = 0; 472 bn = lbn = 0; 473 for (ii = cs->sc_itable; ; ii++) { 474 /* 475 * Allocate space for ii_index. We might allocate more then 476 * we use. 
477 */ 478 ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks), 479 M_CCD, M_WAITOK); 480 481 /* 482 * Locate the smallest of the remaining components 483 */ 484 smallci = NULL; 485 for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks]; 486 ci++) { 487 if (ci->ci_size > size && 488 (smallci == NULL || 489 ci->ci_size < smallci->ci_size)) { 490 smallci = ci; 491 } 492 } 493 494 /* 495 * Nobody left, all done 496 */ 497 if (smallci == NULL) { 498 ii->ii_ndisk = 0; 499 free(ii->ii_index, M_CCD); 500 break; 501 } 502 503 /* 504 * Record starting logical block using an sc_ileave blocksize. 505 */ 506 ii->ii_startblk = bn / cs->sc_ileave; 507 508 /* 509 * Record starting comopnent block using an sc_ileave 510 * blocksize. This value is relative to the beginning of 511 * a component disk. 512 */ 513 ii->ii_startoff = lbn; 514 515 /* 516 * Determine how many disks take part in this interleave 517 * and record their indices. 518 */ 519 ix = 0; 520 for (ci = cs->sc_cinfo; 521 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 522 if (ci->ci_size >= smallci->ci_size) { 523 ii->ii_index[ix++] = ci - cs->sc_cinfo; 524 } 525 } 526 ii->ii_ndisk = ix; 527 bn += ix * (smallci->ci_size - size); 528 lbn = smallci->ci_size / cs->sc_ileave; 529 size = smallci->ci_size; 530 } 531} 532 533static void 534ccdstrategy(struct bio *bp) 535{ 536 struct ccd_s *cs; 537 int pbn; /* in sc_secsize chunks */ 538 long sz; /* in sc_secsize chunks */ 539 540 cs = bp->bio_disk->d_drv1; 541 542 pbn = bp->bio_blkno / (cs->sc_geom.ccg_secsize / DEV_BSIZE); 543 sz = howmany(bp->bio_bcount, cs->sc_geom.ccg_secsize); 544 545 /* 546 * If out of bounds return an error. If at the EOF point, 547 * simply read or write less. 548 */ 549 550 if (pbn < 0 || pbn >= cs->sc_size) { 551 bp->bio_resid = bp->bio_bcount; 552 if (pbn != cs->sc_size) 553 biofinish(bp, NULL, EINVAL); 554 else 555 biodone(bp); 556 return; 557 } 558 559 /* 560 * If the request crosses EOF, truncate the request. 
561 */ 562 if (pbn + sz > cs->sc_size) { 563 bp->bio_bcount = (cs->sc_size - pbn) * 564 cs->sc_geom.ccg_secsize; 565 } 566 567 bp->bio_resid = bp->bio_bcount; 568 569 /* 570 * "Start" the unit. 571 */ 572 ccdstart(cs, bp); 573 return; 574} 575 576static void 577ccdstart(struct ccd_s *cs, struct bio *bp) 578{ 579 long bcount, rcount; 580 struct ccdbuf *cbp[2]; 581 caddr_t addr; 582 daddr_t bn; 583 int err; 584 585 586 /* Record the transaction start */ 587 devstat_start_transaction(&cs->device_stats); 588 589 /* 590 * Translate the partition-relative block number to an absolute. 591 */ 592 bn = bp->bio_blkno; 593 594 /* 595 * Allocate component buffers and fire off the requests 596 */ 597 addr = bp->bio_data; 598 for (bcount = bp->bio_bcount; bcount > 0; bcount -= rcount) { 599 err = ccdbuffer(cbp, cs, bp, bn, addr, bcount); 600 if (err) { 601 printf("ccdbuffer error %d\n", err); 602 /* We're screwed */ 603 bp->bio_resid -= bcount; 604 bp->bio_error = ENOMEM; 605 bp->bio_flags |= BIO_ERROR; 606 return; 607 } 608 rcount = cbp[0]->cb_buf.bio_bcount; 609 610 if (cs->sc_cflags & CCDF_MIRROR) { 611 /* 612 * Mirroring. Writes go to both disks, reads are 613 * taken from whichever disk seems most appropriate. 614 * 615 * We attempt to localize reads to the disk whos arm 616 * is nearest the read request. We ignore seeks due 617 * to writes when making this determination and we 618 * also try to avoid hogging. 
619 */ 620 if (cbp[0]->cb_buf.bio_cmd == BIO_WRITE) { 621 BIO_STRATEGY(&cbp[0]->cb_buf); 622 BIO_STRATEGY(&cbp[1]->cb_buf); 623 } else { 624 int pick = cs->sc_pick; 625 daddr_t range = cs->sc_size / 16; 626 627 if (bn < cs->sc_blk[pick] - range || 628 bn > cs->sc_blk[pick] + range 629 ) { 630 cs->sc_pick = pick = 1 - pick; 631 } 632 cs->sc_blk[pick] = bn + btodb(rcount); 633 BIO_STRATEGY(&cbp[pick]->cb_buf); 634 } 635 } else { 636 /* 637 * Not mirroring 638 */ 639 BIO_STRATEGY(&cbp[0]->cb_buf); 640 } 641 bn += btodb(rcount); 642 addr += rcount; 643 } 644} 645 646/* 647 * Build a component buffer header. 648 */ 649static int 650ccdbuffer(struct ccdbuf **cb, struct ccd_s *cs, struct bio *bp, daddr_t bn, caddr_t addr, long bcount) 651{ 652 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */ 653 struct ccdbuf *cbp; 654 daddr_t cbn, cboff; 655 off_t cbc; 656 657 /* 658 * Determine which component bn falls in. 659 */ 660 cbn = bn; 661 cboff = 0; 662 663 if (cs->sc_ileave == 0) { 664 /* 665 * Serially concatenated and neither a mirror nor a parity 666 * config. This is a special case. 667 */ 668 daddr_t sblk; 669 670 sblk = 0; 671 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++) 672 sblk += ci->ci_size; 673 cbn -= sblk; 674 } else { 675 struct ccdiinfo *ii; 676 int ccdisk, off; 677 678 /* 679 * Calculate cbn, the logical superblock (sc_ileave chunks), 680 * and cboff, a normal block offset (DEV_BSIZE chunks) relative 681 * to cbn. 682 */ 683 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */ 684 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */ 685 686 /* 687 * Figure out which interleave table to use. 688 */ 689 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) { 690 if (ii->ii_startblk > cbn) 691 break; 692 } 693 ii--; 694 695 /* 696 * off is the logical superblock relative to the beginning 697 * of this interleave block. 
698 */ 699 off = cbn - ii->ii_startblk; 700 701 /* 702 * We must calculate which disk component to use (ccdisk), 703 * and recalculate cbn to be the superblock relative to 704 * the beginning of the component. This is typically done by 705 * adding 'off' and ii->ii_startoff together. However, 'off' 706 * must typically be divided by the number of components in 707 * this interleave array to be properly convert it from a 708 * CCD-relative logical superblock number to a 709 * component-relative superblock number. 710 */ 711 if (ii->ii_ndisk == 1) { 712 /* 713 * When we have just one disk, it can't be a mirror 714 * or a parity config. 715 */ 716 ccdisk = ii->ii_index[0]; 717 cbn = ii->ii_startoff + off; 718 } else { 719 if (cs->sc_cflags & CCDF_MIRROR) { 720 /* 721 * We have forced a uniform mapping, resulting 722 * in a single interleave array. We double 723 * up on the first half of the available 724 * components and our mirror is in the second 725 * half. This only works with a single 726 * interleave array because doubling up 727 * doubles the number of sectors, so there 728 * cannot be another interleave array because 729 * the next interleave array's calculations 730 * would be off. 731 */ 732 int ndisk2 = ii->ii_ndisk / 2; 733 ccdisk = ii->ii_index[off % ndisk2]; 734 cbn = ii->ii_startoff + off / ndisk2; 735 ci2 = &cs->sc_cinfo[ccdisk + ndisk2]; 736 } else { 737 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 738 cbn = ii->ii_startoff + off / ii->ii_ndisk; 739 } 740 } 741 742 ci = &cs->sc_cinfo[ccdisk]; 743 744 /* 745 * Convert cbn from a superblock to a normal block so it 746 * can be used to calculate (along with cboff) the normal 747 * block index into this particular disk. 748 */ 749 cbn *= cs->sc_ileave; 750 } 751 752 /* 753 * Fill in the component buf structure. 
754 */ 755 cbp = malloc(sizeof(struct ccdbuf), M_CCD, M_NOWAIT | M_ZERO); 756 if (cbp == NULL) 757 return (ENOMEM); 758 cbp->cb_buf.bio_cmd = bp->bio_cmd; 759 cbp->cb_buf.bio_done = ccdiodone; 760 cbp->cb_buf.bio_dev = ci->ci_dev; /* XXX */ 761 cbp->cb_buf.bio_blkno = cbn + cboff + CCD_OFFSET; 762 cbp->cb_buf.bio_offset = dbtob(cbn + cboff + CCD_OFFSET); 763 cbp->cb_buf.bio_data = addr; 764 cbp->cb_buf.bio_caller2 = cbp; 765 if (cs->sc_ileave == 0) 766 cbc = dbtob((off_t)(ci->ci_size - cbn)); 767 else 768 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 769 cbp->cb_buf.bio_bcount = (cbc < bcount) ? cbc : bcount; 770 cbp->cb_buf.bio_caller1 = (void*)cbp->cb_buf.bio_bcount; 771 772 /* 773 * context for ccdiodone 774 */ 775 cbp->cb_obp = bp; 776 cbp->cb_softc = cs; 777 cbp->cb_comp = ci - cs->sc_cinfo; 778 779 cb[0] = cbp; 780 781 /* 782 * Note: both I/O's setup when reading from mirror, but only one 783 * will be executed. 784 */ 785 if (cs->sc_cflags & CCDF_MIRROR) { 786 /* mirror, setup second I/O */ 787 cbp = malloc(sizeof(struct ccdbuf), M_CCD, M_NOWAIT); 788 if (cbp == NULL) { 789 free(cb[0], M_CCD); 790 cb[0] = NULL; 791 return (ENOMEM); 792 } 793 bcopy(cb[0], cbp, sizeof(struct ccdbuf)); 794 cbp->cb_buf.bio_dev = ci2->ci_dev; 795 cbp->cb_comp = ci2 - cs->sc_cinfo; 796 cb[1] = cbp; 797 /* link together the ccdbuf's and clear "mirror done" flag */ 798 cb[0]->cb_mirror = cb[1]; 799 cb[1]->cb_mirror = cb[0]; 800 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE; 801 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE; 802 } 803 return (0); 804} 805 806/* 807 * Called at interrupt time. 808 * Mark the component as done and if all components are done, 809 * take a ccd interrupt. 810 */ 811static void 812ccdiodone(struct bio *ibp) 813{ 814 struct ccdbuf *cbp; 815 struct bio *bp; 816 struct ccd_s *cs; 817 int count; 818 819 cbp = ibp->bio_caller2; 820 cs = cbp->cb_softc; 821 bp = cbp->cb_obp; 822 /* 823 * If an error occured, report it. 
If this is a mirrored 824 * configuration and the first of two possible reads, do not 825 * set the error in the bp yet because the second read may 826 * succeed. 827 */ 828 829 if (cbp->cb_buf.bio_flags & BIO_ERROR) { 830 const char *msg = ""; 831 832 if ((cs->sc_cflags & CCDF_MIRROR) && 833 (cbp->cb_buf.bio_cmd == BIO_READ) && 834 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 835 /* 836 * We will try our read on the other disk down 837 * below, also reverse the default pick so if we 838 * are doing a scan we do not keep hitting the 839 * bad disk first. 840 */ 841 842 msg = ", trying other disk"; 843 cs->sc_pick = 1 - cs->sc_pick; 844 cs->sc_blk[cs->sc_pick] = bp->bio_blkno; 845 } else { 846 bp->bio_flags |= BIO_ERROR; 847 bp->bio_error = cbp->cb_buf.bio_error ? 848 cbp->cb_buf.bio_error : EIO; 849 } 850 printf("ccd%d: error %d on component %d block %jd " 851 "(ccd block %jd)%s\n", cs->sc_unit, bp->bio_error, 852 cbp->cb_comp, 853 (intmax_t)cbp->cb_buf.bio_blkno, (intmax_t)bp->bio_blkno, 854 msg); 855 } 856 857 /* 858 * Process mirror. If we are writing, I/O has been initiated on both 859 * buffers and we fall through only after both are finished. 860 * 861 * If we are reading only one I/O is initiated at a time. If an 862 * error occurs we initiate the second I/O and return, otherwise 863 * we free the second I/O without initiating it. 864 */ 865 866 if (cs->sc_cflags & CCDF_MIRROR) { 867 if (cbp->cb_buf.bio_cmd == BIO_WRITE) { 868 /* 869 * When writing, handshake with the second buffer 870 * to determine when both are done. If both are not 871 * done, return here. 872 */ 873 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 874 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE; 875 free(cbp, M_CCD); 876 return; 877 } 878 } else { 879 /* 880 * When reading, either dispose of the second buffer 881 * or initiate I/O on the second buffer if an error 882 * occured with this one. 
883 */ 884 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 885 if (cbp->cb_buf.bio_flags & BIO_ERROR) { 886 cbp->cb_mirror->cb_pflags |= 887 CCDPF_MIRROR_DONE; 888 BIO_STRATEGY(&cbp->cb_mirror->cb_buf); 889 free(cbp, M_CCD); 890 return; 891 } else { 892 free(cbp->cb_mirror, M_CCD); 893 } 894 } 895 } 896 } 897 898 /* 899 * use bio_caller1 to determine how big the original request was rather 900 * then bio_bcount, because bio_bcount may have been truncated for EOF. 901 * 902 * XXX We check for an error, but we do not test the resid for an 903 * aligned EOF condition. This may result in character & block 904 * device access not recognizing EOF properly when read or written 905 * sequentially, but will not effect filesystems. 906 */ 907 count = (long)cbp->cb_buf.bio_caller1; 908 free(cbp, M_CCD); 909 910 /* 911 * If all done, "interrupt". 912 */ 913 bp->bio_resid -= count; 914 if (bp->bio_resid < 0) 915 panic("ccdiodone: count"); 916 if (bp->bio_resid == 0) { 917 if (bp->bio_flags & BIO_ERROR) 918 bp->bio_resid = bp->bio_bcount; 919 biofinish(bp, &cs->device_stats, 0); 920 } 921} 922 923static int ccdioctltoo(int unit, u_long cmd, caddr_t data, int flag, struct thread *td); 924 925static int 926ccdctlioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct thread *td) 927{ 928 struct ccd_ioctl *ccio; 929 u_int unit; 930 dev_t dev2; 931 int error; 932 933 switch (cmd) { 934 case CCDIOCSET: 935 case CCDIOCCLR: 936 ccio = (struct ccd_ioctl *)data; 937 unit = ccio->ccio_size; 938 return (ccdioctltoo(unit, cmd, data, flag, td)); 939 case CCDCONFINFO: 940 { 941 int ninit = 0; 942 struct ccdconf *conf = (struct ccdconf *)data; 943 struct ccd_s *tmpcs; 944 struct ccd_s *ubuf = conf->buffer; 945 946 /* XXX: LOCK(unique unit numbers) */ 947 LIST_FOREACH(tmpcs, &ccd_softc_list, list) 948 if (IS_INITED(tmpcs)) 949 ninit++; 950 951 if (conf->size == 0) { 952 conf->size = sizeof(struct ccd_s) * ninit; 953 return (0); 954 } else if ((conf->size / sizeof(struct ccd_s) != ninit) || 
955 (conf->size % sizeof(struct ccd_s) != 0)) { 956 /* XXX: UNLOCK(unique unit numbers) */ 957 return (EINVAL); 958 } 959 960 ubuf += ninit; 961 LIST_FOREACH(tmpcs, &ccd_softc_list, list) { 962 if (!IS_INITED(tmpcs)) 963 continue; 964 error = copyout(tmpcs, --ubuf, 965 sizeof(struct ccd_s)); 966 if (error != 0) 967 /* XXX: UNLOCK(unique unit numbers) */ 968 return (error); 969 } 970 /* XXX: UNLOCK(unique unit numbers) */ 971 return (0); 972 } 973 974 case CCDCPPINFO: 975 { 976 struct ccdcpps *cpps = (struct ccdcpps *)data; 977 char *ubuf = cpps->buffer; 978 struct ccd_s *cs; 979 980 981 error = copyin(ubuf, &unit, sizeof (unit)); 982 if (error) 983 return (error); 984 985 if (!IS_ALLOCATED(unit)) 986 return (ENXIO); 987 dev2 = makedev(CDEV_MAJOR, unit * 8 + 2); 988 cs = ccdfind(unit); 989 if (!IS_INITED(cs)) 990 return (ENXIO); 991 992 { 993 int len = 0, i; 994 struct ccdcpps *cpps = (struct ccdcpps *)data; 995 char *ubuf = cpps->buffer; 996 997 998 for (i = 0; i < cs->sc_nccdisks; ++i) 999 len += cs->sc_cinfo[i].ci_pathlen; 1000 1001 if (cpps->size < len) 1002 return (ENOMEM); 1003 1004 for (i = 0; i < cs->sc_nccdisks; ++i) { 1005 len = cs->sc_cinfo[i].ci_pathlen; 1006 error = copyout(cs->sc_cinfo[i].ci_path, ubuf, 1007 len); 1008 if (error != 0) 1009 return (error); 1010 ubuf += len; 1011 } 1012 return(copyout("", ubuf, 1)); 1013 } 1014 break; 1015 } 1016 1017 default: 1018 return (ENXIO); 1019 } 1020} 1021 1022static int 1023ccdioctltoo(int unit, u_long cmd, caddr_t data, int flag, struct thread *td) 1024{ 1025 int i, j, lookedup = 0, error = 0; 1026 struct ccd_s *cs; 1027 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data; 1028 struct ccdgeom *ccg; 1029 char **cpp; 1030 struct vnode **vpp; 1031 1032 cs = ccdfind(unit); 1033 switch (cmd) { 1034 case CCDIOCSET: 1035 if (cs == NULL) 1036 cs = ccdnew(unit); 1037 if (IS_INITED(cs)) 1038 return (EBUSY); 1039 1040 if ((flag & FWRITE) == 0) 1041 return (EBADF); 1042 1043 if ((error = ccdlock(cs)) != 0) 1044 return 
(error); 1045 1046 if (ccio->ccio_ndisks > CCD_MAXNDISKS) 1047 return (EINVAL); 1048 1049 /* Fill in some important bits. */ 1050 cs->sc_ileave = ccio->ccio_ileave; 1051 if (cs->sc_ileave == 0 && (ccio->ccio_flags & CCDF_MIRROR)) { 1052 printf("ccd%d: disabling mirror, interleave is 0\n", 1053 unit); 1054 ccio->ccio_flags &= ~(CCDF_MIRROR); 1055 } 1056 if ((ccio->ccio_flags & CCDF_MIRROR) && 1057 !(ccio->ccio_flags & CCDF_UNIFORM)) { 1058 printf("ccd%d: mirror/parity forces uniform flag\n", 1059 unit); 1060 ccio->ccio_flags |= CCDF_UNIFORM; 1061 } 1062 cs->sc_flags = ccio->ccio_flags & CCDF_USERMASK; 1063 1064 /* 1065 * Allocate space for and copy in the array of 1066 * componet pathnames and device numbers. 1067 */ 1068 cpp = malloc(ccio->ccio_ndisks * sizeof(char *), 1069 M_CCD, M_WAITOK); 1070 vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *), 1071 M_CCD, M_WAITOK); 1072 1073 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp, 1074 ccio->ccio_ndisks * sizeof(char **)); 1075 if (error) { 1076 free(vpp, M_CCD); 1077 free(cpp, M_CCD); 1078 ccdunlock(cs); 1079 return (error); 1080 } 1081 1082 1083 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1084 if ((error = ccdlookup(cpp[i], td, &vpp[i])) != 0) { 1085 for (j = 0; j < lookedup; ++j) 1086 (void)vn_close(vpp[j], FREAD|FWRITE, 1087 td->td_ucred, td); 1088 free(vpp, M_CCD); 1089 free(cpp, M_CCD); 1090 ccdunlock(cs); 1091 return (error); 1092 } 1093 ++lookedup; 1094 } 1095 cs->sc_vpp = vpp; 1096 cs->sc_nccdisks = ccio->ccio_ndisks; 1097 1098 /* 1099 * Initialize the ccd. Fills in the softc for us. 
1100 */ 1101 if ((error = ccdinit(cs, cpp, td)) != 0) { 1102 for (j = 0; j < lookedup; ++j) 1103 (void)vn_close(vpp[j], FREAD|FWRITE, 1104 td->td_ucred, td); 1105 /* 1106 * We can't ccddestroy() cs just yet, because nothing 1107 * prevents user-level app to do another ioctl() 1108 * without closing the device first, therefore 1109 * declare unit null and void and let ccdclose() 1110 * destroy it when it is safe to do so. 1111 */ 1112 cs->sc_flags &= (CCDF_WANTED | CCDF_LOCKED); 1113 free(vpp, M_CCD); 1114 free(cpp, M_CCD); 1115 ccdunlock(cs); 1116 return (error); 1117 } 1118 free(cpp, M_CCD); 1119 1120 /* 1121 * The ccd has been successfully initialized, so 1122 * we can place it into the array and read the disklabel. 1123 */ 1124 ccio->ccio_unit = unit; 1125 ccio->ccio_size = cs->sc_size; 1126 ccg = &cs->sc_geom; 1127 cs->sc_disk = malloc(sizeof(struct disk), M_CCD, 1128 M_ZERO | M_WAITOK); 1129 cs->sc_disk->d_strategy = ccdstrategy; 1130 cs->sc_disk->d_name = "ccd"; 1131 cs->sc_disk->d_sectorsize = ccg->ccg_secsize; 1132 cs->sc_disk->d_mediasize = 1133 cs->sc_size * (off_t)ccg->ccg_secsize; 1134 cs->sc_disk->d_fwsectors = ccg->ccg_nsectors; 1135 cs->sc_disk->d_fwheads = ccg->ccg_ntracks; 1136 cs->sc_disk->d_drv1 = cs; 1137 cs->sc_disk->d_maxsize = MAXPHYS; 1138 disk_create(unit, cs->sc_disk, 0, NULL, NULL); 1139 1140 ccdunlock(cs); 1141 1142 break; 1143 1144 case CCDIOCCLR: 1145 if (cs == NULL) 1146 return (ENXIO); 1147 1148 if (!IS_INITED(cs)) 1149 return (ENXIO); 1150 1151 if ((flag & FWRITE) == 0) 1152 return (EBADF); 1153 1154 if ((error = ccdlock(cs)) != 0) 1155 return (error); 1156 1157 /* Don't unconfigure if any other partitions are open */ 1158 if (cs->sc_disk->d_flags & DISKFLAG_OPEN) { 1159 ccdunlock(cs); 1160 return (EBUSY); 1161 } 1162 1163 disk_destroy(cs->sc_disk); 1164 free(cs->sc_disk, M_CCD); 1165 cs->sc_disk = NULL; 1166 /* Declare unit null and void (reset all flags) */ 1167 cs->sc_flags &= (CCDF_WANTED | CCDF_LOCKED); 1168 1169 /* Close the 
components and free their pathnames. */ 1170 for (i = 0; i < cs->sc_nccdisks; ++i) { 1171 /* 1172 * XXX: this close could potentially fail and 1173 * cause Bad Things. Maybe we need to force 1174 * the close to happen? 1175 */ 1176 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, 1177 td->td_ucred, td); 1178 free(cs->sc_cinfo[i].ci_path, M_CCD); 1179 } 1180 1181 /* Free interleave index. */ 1182 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) 1183 free(cs->sc_itable[i].ii_index, M_CCD); 1184 1185 /* Free component info and interleave table. */ 1186 free(cs->sc_cinfo, M_CCD); 1187 free(cs->sc_itable, M_CCD); 1188 free(cs->sc_vpp, M_CCD); 1189 1190 /* And remove the devstat entry. */ 1191 devstat_remove_entry(&cs->device_stats); 1192 1193 /* This must be atomic. */ 1194 ccdunlock(cs); 1195 ccddestroy(cs); 1196 1197 break; 1198 } 1199 1200 return (0); 1201} 1202 1203 1204/* 1205 * Lookup the provided name in the filesystem. If the file exists, 1206 * is a valid block device, and isn't being used by anyone else, 1207 * set *vpp to the file's vnode. 1208 */ 1209static int 1210ccdlookup(char *path, struct thread *td, struct vnode **vpp) 1211{ 1212 struct nameidata nd; 1213 struct vnode *vp; 1214 int error, flags; 1215 1216 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, path, td); 1217 flags = FREAD | FWRITE; 1218 if ((error = vn_open(&nd, &flags, 0)) != 0) { 1219 return (error); 1220 } 1221 vp = nd.ni_vp; 1222 1223 if (vrefcnt(vp) > 1) { 1224 error = EBUSY; 1225 goto bad; 1226 } 1227 1228 if (!vn_isdisk(vp, &error)) 1229 goto bad; 1230 1231 1232 VOP_UNLOCK(vp, 0, td); 1233 NDFREE(&nd, NDF_ONLY_PNBUF); 1234 *vpp = vp; 1235 return (0); 1236bad: 1237 VOP_UNLOCK(vp, 0, td); 1238 NDFREE(&nd, NDF_ONLY_PNBUF); 1239 /* vn_close does vrele() for vp */ 1240 (void)vn_close(vp, FREAD|FWRITE, td->td_ucred, td); 1241 return (error); 1242} 1243 1244/* 1245 1246 * Wait interruptibly for an exclusive lock. 
1247 * 1248 * XXX 1249 * Several drivers do this; it should be abstracted and made MP-safe. 1250 */ 1251static int 1252ccdlock(struct ccd_s *cs) 1253{ 1254 int error; 1255 1256 while ((cs->sc_flags & CCDF_LOCKED) != 0) { 1257 cs->sc_flags |= CCDF_WANTED; 1258 if ((error = tsleep(cs, PRIBIO | PCATCH, "ccdlck", 0)) != 0) 1259 return (error); 1260 } 1261 cs->sc_flags |= CCDF_LOCKED; 1262 return (0); 1263} 1264 1265/* 1266 * Unlock and wake up any waiters. 1267 */ 1268static void 1269ccdunlock(struct ccd_s *cs) 1270{ 1271 1272 cs->sc_flags &= ~CCDF_LOCKED; 1273 if ((cs->sc_flags & CCDF_WANTED) != 0) { 1274 cs->sc_flags &= ~CCDF_WANTED; 1275 wakeup(cs); 1276 } 1277}
| 123}; 124 125static LIST_HEAD(, ccd_s) ccd_softc_list = 126 LIST_HEAD_INITIALIZER(&ccd_softc_list); 127 128static struct ccd_s *ccdfind(int); 129static struct ccd_s *ccdnew(int); 130static int ccddestroy(struct ccd_s *); 131 132/* called during module initialization */ 133static void ccdattach(void); 134static int ccd_modevent(module_t, int, void *); 135 136/* called by biodone() at interrupt time */ 137static void ccdiodone(struct bio *bp); 138 139static void ccdstart(struct ccd_s *, struct bio *); 140static void ccdinterleave(struct ccd_s *, int); 141static int ccdinit(struct ccd_s *, char **, struct thread *); 142static int ccdlookup(char *, struct thread *p, struct vnode **); 143static int ccdbuffer(struct ccdbuf **ret, struct ccd_s *, 144 struct bio *, daddr_t, caddr_t, long); 145static int ccdlock(struct ccd_s *); 146static void ccdunlock(struct ccd_s *); 147 148 149/* 150 * Number of blocks to untouched in front of a component partition. 151 * This is to avoid violating its disklabel area when it starts at the 152 * beginning of the slice. 153 */ 154#if !defined(CCD_OFFSET) 155#define CCD_OFFSET 16 156#endif 157 158static struct ccd_s * 159ccdfind(int unit) 160{ 161 struct ccd_s *sc = NULL; 162 163 /* XXX: LOCK(unique unit numbers) */ 164 LIST_FOREACH(sc, &ccd_softc_list, list) { 165 if (sc->sc_unit == unit) 166 break; 167 } 168 /* XXX: UNLOCK(unique unit numbers) */ 169 return ((sc == NULL) || (sc->sc_unit != unit) ? 
NULL : sc); 170} 171 172static struct ccd_s * 173ccdnew(int unit) 174{ 175 struct ccd_s *sc; 176 177 /* XXX: LOCK(unique unit numbers) */ 178 if (IS_ALLOCATED(unit) || unit > 32) 179 return (NULL); 180 181 MALLOC(sc, struct ccd_s *, sizeof(*sc), M_CCD, M_WAITOK | M_ZERO); 182 sc->sc_unit = unit; 183 LIST_INSERT_HEAD(&ccd_softc_list, sc, list); 184 /* XXX: UNLOCK(unique unit numbers) */ 185 return (sc); 186} 187 188static int 189ccddestroy(struct ccd_s *sc) 190{ 191 192 /* XXX: LOCK(unique unit numbers) */ 193 LIST_REMOVE(sc, list); 194 /* XXX: UNLOCK(unique unit numbers) */ 195 FREE(sc, M_CCD); 196 return (0); 197} 198 199/* 200 * Called by main() during pseudo-device attachment. All we need 201 * to do is to add devsw entries. 202 */ 203static void 204ccdattach() 205{ 206 207 ccdctldev = make_dev(&ccdctl_cdevsw, 0xffff00ff, 208 UID_ROOT, GID_OPERATOR, 0640, "ccd.ctl"); 209 ccdctldev->si_drv1 = ccdctldev; 210} 211 212static int 213ccd_modevent(module_t mod, int type, void *data) 214{ 215 int error = 0; 216 217 switch (type) { 218 case MOD_LOAD: 219 ccdattach(); 220 break; 221 222 case MOD_UNLOAD: 223 printf("ccd0: Unload not supported!\n"); 224 error = EOPNOTSUPP; 225 break; 226 227 case MOD_SHUTDOWN: 228 break; 229 230 default: 231 error = EOPNOTSUPP; 232 } 233 return (error); 234} 235 236DEV_MODULE(ccd, ccd_modevent, NULL); 237 238static int 239ccdinit(struct ccd_s *cs, char **cpaths, struct thread *td) 240{ 241 struct ccdcinfo *ci = NULL; /* XXX */ 242 size_t size; 243 int ix; 244 struct vnode *vp; 245 size_t minsize; 246 int maxsecsize; 247 struct ccdgeom *ccg = &cs->sc_geom; 248 char *tmppath = NULL; 249 int error = 0; 250 off_t mediasize; 251 u_int sectorsize; 252 253 254 cs->sc_size = 0; 255 256 /* Allocate space for the component info. */ 257 cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo), 258 M_CCD, M_WAITOK); 259 260 /* 261 * Verify that each component piece exists and record 262 * relevant information about it. 
263 */ 264 maxsecsize = 0; 265 minsize = 0; 266 tmppath = malloc(MAXPATHLEN, M_CCD, M_WAITOK); 267 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 268 vp = cs->sc_vpp[ix]; 269 ci = &cs->sc_cinfo[ix]; 270 ci->ci_vp = vp; 271 272 /* 273 * Copy in the pathname of the component. 274 */ 275 if ((error = copyinstr(cpaths[ix], tmppath, 276 MAXPATHLEN, &ci->ci_pathlen)) != 0) { 277 goto fail; 278 } 279 ci->ci_path = malloc(ci->ci_pathlen, M_CCD, M_WAITOK); 280 bcopy(tmppath, ci->ci_path, ci->ci_pathlen); 281 282 ci->ci_dev = vn_todev(vp); 283 284 /* 285 * Get partition information for the component. 286 */ 287 error = VOP_IOCTL(vp, DIOCGMEDIASIZE, (caddr_t)&mediasize, 288 FREAD, td->td_ucred, td); 289 if (error != 0) { 290 goto fail; 291 } 292 /* 293 * Get partition information for the component. 294 */ 295 error = VOP_IOCTL(vp, DIOCGSECTORSIZE, (caddr_t)§orsize, 296 FREAD, td->td_ucred, td); 297 if (error != 0) { 298 goto fail; 299 } 300 if (sectorsize > maxsecsize) 301 maxsecsize = sectorsize; 302 size = mediasize / DEV_BSIZE - CCD_OFFSET; 303 304 /* 305 * Calculate the size, truncating to an interleave 306 * boundary if necessary. 307 */ 308 309 if (cs->sc_ileave > 1) 310 size -= size % cs->sc_ileave; 311 312 if (size == 0) { 313 error = ENODEV; 314 goto fail; 315 } 316 317 if (minsize == 0 || size < minsize) 318 minsize = size; 319 ci->ci_size = size; 320 cs->sc_size += size; 321 } 322 323 free(tmppath, M_CCD); 324 tmppath = NULL; 325 326 /* 327 * Don't allow the interleave to be smaller than 328 * the biggest component sector. 329 */ 330 if ((cs->sc_ileave > 0) && 331 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 332 error = EINVAL; 333 goto fail; 334 } 335 336 /* 337 * If uniform interleave is desired set all sizes to that of 338 * the smallest component. This will guarentee that a single 339 * interleave table is generated. 340 * 341 * Lost space must be taken into account when calculating the 342 * overall size. 
	 * Half the space is lost when CCDF_MIRROR is
	 * specified.
	 */
	if (cs->sc_flags & CCDF_UNIFORM) {
		/* Clamp every component to the smallest one. */
		for (ci = cs->sc_cinfo;
		     ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
			ci->ci_size = minsize;
		}
		if (cs->sc_flags & CCDF_MIRROR) {
			/*
			 * Check to see if an even number of components
			 * have been specified.  The interleave must also
			 * be non-zero in order for us to be able to
			 * guarantee the topology.
			 */
			if (cs->sc_nccdisks % 2) {
				printf("ccd%d: mirroring requires an even number of disks\n", cs->sc_unit);
				error = EINVAL;
				goto fail;
			}
			if (cs->sc_ileave == 0) {
				printf("ccd%d: an interleave must be specified when mirroring\n", cs->sc_unit);
				error = EINVAL;
				goto fail;
			}
			/* Mirrored: only half the raw capacity is usable. */
			cs->sc_size = (cs->sc_nccdisks/2) * minsize;
		} else {
			if (cs->sc_ileave == 0) {
				printf("ccd%d: an interleave must be specified when using parity\n", cs->sc_unit);
				error = EINVAL;
				goto fail;
			}
			cs->sc_size = cs->sc_nccdisks * minsize;
		}
	}

	/*
	 * Construct the interleave table.
	 */
	ccdinterleave(cs, cs->sc_unit);

	/*
	 * Create pseudo-geometry based on 1MB cylinders. It's
	 * pretty close.
	 */
	ccg->ccg_secsize = maxsecsize;
	ccg->ccg_ntracks = 1;
	ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize;
	ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors;

	/*
	 * Add a devstat entry for this device.
	 */
	devstat_add_entry(&cs->device_stats, "ccd", cs->sc_unit,
			  ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED,
			  DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER,
			  DEVSTAT_PRIORITY_ARRAY);

	cs->sc_flags |= CCDF_INITED;
	/* Snapshot the config flags; I/O paths read sc_cflags, not sc_flags. */
	cs->sc_cflags = cs->sc_flags;	/* So we can find out later...
	 */
	return (0);
fail:
	/* Free the component paths allocated before the failure point. */
	while (ci > cs->sc_cinfo) {
		ci--;
		free(ci->ci_path, M_CCD);
	}
	if (tmppath != NULL)
		free(tmppath, M_CCD);
	free(cs->sc_cinfo, M_CCD);
	/*
	 * NOTE(review): ccddestroy() frees cs here, but the CCDIOCSET error
	 * path in ccdioctltoo() still touches cs after ccdinit() fails (its
	 * own comment says destruction must be deferred to ccdclose()).
	 * Confirm ownership — this looks like a use-after-free hazard.
	 */
	ccddestroy(cs);
	return (error);
}

/*
 * Build the interleave table (sc_itable) describing how logical ccd
 * blocks map onto the component disks.
 */
static void
ccdinterleave(struct ccd_s *cs, int unit)
{
	struct ccdcinfo *ci, *smallci;
	struct ccdiinfo *ii;
	daddr_t bn, lbn;
	int ix;
	u_long size;


	/*
	 * Allocate an interleave table.  The worst case occurs when each
	 * of N disks is of a different size, resulting in N interleave
	 * tables.
	 *
	 * Chances are this is too big, but we don't care.
	 */
	size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo);
	cs->sc_itable = (struct ccdiinfo *)malloc(size, M_CCD,
	    M_WAITOK | M_ZERO);

	/*
	 * Trivial case: no interleave (actually interleave of disk size).
	 * Each table entry represents a single component in its entirety.
	 *
	 * An interleave of 0 may not be used with a mirror setup.
	 */
	if (cs->sc_ileave == 0) {
		bn = 0;
		ii = cs->sc_itable;

		for (ix = 0; ix < cs->sc_nccdisks; ix++) {
			/* Allocate space for ii_index. */
			ii->ii_index = malloc(sizeof(int), M_CCD, M_WAITOK);
			ii->ii_ndisk = 1;
			ii->ii_startblk = bn;
			ii->ii_startoff = 0;
			ii->ii_index[0] = ix;
			bn += cs->sc_cinfo[ix].ci_size;
			ii++;
		}
		/* Zero ii_ndisk terminates the table. */
		ii->ii_ndisk = 0;
		return;
	}

	/*
	 * The following isn't fast or pretty; it doesn't have to be.
	 */
	size = 0;
	bn = lbn = 0;
	for (ii = cs->sc_itable; ; ii++) {
		/*
		 * Allocate space for ii_index.  We might allocate more than
		 * we use.
		 */
		ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks),
		    M_CCD, M_WAITOK);

		/*
		 * Locate the smallest of the remaining components
		 * (i.e. strictly larger than the size handled so far).
		 */
		smallci = NULL;
		for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks];
		    ci++) {
			if (ci->ci_size > size &&
			    (smallci == NULL ||
			     ci->ci_size < smallci->ci_size)) {
				smallci = ci;
			}
		}

		/*
		 * Nobody left, all done
		 */
		if (smallci == NULL) {
			ii->ii_ndisk = 0;
			free(ii->ii_index, M_CCD);
			break;
		}

		/*
		 * Record starting logical block using an sc_ileave blocksize.
		 */
		ii->ii_startblk = bn / cs->sc_ileave;

		/*
		 * Record starting component block using an sc_ileave
		 * blocksize.  This value is relative to the beginning of
		 * a component disk.
		 */
		ii->ii_startoff = lbn;

		/*
		 * Determine how many disks take part in this interleave
		 * and record their indices.
		 */
		ix = 0;
		for (ci = cs->sc_cinfo;
		    ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
			if (ci->ci_size >= smallci->ci_size) {
				ii->ii_index[ix++] = ci - cs->sc_cinfo;
			}
		}
		ii->ii_ndisk = ix;
		bn += ix * (smallci->ci_size - size);
		lbn = smallci->ci_size / cs->sc_ileave;
		size = smallci->ci_size;
	}
}

/*
 * Disk strategy entry point: bounds-check the request against the ccd's
 * size (in sc_secsize chunks), truncate at EOF, and hand off to ccdstart().
 */
static void
ccdstrategy(struct bio *bp)
{
	struct ccd_s *cs;
	int pbn;	/* in sc_secsize chunks */
	long sz;	/* in sc_secsize chunks */

	cs = bp->bio_disk->d_drv1;

	pbn = bp->bio_blkno / (cs->sc_geom.ccg_secsize / DEV_BSIZE);
	sz = howmany(bp->bio_bcount, cs->sc_geom.ccg_secsize);

	/*
	 * If out of bounds return an error.  If at the EOF point,
	 * simply read or write less.
	 */

	if (pbn < 0 || pbn >= cs->sc_size) {
		bp->bio_resid = bp->bio_bcount;
		/* Exactly at EOF is a successful zero-length transfer. */
		if (pbn != cs->sc_size)
			biofinish(bp, NULL, EINVAL);
		else
			biodone(bp);
		return;
	}

	/*
	 * If the request crosses EOF, truncate the request.
	 */
	if (pbn + sz > cs->sc_size) {
		bp->bio_bcount = (cs->sc_size - pbn) *
		    cs->sc_geom.ccg_secsize;
	}

	bp->bio_resid = bp->bio_bcount;

	/*
	 * "Start" the unit.
	 */
	ccdstart(cs, bp);
	return;
}

/*
 * Split an incoming bio into per-component child buffers (via ccdbuffer)
 * and dispatch them.  For mirrors, writes go to both sides; reads are
 * steered to the disk whose arm is presumed nearest.
 */
static void
ccdstart(struct ccd_s *cs, struct bio *bp)
{
	long bcount, rcount;
	struct ccdbuf *cbp[2];
	caddr_t addr;
	daddr_t bn;
	int err;


	/* Record the transaction start  */
	devstat_start_transaction(&cs->device_stats);

	/*
	 * Translate the partition-relative block number to an absolute.
	 */
	bn = bp->bio_blkno;

	/*
	 * Allocate component buffers and fire off the requests
	 */
	addr = bp->bio_data;
	for (bcount = bp->bio_bcount; bcount > 0; bcount -= rcount) {
		err = ccdbuffer(cbp, cs, bp, bn, addr, bcount);
		if (err) {
			printf("ccdbuffer error %d\n", err);
			/* We're screwed */
			/*
			 * NOTE(review): this path marks the bio failed but
			 * never calls biodone()/biofinish(), and the devstat
			 * transaction started above is never completed —
			 * confirm whether the request can hang here.
			 */
			bp->bio_resid -= bcount;
			bp->bio_error = ENOMEM;
			bp->bio_flags |= BIO_ERROR;
			return;
		}
		rcount = cbp[0]->cb_buf.bio_bcount;

		if (cs->sc_cflags & CCDF_MIRROR) {
			/*
			 * Mirroring.  Writes go to both disks, reads are
			 * taken from whichever disk seems most appropriate.
			 *
			 * We attempt to localize reads to the disk whose arm
			 * is nearest the read request.  We ignore seeks due
			 * to writes when making this determination and we
			 * also try to avoid hogging.
			 */
			if (cbp[0]->cb_buf.bio_cmd == BIO_WRITE) {
				BIO_STRATEGY(&cbp[0]->cb_buf);
				BIO_STRATEGY(&cbp[1]->cb_buf);
			} else {
				int pick = cs->sc_pick;
				daddr_t range = cs->sc_size / 16;

				/*
				 * Switch mirror sides only when the request
				 * falls outside a window around the last
				 * block issued to the current side.
				 */
				if (bn < cs->sc_blk[pick] - range ||
				    bn > cs->sc_blk[pick] + range
				) {
					cs->sc_pick = pick = 1 - pick;
				}
				cs->sc_blk[pick] = bn + btodb(rcount);
				BIO_STRATEGY(&cbp[pick]->cb_buf);
			}
		} else {
			/*
			 * Not mirroring
			 */
			BIO_STRATEGY(&cbp[0]->cb_buf);
		}
		bn += btodb(rcount);
		addr += rcount;
	}
}

/*
 * Build a component buffer header.
 *
 * Maps the ccd-relative block bn onto a single component (and, for
 * mirrors, its partner) and fills cb[0] (and cb[1]) with a child bio
 * covering as much of bcount as fits in one component/interleave span.
 * Returns 0 or ENOMEM.
 */
static int
ccdbuffer(struct ccdbuf **cb, struct ccd_s *cs, struct bio *bp, daddr_t bn, caddr_t addr, long bcount)
{
	struct ccdcinfo *ci, *ci2 = NULL;	/* XXX */
	struct ccdbuf *cbp;
	daddr_t cbn, cboff;
	off_t cbc;

	/*
	 * Determine which component bn falls in.
	 */
	cbn = bn;
	cboff = 0;

	if (cs->sc_ileave == 0) {
		/*
		 * Serially concatenated and neither a mirror nor a parity
		 * config.  This is a special case.
		 */
		daddr_t sblk;

		sblk = 0;
		for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++)
			sblk += ci->ci_size;
		cbn -= sblk;
	} else {
		struct ccdiinfo *ii;
		int ccdisk, off;

		/*
		 * Calculate cbn, the logical superblock (sc_ileave chunks),
		 * and cboff, a normal block offset (DEV_BSIZE chunks) relative
		 * to cbn.
		 */
		cboff = cbn % cs->sc_ileave;	/* DEV_BSIZE gran */
		cbn = cbn / cs->sc_ileave;	/* DEV_BSIZE * ileave gran */

		/*
		 * Figure out which interleave table to use.
		 */
		for (ii = cs->sc_itable; ii->ii_ndisk; ii++) {
			if (ii->ii_startblk > cbn)
				break;
		}
		ii--;

		/*
		 * off is the logical superblock relative to the beginning
		 * of this interleave block.
		 */
		off = cbn - ii->ii_startblk;

		/*
		 * We must calculate which disk component to use (ccdisk),
		 * and recalculate cbn to be the superblock relative to
		 * the beginning of the component.  This is typically done by
		 * adding 'off' and ii->ii_startoff together.  However, 'off'
		 * must typically be divided by the number of components in
		 * this interleave array to be properly convert it from a
		 * CCD-relative logical superblock number to a
		 * component-relative superblock number.
		 */
		if (ii->ii_ndisk == 1) {
			/*
			 * When we have just one disk, it can't be a mirror
			 * or a parity config.
			 */
			ccdisk = ii->ii_index[0];
			cbn = ii->ii_startoff + off;
		} else {
			if (cs->sc_cflags & CCDF_MIRROR) {
				/*
				 * We have forced a uniform mapping, resulting
				 * in a single interleave array.  We double
				 * up on the first half of the available
				 * components and our mirror is in the second
				 * half.  This only works with a single
				 * interleave array because doubling up
				 * doubles the number of sectors, so there
				 * cannot be another interleave array because
				 * the next interleave array's calculations
				 * would be off.
				 */
				int ndisk2 = ii->ii_ndisk / 2;
				ccdisk = ii->ii_index[off % ndisk2];
				cbn = ii->ii_startoff + off / ndisk2;
				/* ci2 is the mirror partner in the second half. */
				ci2 = &cs->sc_cinfo[ccdisk + ndisk2];
			} else {
				ccdisk = ii->ii_index[off % ii->ii_ndisk];
				cbn = ii->ii_startoff + off / ii->ii_ndisk;
			}
		}

		ci = &cs->sc_cinfo[ccdisk];

		/*
		 * Convert cbn from a superblock to a normal block so it
		 * can be used to calculate (along with cboff) the normal
		 * block index into this particular disk.
		 */
		cbn *= cs->sc_ileave;
	}

	/*
	 * Fill in the component buf structure.
	 */
	cbp = malloc(sizeof(struct ccdbuf), M_CCD, M_NOWAIT | M_ZERO);
	if (cbp == NULL)
		return (ENOMEM);
	cbp->cb_buf.bio_cmd = bp->bio_cmd;
	cbp->cb_buf.bio_done = ccdiodone;
	cbp->cb_buf.bio_dev = ci->ci_dev;		/* XXX */
	/* CCD_OFFSET skips the reserved disklabel blocks of the component. */
	cbp->cb_buf.bio_blkno = cbn + cboff + CCD_OFFSET;
	cbp->cb_buf.bio_offset = dbtob(cbn + cboff + CCD_OFFSET);
	cbp->cb_buf.bio_data = addr;
	cbp->cb_buf.bio_caller2 = cbp;
	/* Clip the transfer at the component (or interleave chunk) boundary. */
	if (cs->sc_ileave == 0)
		cbc = dbtob((off_t)(ci->ci_size - cbn));
	else
		cbc = dbtob((off_t)(cs->sc_ileave - cboff));
	cbp->cb_buf.bio_bcount = (cbc < bcount) ? cbc : bcount;
	/* Stash the untruncated count for ccdiodone's resid accounting. */
	cbp->cb_buf.bio_caller1 = (void*)cbp->cb_buf.bio_bcount;

	/*
	 * context for ccdiodone
	 */
	cbp->cb_obp = bp;
	cbp->cb_softc = cs;
	cbp->cb_comp = ci - cs->sc_cinfo;

	cb[0] = cbp;

	/*
	 * Note: both I/O's setup when reading from mirror, but only one
	 * will be executed.
	 */
	if (cs->sc_cflags & CCDF_MIRROR) {
		/* mirror, setup second I/O */
		cbp = malloc(sizeof(struct ccdbuf), M_CCD, M_NOWAIT);
		if (cbp == NULL) {
			free(cb[0], M_CCD);
			cb[0] = NULL;
			return (ENOMEM);
		}
		/* Clone the primary, then retarget it at the partner. */
		bcopy(cb[0], cbp, sizeof(struct ccdbuf));
		cbp->cb_buf.bio_dev = ci2->ci_dev;
		cbp->cb_comp = ci2 - cs->sc_cinfo;
		cb[1] = cbp;
		/* link together the ccdbuf's and clear "mirror done" flag */
		cb[0]->cb_mirror = cb[1];
		cb[1]->cb_mirror = cb[0];
		cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE;
		cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE;
	}
	return (0);
}

/*
 * Called at interrupt time.
 * Mark the component as done and if all components are done,
 * take a ccd interrupt.
 */
static void
ccdiodone(struct bio *ibp)
{
	struct ccdbuf *cbp;
	struct bio *bp;
	struct ccd_s *cs;
	int count;

	cbp = ibp->bio_caller2;
	cs = cbp->cb_softc;
	bp = cbp->cb_obp;
	/*
	 * If an error occurred, report it.
	 * If this is a mirrored
	 * configuration and the first of two possible reads, do not
	 * set the error in the bp yet because the second read may
	 * succeed.
	 */

	if (cbp->cb_buf.bio_flags & BIO_ERROR) {
		const char *msg = "";

		if ((cs->sc_cflags & CCDF_MIRROR) &&
		    (cbp->cb_buf.bio_cmd == BIO_READ) &&
		    (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
			/*
			 * We will try our read on the other disk down
			 * below, also reverse the default pick so if we
			 * are doing a scan we do not keep hitting the
			 * bad disk first.
			 */

			msg = ", trying other disk";
			cs->sc_pick = 1 - cs->sc_pick;
			cs->sc_blk[cs->sc_pick] = bp->bio_blkno;
		} else {
			bp->bio_flags |= BIO_ERROR;
			bp->bio_error = cbp->cb_buf.bio_error ?
			    cbp->cb_buf.bio_error : EIO;
		}
		printf("ccd%d: error %d on component %d block %jd "
		    "(ccd block %jd)%s\n", cs->sc_unit, bp->bio_error,
		    cbp->cb_comp,
		    (intmax_t)cbp->cb_buf.bio_blkno, (intmax_t)bp->bio_blkno,
		    msg);
	}

	/*
	 * Process mirror.  If we are writing, I/O has been initiated on both
	 * buffers and we fall through only after both are finished.
	 *
	 * If we are reading only one I/O is initiated at a time.  If an
	 * error occurs we initiate the second I/O and return, otherwise
	 * we free the second I/O without initiating it.
	 */

	if (cs->sc_cflags & CCDF_MIRROR) {
		if (cbp->cb_buf.bio_cmd == BIO_WRITE) {
			/*
			 * When writing, handshake with the second buffer
			 * to determine when both are done.  If both are not
			 * done, return here.
			 */
			if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
				cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE;
				free(cbp, M_CCD);
				return;
			}
		} else {
			/*
			 * When reading, either dispose of the second buffer
			 * or initiate I/O on the second buffer if an error
			 * occurred with this one.
			 */
			if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
				if (cbp->cb_buf.bio_flags & BIO_ERROR) {
					/* Retry the read on the mirror side. */
					cbp->cb_mirror->cb_pflags |=
					    CCDPF_MIRROR_DONE;
					BIO_STRATEGY(&cbp->cb_mirror->cb_buf);
					free(cbp, M_CCD);
					return;
				} else {
					free(cbp->cb_mirror, M_CCD);
				}
			}
		}
	}

	/*
	 * use bio_caller1 to determine how big the original request was rather
	 * then bio_bcount, because bio_bcount may have been truncated for EOF.
	 *
	 * XXX We check for an error, but we do not test the resid for an
	 * aligned EOF condition.  This may result in character & block
	 * device access not recognizing EOF properly when read or written
	 * sequentially, but will not effect filesystems.
	 */
	count = (long)cbp->cb_buf.bio_caller1;
	free(cbp, M_CCD);

	/*
	 * If all done, "interrupt".
	 */
	bp->bio_resid -= count;
	if (bp->bio_resid < 0)
		panic("ccdiodone: count");
	if (bp->bio_resid == 0) {
		if (bp->bio_flags & BIO_ERROR)
			bp->bio_resid = bp->bio_bcount;
		biofinish(bp, &cs->device_stats, 0);
	}
}

static int ccdioctltoo(int unit, u_long cmd, caddr_t data, int flag, struct thread *td);

/*
 * Control-device ioctl entry point.  Configuration requests
 * (CCDIOCSET/CCDIOCCLR) are forwarded to ccdioctltoo(); the info
 * queries are handled inline.
 */
static int
ccdctlioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct thread *td)
{
	struct ccd_ioctl *ccio;
	u_int unit;
	dev_t dev2;
	int error;

	switch (cmd) {
	case CCDIOCSET:
	case CCDIOCCLR:
		ccio = (struct ccd_ioctl *)data;
		/*
		 * NOTE(review): the unit number arrives in ccio_size —
		 * presumably the field is overloaded on input; confirm
		 * against the userland ccdconfig interface.
		 */
		unit = ccio->ccio_size;
		return (ccdioctltoo(unit, cmd, data, flag, td));
	case CCDCONFINFO:
		{
		int ninit = 0;
		struct ccdconf *conf = (struct ccdconf *)data;
		struct ccd_s *tmpcs;
		struct ccd_s *ubuf = conf->buffer;

		/* XXX: LOCK(unique unit numbers) */
		LIST_FOREACH(tmpcs, &ccd_softc_list, list)
			if (IS_INITED(tmpcs))
				ninit++;

		/* Size 0 is a probe: report the required buffer size. */
		if (conf->size == 0) {
			conf->size = sizeof(struct ccd_s) * ninit;
			return (0);
		} else if ((conf->size / sizeof(struct ccd_s) != ninit) ||
		    (conf->size % sizeof(struct ccd_s) != 0)) {
			/* XXX: UNLOCK(unique unit numbers) */
			return (EINVAL);
		}

		/* Copy the softcs out back-to-front into the user buffer. */
		ubuf += ninit;
		LIST_FOREACH(tmpcs, &ccd_softc_list, list) {
			if (!IS_INITED(tmpcs))
				continue;
			error = copyout(tmpcs, --ubuf,
			    sizeof(struct ccd_s));
			if (error != 0)
				/* XXX: UNLOCK(unique unit numbers) */
				return (error);
		}
		/* XXX: UNLOCK(unique unit numbers) */
		return (0);
		}

	case CCDCPPINFO:
		{
		struct ccdcpps *cpps = (struct ccdcpps *)data;
		char *ubuf = cpps->buffer;
		struct ccd_s *cs;


		/* The first word of the user buffer selects the unit. */
		error = copyin(ubuf, &unit, sizeof (unit));
		if (error)
			return (error);

		if (!IS_ALLOCATED(unit))
			return (ENXIO);
		/*
		 * NOTE(review): dev2 is assigned here but never used
		 * afterwards — dead code, candidate for removal.
		 */
		dev2 = makedev(CDEV_MAJOR, unit * 8 + 2);
		cs = ccdfind(unit);
		if (!IS_INITED(cs))
			return (ENXIO);

		{
			int len = 0, i;
			/*
			 * NOTE(review): these declarations shadow the outer
			 * cpps/ubuf with identical values.
			 */
			struct ccdcpps *cpps = (struct ccdcpps *)data;
			char *ubuf = cpps->buffer;


			/* Total space needed for all component paths. */
			for (i = 0; i < cs->sc_nccdisks; ++i)
				len += cs->sc_cinfo[i].ci_pathlen;

			if (cpps->size < len)
				return (ENOMEM);

			/* Copy out the NUL-terminated paths back to back. */
			for (i = 0; i < cs->sc_nccdisks; ++i) {
				len = cs->sc_cinfo[i].ci_pathlen;
				error = copyout(cs->sc_cinfo[i].ci_path, ubuf,
				    len);
				if (error != 0)
					return (error);
				ubuf += len;
			}
			/* Trailing empty string terminates the list. */
			return(copyout("", ubuf, 1));
		}
		break;
		}

	default:
		return (ENXIO);
	}
}

/*
 * Worker for the configuration ioctls.  CCDIOCSET builds and initializes
 * a unit from the user-supplied component list; CCDIOCCLR tears one down.
 */
static int
ccdioctltoo(int unit, u_long cmd, caddr_t data, int flag, struct thread *td)
{
	int i, j, lookedup = 0, error = 0;
	struct ccd_s *cs;
	struct ccd_ioctl *ccio = (struct ccd_ioctl *)data;
	struct ccdgeom *ccg;
	char **cpp;
	struct vnode **vpp;

	cs = ccdfind(unit);
	switch (cmd) {
	case CCDIOCSET:
		if (cs == NULL)
			cs = ccdnew(unit);
		if (IS_INITED(cs))
			return (EBUSY);

		if ((flag & FWRITE) == 0)
			return (EBADF);

		if ((error = ccdlock(cs)) != 0)
			return (error);

1038 if (ccio->ccio_ndisks > CCD_MAXNDISKS) 1039 return (EINVAL); 1040 1041 /* Fill in some important bits. */ 1042 cs->sc_ileave = ccio->ccio_ileave; 1043 if (cs->sc_ileave == 0 && (ccio->ccio_flags & CCDF_MIRROR)) { 1044 printf("ccd%d: disabling mirror, interleave is 0\n", 1045 unit); 1046 ccio->ccio_flags &= ~(CCDF_MIRROR); 1047 } 1048 if ((ccio->ccio_flags & CCDF_MIRROR) && 1049 !(ccio->ccio_flags & CCDF_UNIFORM)) { 1050 printf("ccd%d: mirror/parity forces uniform flag\n", 1051 unit); 1052 ccio->ccio_flags |= CCDF_UNIFORM; 1053 } 1054 cs->sc_flags = ccio->ccio_flags & CCDF_USERMASK; 1055 1056 /* 1057 * Allocate space for and copy in the array of 1058 * componet pathnames and device numbers. 1059 */ 1060 cpp = malloc(ccio->ccio_ndisks * sizeof(char *), 1061 M_CCD, M_WAITOK); 1062 vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *), 1063 M_CCD, M_WAITOK); 1064 1065 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp, 1066 ccio->ccio_ndisks * sizeof(char **)); 1067 if (error) { 1068 free(vpp, M_CCD); 1069 free(cpp, M_CCD); 1070 ccdunlock(cs); 1071 return (error); 1072 } 1073 1074 1075 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1076 if ((error = ccdlookup(cpp[i], td, &vpp[i])) != 0) { 1077 for (j = 0; j < lookedup; ++j) 1078 (void)vn_close(vpp[j], FREAD|FWRITE, 1079 td->td_ucred, td); 1080 free(vpp, M_CCD); 1081 free(cpp, M_CCD); 1082 ccdunlock(cs); 1083 return (error); 1084 } 1085 ++lookedup; 1086 } 1087 cs->sc_vpp = vpp; 1088 cs->sc_nccdisks = ccio->ccio_ndisks; 1089 1090 /* 1091 * Initialize the ccd. Fills in the softc for us. 1092 */ 1093 if ((error = ccdinit(cs, cpp, td)) != 0) { 1094 for (j = 0; j < lookedup; ++j) 1095 (void)vn_close(vpp[j], FREAD|FWRITE, 1096 td->td_ucred, td); 1097 /* 1098 * We can't ccddestroy() cs just yet, because nothing 1099 * prevents user-level app to do another ioctl() 1100 * without closing the device first, therefore 1101 * declare unit null and void and let ccdclose() 1102 * destroy it when it is safe to do so. 
1103 */ 1104 cs->sc_flags &= (CCDF_WANTED | CCDF_LOCKED); 1105 free(vpp, M_CCD); 1106 free(cpp, M_CCD); 1107 ccdunlock(cs); 1108 return (error); 1109 } 1110 free(cpp, M_CCD); 1111 1112 /* 1113 * The ccd has been successfully initialized, so 1114 * we can place it into the array and read the disklabel. 1115 */ 1116 ccio->ccio_unit = unit; 1117 ccio->ccio_size = cs->sc_size; 1118 ccg = &cs->sc_geom; 1119 cs->sc_disk = malloc(sizeof(struct disk), M_CCD, 1120 M_ZERO | M_WAITOK); 1121 cs->sc_disk->d_strategy = ccdstrategy; 1122 cs->sc_disk->d_name = "ccd"; 1123 cs->sc_disk->d_sectorsize = ccg->ccg_secsize; 1124 cs->sc_disk->d_mediasize = 1125 cs->sc_size * (off_t)ccg->ccg_secsize; 1126 cs->sc_disk->d_fwsectors = ccg->ccg_nsectors; 1127 cs->sc_disk->d_fwheads = ccg->ccg_ntracks; 1128 cs->sc_disk->d_drv1 = cs; 1129 cs->sc_disk->d_maxsize = MAXPHYS; 1130 disk_create(unit, cs->sc_disk, 0, NULL, NULL); 1131 1132 ccdunlock(cs); 1133 1134 break; 1135 1136 case CCDIOCCLR: 1137 if (cs == NULL) 1138 return (ENXIO); 1139 1140 if (!IS_INITED(cs)) 1141 return (ENXIO); 1142 1143 if ((flag & FWRITE) == 0) 1144 return (EBADF); 1145 1146 if ((error = ccdlock(cs)) != 0) 1147 return (error); 1148 1149 /* Don't unconfigure if any other partitions are open */ 1150 if (cs->sc_disk->d_flags & DISKFLAG_OPEN) { 1151 ccdunlock(cs); 1152 return (EBUSY); 1153 } 1154 1155 disk_destroy(cs->sc_disk); 1156 free(cs->sc_disk, M_CCD); 1157 cs->sc_disk = NULL; 1158 /* Declare unit null and void (reset all flags) */ 1159 cs->sc_flags &= (CCDF_WANTED | CCDF_LOCKED); 1160 1161 /* Close the components and free their pathnames. */ 1162 for (i = 0; i < cs->sc_nccdisks; ++i) { 1163 /* 1164 * XXX: this close could potentially fail and 1165 * cause Bad Things. Maybe we need to force 1166 * the close to happen? 1167 */ 1168 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, 1169 td->td_ucred, td); 1170 free(cs->sc_cinfo[i].ci_path, M_CCD); 1171 } 1172 1173 /* Free interleave index. 
 */
		for (i = 0; cs->sc_itable[i].ii_ndisk; ++i)
			free(cs->sc_itable[i].ii_index, M_CCD);

		/* Free component info and interleave table. */
		free(cs->sc_cinfo, M_CCD);
		free(cs->sc_itable, M_CCD);
		free(cs->sc_vpp, M_CCD);

		/* And remove the devstat entry. */
		devstat_remove_entry(&cs->device_stats);

		/* This must be atomic. */
		ccdunlock(cs);
		ccddestroy(cs);

		break;
	}

	return (0);
}


/*
 * Lookup the provided name in the filesystem.  If the file exists,
 * is a valid block device, and isn't being used by anyone else,
 * set *vpp to the file's vnode.
 */
static int
ccdlookup(char *path, struct thread *td, struct vnode **vpp)
{
	struct nameidata nd;
	struct vnode *vp;
	int error, flags;

	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, path, td);
	flags = FREAD | FWRITE;
	if ((error = vn_open(&nd, &flags, 0)) != 0) {
		return (error);
	}
	vp = nd.ni_vp;

	/* Refuse components that anyone else already holds open. */
	if (vrefcnt(vp) > 1) {
		error = EBUSY;
		goto bad;
	}

	if (!vn_isdisk(vp, &error))
		goto bad;


	/* Success: hand the (unlocked, referenced) vnode to the caller. */
	VOP_UNLOCK(vp, 0, td);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	*vpp = vp;
	return (0);
bad:
	VOP_UNLOCK(vp, 0, td);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	/* vn_close does vrele() for vp */
	(void)vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
	return (error);
}

/*
 * Wait interruptibly for an exclusive lock.
 *
 * XXX
 * Several drivers do this; it should be abstracted and made MP-safe.
 */
static int
ccdlock(struct ccd_s *cs)
{
	int error;

	/* Sleep until the holder clears CCDF_LOCKED; a signal aborts. */
	while ((cs->sc_flags & CCDF_LOCKED) != 0) {
		cs->sc_flags |= CCDF_WANTED;
		if ((error = tsleep(cs, PRIBIO | PCATCH, "ccdlck", 0)) != 0)
			return (error);
	}
	cs->sc_flags |= CCDF_LOCKED;
	return (0);
}

/*
 * Unlock and wake up any waiters.
 */
static void
ccdunlock(struct ccd_s *cs)
{

	/* Drop the exclusive flag, then wake anyone parked in ccdlock(). */
	cs->sc_flags &= ~CCDF_LOCKED;
	if ((cs->sc_flags & CCDF_WANTED) != 0) {
		cs->sc_flags &= ~CCDF_WANTED;
		wakeup(cs);
	}
}
|