1/* $NetBSD: dk.c,v 1.171 2023/05/22 15:00:17 riastradh Exp $ */ 2 3/*- 4 * Copyright (c) 2004, 2005, 2006, 2007 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 
30 */ 31 32#include <sys/cdefs.h> 33__KERNEL_RCSID(0, "$NetBSD: dk.c,v 1.171 2023/05/22 15:00:17 riastradh Exp $"); 34 35#ifdef _KERNEL_OPT 36#include "opt_dkwedge.h" 37#endif 38 39#include <sys/param.h> 40#include <sys/types.h> 41 42#include <sys/buf.h> 43#include <sys/bufq.h> 44#include <sys/callout.h> 45#include <sys/conf.h> 46#include <sys/device.h> 47#include <sys/disk.h> 48#include <sys/disklabel.h> 49#include <sys/errno.h> 50#include <sys/fcntl.h> 51#include <sys/ioctl.h> 52#include <sys/kauth.h> 53#include <sys/kernel.h> 54#include <sys/malloc.h> 55#include <sys/pool.h> 56#include <sys/proc.h> 57#include <sys/rwlock.h> 58#include <sys/stat.h> 59#include <sys/systm.h> 60#include <sys/vnode.h> 61 62#include <miscfs/specfs/specdev.h> 63 64MALLOC_DEFINE(M_DKWEDGE, "dkwedge", "Disk wedge structures"); 65 66typedef enum { 67 DKW_STATE_LARVAL = 0, 68 DKW_STATE_RUNNING = 1, 69 DKW_STATE_DYING = 2, 70 DKW_STATE_DEAD = 666 71} dkwedge_state_t; 72 73/* 74 * Lock order: 75 * 76 * sc->sc_dk.dk_openlock 77 * => sc->sc_parent->dk_rawlock 78 * => sc->sc_parent->dk_openlock 79 * => dkwedges_lock 80 * => sc->sc_sizelock 81 * 82 * Locking notes: 83 * 84 * W dkwedges_lock 85 * D device reference 86 * O sc->sc_dk.dk_openlock 87 * P sc->sc_parent->dk_openlock 88 * R sc->sc_parent->dk_rawlock 89 * S sc->sc_sizelock 90 * I sc->sc_iolock 91 * $ stable after initialization 92 * 1 used only by a single thread 93 * 94 * x&y means both x and y must be held to write (with a write lock if 95 * one is rwlock), and either x or y must be held to read. 
 */

struct dkwedge_softc {
	device_t sc_dev;	/* P&W: pointer to our pseudo-device */
		/* sc_dev is also stable while device is referenced */
	struct cfdata sc_cfdata;	/* 1: our cfdata structure */
	uint8_t sc_wname[128];	/* $: wedge name (Unicode, UTF-8) */

	dkwedge_state_t sc_state;	/* state this wedge is in */
		/* stable while device is referenced */
		/* used only in assertions when stable, and in dump in ddb */

	struct disk *sc_parent;	/* $: parent disk */
		/* P: sc_parent->dk_openmask */
		/* P: sc_parent->dk_nwedges */
		/* P: sc_parent->dk_wedges */
		/* R: sc_parent->dk_rawopens */
		/* R: sc_parent->dk_rawvp (also stable while wedge is open) */
	daddr_t sc_offset;	/* $: LBA offset of wedge in parent */
	krwlock_t sc_sizelock;
	uint64_t sc_size;	/* S: size of wedge in blocks */
	char sc_ptype[32];	/* $: partition type */
	dev_t sc_pdev;		/* $: cached parent's dev_t */
		/* P: link on parent's wedge list */
	LIST_ENTRY(dkwedge_softc) sc_plink;

	struct disk sc_dk;	/* our own disk structure */
		/* O&R: sc_dk.dk_bopenmask */
		/* O&R: sc_dk.dk_copenmask */
		/* O&R: sc_dk.dk_openmask */
	struct bufq_state *sc_bufq;	/* $: buffer queue */
	struct callout sc_restart_ch;	/* I: callout to restart I/O */

	kmutex_t sc_iolock;
	bool sc_iostop;		/* I: don't schedule restart */
	int sc_mode;		/* O&R: parent open mode */
};

static int	dkwedge_match(device_t, cfdata_t, void *);
static void	dkwedge_attach(device_t, device_t, void *);
static int	dkwedge_detach(device_t, int);

static void	dk_set_geometry(struct dkwedge_softc *, struct disk *);

static void	dkstart(struct dkwedge_softc *);
static void	dkiodone(struct buf *);
static void	dkrestart(void *);
static void	dkminphys(struct buf *);

static int	dkfirstopen(struct dkwedge_softc *, int);
static void	dklastclose(struct dkwedge_softc *);
static int	dkwedge_detach(device_t, int);
static void	dkwedge_delall1(struct disk *, bool);
static int	dkwedge_del1(struct dkwedge_info *, int);
static int	dk_open_parent(dev_t, int, struct vnode **);
static int	dk_close_parent(struct vnode *, int);

static dev_type_open(dkopen);
static dev_type_close(dkclose);
static dev_type_cancel(dkcancel);
static dev_type_read(dkread);
static dev_type_write(dkwrite);
static dev_type_ioctl(dkioctl);
static dev_type_strategy(dkstrategy);
static dev_type_dump(dkdump);
static dev_type_size(dksize);
static dev_type_discard(dkdiscard);

CFDRIVER_DECL(dk, DV_DISK, NULL);
CFATTACH_DECL3_NEW(dk, 0,
    dkwedge_match, dkwedge_attach, dkwedge_detach, NULL, NULL, NULL,
    DVF_DETACH_SHUTDOWN);

const struct bdevsw dk_bdevsw = {
	.d_open = dkopen,
	.d_close = dkclose,
	.d_cancel = dkcancel,
	.d_strategy = dkstrategy,
	.d_ioctl = dkioctl,
	.d_dump = dkdump,
	.d_psize = dksize,
	.d_discard = dkdiscard,
	.d_cfdriver = &dk_cd,
	.d_devtounit = dev_minor_unit,
	.d_flag = D_DISK | D_MPSAFE
};

const struct cdevsw dk_cdevsw = {
	.d_open = dkopen,
	.d_close = dkclose,
	.d_cancel = dkcancel,
	.d_read = dkread,
	.d_write = dkwrite,
	.d_ioctl = dkioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = dkdiscard,
	.d_cfdriver = &dk_cd,
	.d_devtounit = dev_minor_unit,
	.d_flag = D_DISK | D_MPSAFE
};

/* W: global table of all wedges, indexed by pseudo-device unit number. */
static struct dkwedge_softc **dkwedges;
static u_int ndkwedges;
static krwlock_t dkwedges_lock;

static LIST_HEAD(, dkwedge_discovery_method) dkwedge_discovery_methods;
static krwlock_t dkwedge_discovery_methods_lock;

/*
 * dkwedge_match:
 *
 *	Autoconfiguration match function for pseudo-device glue.
 */
static int
dkwedge_match(device_t parent, cfdata_t match, void *aux)
{

	/* Pseudo-device; always present.
	 */
	return 1;
}

/*
 * dkwedge_attach:
 *
 *	Autoconfiguration attach function for pseudo-device glue.
 */
static void
dkwedge_attach(device_t parent, device_t self, void *aux)
{
	struct dkwedge_softc *sc = aux;
	struct disk *pdk = sc->sc_parent;
	int unit = device_unit(self);

	KASSERTMSG(unit >= 0, "unit=%d", unit);

	if (!pmf_device_register(self, NULL, NULL))
		aprint_error_dev(self, "couldn't establish power handler\n");

	/* Publish sc_dev under the locks that guard readers of the table. */
	mutex_enter(&pdk->dk_openlock);
	rw_enter(&dkwedges_lock, RW_WRITER);
	KASSERTMSG(unit < ndkwedges, "unit=%d ndkwedges=%u", unit, ndkwedges);
	KASSERTMSG(sc == dkwedges[unit], "sc=%p dkwedges[%d]=%p",
	    sc, unit, dkwedges[unit]);
	KASSERTMSG(sc->sc_dev == NULL, "sc=%p sc->sc_dev=%p", sc, sc->sc_dev);
	sc->sc_dev = self;
	rw_exit(&dkwedges_lock);
	mutex_exit(&pdk->dk_openlock);

	disk_init(&sc->sc_dk, device_xname(sc->sc_dev), NULL);
	mutex_enter(&pdk->dk_openlock);
	dk_set_geometry(sc, pdk);
	mutex_exit(&pdk->dk_openlock);
	disk_attach(&sc->sc_dk);

	/* Disk wedge is ready for use! */
	device_set_private(self, sc);
	sc->sc_state = DKW_STATE_RUNNING;
}

/*
 * dkwedge_compute_pdev:
 *
 *	Compute the parent disk's dev_t.
 */
static int
dkwedge_compute_pdev(const char *pname, dev_t *pdevp, enum vtype type)
{
	const char *name, *cp;
	devmajor_t pmaj;
	int punit;
	char devname[16];

	name = pname;
	switch (type) {
	case VBLK:
		pmaj = devsw_name2blk(name, devname, sizeof(devname));
		break;
	case VCHR:
		pmaj = devsw_name2chr(name, devname, sizeof(devname));
		break;
	default:
		pmaj = NODEVMAJOR;
		break;
	}
	if (pmaj == NODEVMAJOR)
		return ENXIO;

	/* Parse the trailing unit number out of the device name. */
	name += strlen(devname);
	for (cp = name, punit = 0; *cp >= '0' && *cp <= '9'; cp++)
		punit = (punit * 10) + (*cp - '0');
	if (cp == name) {
		/* Invalid parent disk name. */
		return ENXIO;
	}

	*pdevp = MAKEDISKDEV(pmaj, punit, RAW_PART);

	return 0;
}

/*
 * dkwedge_array_expand:
 *
 *	Expand the dkwedges array.
 *
 *	Releases and reacquires dkwedges_lock as a writer.
 */
static int
dkwedge_array_expand(void)
{

	const unsigned incr = 16;
	unsigned newcnt, oldcnt;
	struct dkwedge_softc **newarray = NULL, **oldarray = NULL;

	KASSERT(rw_write_held(&dkwedges_lock));

	oldcnt = ndkwedges;
	oldarray = dkwedges;

	if (oldcnt >= INT_MAX - incr)
		return ENFILE;	/* XXX */
	newcnt = oldcnt + incr;

	/* Drop the lock to allocate; someone else may grow the array. */
	rw_exit(&dkwedges_lock);
	newarray = malloc(newcnt * sizeof(*newarray), M_DKWEDGE,
	    M_WAITOK|M_ZERO);
	rw_enter(&dkwedges_lock, RW_WRITER);

	if (ndkwedges != oldcnt || dkwedges != oldarray) {
		oldarray = NULL;	/* already recycled */
		goto out;
	}

	if (oldarray != NULL)
		memcpy(newarray, dkwedges, ndkwedges * sizeof(*newarray));
	dkwedges = newarray;
	newarray = NULL;	/* transferred to dkwedges */
	ndkwedges = newcnt;

out:	rw_exit(&dkwedges_lock);
	if (oldarray != NULL)
		free(oldarray, M_DKWEDGE);
	if (newarray != NULL)
		free(newarray, M_DKWEDGE);
	rw_enter(&dkwedges_lock, RW_WRITER);
	return 0;
}

/* Initialize sc_sizelock and the initial wedge size. */
static void
dkwedge_size_init(struct dkwedge_softc *sc, uint64_t size)
{

	rw_init(&sc->sc_sizelock);
	sc->sc_size = size;
}

static void
dkwedge_size_fini(struct dkwedge_softc *sc)
{

	rw_destroy(&sc->sc_sizelock);
}

/* Read the wedge size (S) under sc_sizelock. */
static uint64_t
dkwedge_size(struct dkwedge_softc *sc)
{
	uint64_t size;

	rw_enter(&sc->sc_sizelock, RW_READER);
	size = sc->sc_size;
	rw_exit(&sc->sc_sizelock);

	return size;
}

static void
dkwedge_size_increase(struct dkwedge_softc *sc, uint64_t size)
{

	KASSERT(mutex_owned(&sc->sc_parent->dk_openlock));

	rw_enter(&sc->sc_sizelock, RW_WRITER);
	KASSERTMSG(size >= sc->sc_size,
	    "decreasing dkwedge size from %"PRIu64" to %"PRIu64,
	    sc->sc_size, size);
	sc->sc_size = size;
	rw_exit(&sc->sc_sizelock);
}

static void
dk_set_geometry(struct dkwedge_softc *sc, struct disk *pdk)
{
	struct disk *dk = &sc->sc_dk;
	struct disk_geom *dg = &dk->dk_geom;

	KASSERT(mutex_owned(&pdk->dk_openlock));

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = dkwedge_size(sc);
	dg->dg_secsize = DEV_BSIZE << pdk->dk_blkshift;

	/* fake numbers, 1 cylinder is 1 MB with default sector size */
	dg->dg_nsectors = 32;
	dg->dg_ntracks = 64;
	dg->dg_ncylinders =
	    dg->dg_secperunit / (dg->dg_nsectors * dg->dg_ntracks);

	disk_set_info(sc->sc_dev, dk, NULL);
}

/*
 * dkwedge_add:		[exported function]
 *
 *	Add a disk wedge based on the provided information.
 *
 *	The incoming dkw_devname[] is ignored, instead being
 *	filled in and returned to the caller.
 */
int
dkwedge_add(struct dkwedge_info *dkw)
{
	struct dkwedge_softc *sc, *lsc;
	struct disk *pdk;
	u_int unit;
	int error;
	dev_t pdev;
	device_t dev __diagused;

	dkw->dkw_parent[sizeof(dkw->dkw_parent) - 1] = '\0';
	pdk = disk_find(dkw->dkw_parent);
	if (pdk == NULL)
		return ENXIO;

	error = dkwedge_compute_pdev(pdk->dk_name, &pdev, VBLK);
	if (error)
		return error;

	if (dkw->dkw_offset < 0)
		return EINVAL;

	/*
	 * Check for an existing wedge at the same disk offset.  Allow
	 * updating a wedge if the only change is the size, and the new
	 * size is larger than the old.
	 */
	sc = NULL;
	mutex_enter(&pdk->dk_openlock);
	LIST_FOREACH(lsc, &pdk->dk_wedges, sc_plink) {
		if (lsc->sc_offset != dkw->dkw_offset)
			continue;
		if (strcmp(lsc->sc_wname, dkw->dkw_wname) != 0)
			break;
		if (strcmp(lsc->sc_ptype, dkw->dkw_ptype) != 0)
			break;
		if (dkwedge_size(lsc) > dkw->dkw_size)
			break;
		if (lsc->sc_dev == NULL)
			break;

		sc = lsc;
		device_acquire(sc->sc_dev);
		dkwedge_size_increase(sc, dkw->dkw_size);
		dk_set_geometry(sc, pdk);

		break;
	}
	mutex_exit(&pdk->dk_openlock);

	/* Existing wedge was updated in place; just announce it. */
	if (sc != NULL)
		goto announce;

	sc = malloc(sizeof(*sc), M_DKWEDGE, M_WAITOK|M_ZERO);
	sc->sc_state = DKW_STATE_LARVAL;
	sc->sc_parent = pdk;
	sc->sc_pdev = pdev;
	sc->sc_offset = dkw->dkw_offset;
	dkwedge_size_init(sc, dkw->dkw_size);

	memcpy(sc->sc_wname, dkw->dkw_wname, sizeof(sc->sc_wname));
	sc->sc_wname[sizeof(sc->sc_wname) - 1] = '\0';

	memcpy(sc->sc_ptype, dkw->dkw_ptype, sizeof(sc->sc_ptype));
	sc->sc_ptype[sizeof(sc->sc_ptype) - 1] = '\0';

	bufq_alloc(&sc->sc_bufq, "fcfs", 0);

	callout_init(&sc->sc_restart_ch, 0);
	callout_setfunc(&sc->sc_restart_ch, dkrestart, sc);

	mutex_init(&sc->sc_iolock, MUTEX_DEFAULT, IPL_BIO);

	/*
	 * Wedge will be added; increment the wedge count for the parent.
	 * Only allow this to happen if RAW_PART is the only thing open.
	 */
	mutex_enter(&pdk->dk_openlock);
	if (pdk->dk_openmask & ~(1 << RAW_PART))
		error = EBUSY;
	else {
		/* Check for wedge overlap. */
		LIST_FOREACH(lsc, &pdk->dk_wedges, sc_plink) {
			/* XXX arithmetic overflow */
			uint64_t size = dkwedge_size(sc);
			uint64_t lsize = dkwedge_size(lsc);
			daddr_t lastblk = sc->sc_offset + size - 1;
			daddr_t llastblk = lsc->sc_offset + lsize - 1;

			if (sc->sc_offset >= lsc->sc_offset &&
			    sc->sc_offset <= llastblk) {
				/* Overlaps the tail of the existing wedge. */
				break;
			}
			if (lastblk >= lsc->sc_offset &&
			    lastblk <= llastblk) {
				/* Overlaps the head of the existing wedge. */
				break;
			}
		}
		if (lsc != NULL) {
			if (sc->sc_offset == lsc->sc_offset &&
			    dkwedge_size(sc) == dkwedge_size(lsc) &&
			    strcmp(sc->sc_wname, lsc->sc_wname) == 0)
				error = EEXIST;
			else
				error = EINVAL;
		} else {
			pdk->dk_nwedges++;
			LIST_INSERT_HEAD(&pdk->dk_wedges, sc, sc_plink);
		}
	}
	mutex_exit(&pdk->dk_openlock);
	if (error) {
		mutex_destroy(&sc->sc_iolock);
		bufq_free(sc->sc_bufq);
		dkwedge_size_fini(sc);
		free(sc, M_DKWEDGE);
		return error;
	}

	/* Fill in our cfdata for the pseudo-device glue. */
	sc->sc_cfdata.cf_name = dk_cd.cd_name;
	sc->sc_cfdata.cf_atname = dk_ca.ca_name;
	/* sc->sc_cfdata.cf_unit set below */
	sc->sc_cfdata.cf_fstate = FSTATE_NOTFOUND;	/* use chosen cf_unit */

	/* Insert the larval wedge into the array. */
	rw_enter(&dkwedges_lock, RW_WRITER);
	for (error = 0;;) {
		struct dkwedge_softc **scpp;

		/*
		 * Check for a duplicate wname while searching for
		 * a slot.
		 */
		for (scpp = NULL, unit = 0; unit < ndkwedges; unit++) {
			if (dkwedges[unit] == NULL) {
				if (scpp == NULL) {
					scpp = &dkwedges[unit];
					sc->sc_cfdata.cf_unit = unit;
				}
			} else {
				/* XXX Unicode. */
				if (strcmp(dkwedges[unit]->sc_wname,
					sc->sc_wname) == 0) {
					error = EEXIST;
					break;
				}
			}
		}
		if (error)
			break;
		KASSERT(unit == ndkwedges);
		if (scpp == NULL) {
			/* No free slot; grow the array and rescan. */
			error = dkwedge_array_expand();
			if (error)
				break;
		} else {
			KASSERT(scpp == &dkwedges[sc->sc_cfdata.cf_unit]);
			*scpp = sc;
			break;
		}
	}
	rw_exit(&dkwedges_lock);
	if (error) {
		mutex_enter(&pdk->dk_openlock);
		pdk->dk_nwedges--;
		LIST_REMOVE(sc, sc_plink);
		mutex_exit(&pdk->dk_openlock);

		mutex_destroy(&sc->sc_iolock);
		bufq_free(sc->sc_bufq);
		dkwedge_size_fini(sc);
		free(sc, M_DKWEDGE);
		return error;
	}

	/*
	 * Now that we know the unit #, attach a pseudo-device for
	 * this wedge instance.  This will provide us with the
	 * device_t necessary for glue to other parts of the system.
	 *
	 * This should never fail, unless we're almost totally out of
	 * memory.
	 */
	if ((dev = config_attach_pseudo_acquire(&sc->sc_cfdata, sc)) == NULL) {
		aprint_error("%s%u: unable to attach pseudo-device\n",
		    sc->sc_cfdata.cf_name, sc->sc_cfdata.cf_unit);

		rw_enter(&dkwedges_lock, RW_WRITER);
		KASSERT(dkwedges[sc->sc_cfdata.cf_unit] == sc);
		dkwedges[sc->sc_cfdata.cf_unit] = NULL;
		rw_exit(&dkwedges_lock);

		mutex_enter(&pdk->dk_openlock);
		pdk->dk_nwedges--;
		LIST_REMOVE(sc, sc_plink);
		mutex_exit(&pdk->dk_openlock);

		mutex_destroy(&sc->sc_iolock);
		bufq_free(sc->sc_bufq);
		dkwedge_size_fini(sc);
		free(sc, M_DKWEDGE);
		return ENOMEM;
	}

	KASSERT(dev == sc->sc_dev);

announce:
	/* Announce our arrival. */
	aprint_normal(
	    "%s at %s: \"%s\", %"PRIu64" blocks at %"PRId64", type: %s\n",
	    device_xname(sc->sc_dev), pdk->dk_name,
	    sc->sc_wname,	/* XXX Unicode */
	    dkwedge_size(sc), sc->sc_offset,
	    sc->sc_ptype[0] == '\0' ? "<unknown>" : sc->sc_ptype);

	/* Return the devname to the caller.
*/ 639 strlcpy(dkw->dkw_devname, device_xname(sc->sc_dev), 640 sizeof(dkw->dkw_devname)); 641 642 device_release(sc->sc_dev); 643 return 0; 644} 645 646/* 647 * dkwedge_find_acquire: 648 * 649 * Lookup a disk wedge based on the provided information. 650 * NOTE: We look up the wedge based on the wedge devname, 651 * not wname. 652 * 653 * Return NULL if the wedge is not found, otherwise return 654 * the wedge's softc. Assign the wedge's unit number to unitp 655 * if unitp is not NULL. The wedge's sc_dev is referenced and 656 * must be released by device_release or equivalent. 657 */ 658static struct dkwedge_softc * 659dkwedge_find_acquire(struct dkwedge_info *dkw, u_int *unitp) 660{ 661 struct dkwedge_softc *sc = NULL; 662 u_int unit; 663 664 /* Find our softc. */ 665 dkw->dkw_devname[sizeof(dkw->dkw_devname) - 1] = '\0'; 666 rw_enter(&dkwedges_lock, RW_READER); 667 for (unit = 0; unit < ndkwedges; unit++) { 668 if ((sc = dkwedges[unit]) != NULL && 669 sc->sc_dev != NULL && 670 strcmp(device_xname(sc->sc_dev), dkw->dkw_devname) == 0 && 671 strcmp(sc->sc_parent->dk_name, dkw->dkw_parent) == 0) { 672 device_acquire(sc->sc_dev); 673 break; 674 } 675 } 676 rw_exit(&dkwedges_lock); 677 if (sc == NULL) 678 return NULL; 679 680 if (unitp != NULL) 681 *unitp = unit; 682 683 return sc; 684} 685 686/* 687 * dkwedge_del: [exported function] 688 * 689 * Delete a disk wedge based on the provided information. 690 * NOTE: We look up the wedge based on the wedge devname, 691 * not wname. 692 */ 693int 694dkwedge_del(struct dkwedge_info *dkw) 695{ 696 697 return dkwedge_del1(dkw, 0); 698} 699 700int 701dkwedge_del1(struct dkwedge_info *dkw, int flags) 702{ 703 struct dkwedge_softc *sc = NULL; 704 705 /* Find our softc. */ 706 if ((sc = dkwedge_find_acquire(dkw, NULL)) == NULL) 707 return ESRCH; 708 709 return config_detach_release(sc->sc_dev, flags); 710} 711 712/* 713 * dkwedge_detach: 714 * 715 * Autoconfiguration detach function for pseudo-device glue. 
 */
static int
dkwedge_detach(device_t self, int flags)
{
	struct dkwedge_softc *const sc = device_private(self);
	const u_int unit = device_unit(self);
	int bmaj, cmaj, error;

	error = disk_begindetach(&sc->sc_dk, /*lastclose*/NULL, self, flags);
	if (error)
		return error;

	/* Mark the wedge as dying. */
	sc->sc_state = DKW_STATE_DYING;

	pmf_device_deregister(self);

	/* Kill any pending restart. */
	mutex_enter(&sc->sc_iolock);
	sc->sc_iostop = true;
	mutex_exit(&sc->sc_iolock);
	callout_halt(&sc->sc_restart_ch, NULL);

	/* Locate the wedge major numbers. */
	bmaj = bdevsw_lookup_major(&dk_bdevsw);
	cmaj = cdevsw_lookup_major(&dk_cdevsw);

	/* Nuke the vnodes for any open instances. */
	vdevgone(bmaj, unit, unit, VBLK);
	vdevgone(cmaj, unit, unit, VCHR);

	/*
	 * At this point, all block device opens have been closed,
	 * synchronously flushing any buffered writes; and all
	 * character device I/O operations have completed
	 * synchronously, and character device opens have been closed.
	 *
	 * So there can be no more opens or queued buffers by now.
	 */
	KASSERT(sc->sc_dk.dk_openmask == 0);
	KASSERT(bufq_peek(sc->sc_bufq) == NULL);
	bufq_drain(sc->sc_bufq);

	/* Announce our departure. */
	aprint_normal("%s at %s (%s) deleted\n", device_xname(sc->sc_dev),
	    sc->sc_parent->dk_name,
	    sc->sc_wname);	/* XXX Unicode */

	/* Unlink from the parent's wedge list. */
	mutex_enter(&sc->sc_parent->dk_openlock);
	sc->sc_parent->dk_nwedges--;
	LIST_REMOVE(sc, sc_plink);
	mutex_exit(&sc->sc_parent->dk_openlock);

	/* Delete our buffer queue. */
	bufq_free(sc->sc_bufq);

	/* Detach from the disk list. */
	disk_detach(&sc->sc_dk);
	disk_destroy(&sc->sc_dk);

	/* Poof. */
	rw_enter(&dkwedges_lock, RW_WRITER);
	KASSERT(dkwedges[unit] == sc);
	dkwedges[unit] = NULL;
	sc->sc_state = DKW_STATE_DEAD;
	rw_exit(&dkwedges_lock);

	mutex_destroy(&sc->sc_iolock);
	dkwedge_size_fini(sc);

	free(sc, M_DKWEDGE);

	return 0;
}

/*
 * dkwedge_delall:	[exported function]
 *
 *	Forcibly delete all of the wedges on the specified disk.  Used
 *	when a disk is being detached.
 */
void
dkwedge_delall(struct disk *pdk)
{

	dkwedge_delall1(pdk, /*idleonly*/false);
}

/*
 * dkwedge_delidle:	[exported function]
 *
 *	Delete all of the wedges on the specified disk if idle.  Used
 *	by ioctl(DIOCRMWEDGES).
 */
void
dkwedge_delidle(struct disk *pdk)
{

	dkwedge_delall1(pdk, /*idleonly*/true);
}

static void
dkwedge_delall1(struct disk *pdk, bool idleonly)
{
	struct dkwedge_softc *sc;
	int flags;

	flags = DETACH_QUIET;
	if (!idleonly)
		flags |= DETACH_FORCE;

	/* Repeatedly pick one eligible wedge and detach it. */
	for (;;) {
		mutex_enter(&pdk->dk_rawlock); /* for sc->sc_dk.dk_openmask */
		mutex_enter(&pdk->dk_openlock);
		LIST_FOREACH(sc, &pdk->dk_wedges, sc_plink) {
			/*
			 * Wedge is not yet created.  This is a race --
			 * it may as well have been added just after we
			 * deleted all the wedges, so pretend it's not
			 * here yet.
			 */
			if (sc->sc_dev == NULL)
				continue;
			if (!idleonly || sc->sc_dk.dk_openmask == 0) {
				device_acquire(sc->sc_dev);
				break;
			}
		}
		if (sc == NULL) {
			KASSERT(idleonly || pdk->dk_nwedges == 0);
			mutex_exit(&pdk->dk_openlock);
			mutex_exit(&pdk->dk_rawlock);
			return;
		}
		mutex_exit(&pdk->dk_openlock);
		mutex_exit(&pdk->dk_rawlock);
		(void)config_detach_release(sc->sc_dev, flags);
	}
}

/*
 * dkwedge_list:	[exported function]
 *
 *	List all of the wedges on a particular disk.
 */
int
dkwedge_list(struct disk *pdk, struct dkwedge_list *dkwl, struct lwp *l)
{
	struct uio uio;
	struct iovec iov;
	struct dkwedge_softc *sc;
	struct dkwedge_info dkw;
	int error = 0;

	iov.iov_base = dkwl->dkwl_buf;
	iov.iov_len = dkwl->dkwl_bufsize;

	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = 0;
	uio.uio_resid = dkwl->dkwl_bufsize;
	uio.uio_rw = UIO_READ;
	KASSERT(l == curlwp);
	uio.uio_vmspace = l->l_proc->p_vmspace;

	dkwl->dkwl_ncopied = 0;

	mutex_enter(&pdk->dk_openlock);
	LIST_FOREACH(sc, &pdk->dk_wedges, sc_plink) {
		if (uio.uio_resid < sizeof(dkw))
			break;

		if (sc->sc_dev == NULL)
			continue;

		strlcpy(dkw.dkw_devname, device_xname(sc->sc_dev),
		    sizeof(dkw.dkw_devname));
		memcpy(dkw.dkw_wname, sc->sc_wname, sizeof(dkw.dkw_wname));
		dkw.dkw_wname[sizeof(dkw.dkw_wname) - 1] = '\0';
		strlcpy(dkw.dkw_parent, sc->sc_parent->dk_name,
		    sizeof(dkw.dkw_parent));
		dkw.dkw_offset = sc->sc_offset;
		dkw.dkw_size = dkwedge_size(sc);
		strlcpy(dkw.dkw_ptype, sc->sc_ptype, sizeof(dkw.dkw_ptype));

		/*
		 * Acquire a device reference so this wedge doesn't go
		 * away before our next iteration in LIST_FOREACH, and
		 * then release the lock for uiomove.
		 */
		device_acquire(sc->sc_dev);
		mutex_exit(&pdk->dk_openlock);
		error = uiomove(&dkw, sizeof(dkw), &uio);
		mutex_enter(&pdk->dk_openlock);
		device_release(sc->sc_dev);
		if (error)
			break;

		dkwl->dkwl_ncopied++;
	}
	dkwl->dkwl_nwedges = pdk->dk_nwedges;
	mutex_exit(&pdk->dk_openlock);

	return error;
}

/*
 * Find a wedge by its (Unicode) wedge name; on success the returned
 * device has been referenced with device_acquire.
 */
static device_t
dkwedge_find_by_wname_acquire(const char *wname)
{
	device_t dv = NULL;
	struct dkwedge_softc *sc;
	int i;

	rw_enter(&dkwedges_lock, RW_READER);
	for (i = 0; i < ndkwedges; i++) {
		if ((sc = dkwedges[i]) == NULL || sc->sc_dev == NULL)
			continue;
		if (strcmp(sc->sc_wname, wname) == 0) {
			/* Keep the first match; warn about duplicates. */
			if (dv != NULL) {
				printf(
				    "WARNING: double match for wedge name %s "
				    "(%s, %s)\n", wname, device_xname(dv),
				    device_xname(sc->sc_dev));
				continue;
			}
			device_acquire(sc->sc_dev);
			dv = sc->sc_dev;
		}
	}
	rw_exit(&dkwedges_lock);
	return dv;
}

/*
 * Find the next wedge whose parent disk is `name', scanning from *i;
 * *i is advanced so repeated calls iterate over all matches.  The
 * returned device has been referenced with device_acquire.
 */
static device_t
dkwedge_find_by_parent_acquire(const char *name, size_t *i)
{

	rw_enter(&dkwedges_lock, RW_READER);
	for (; *i < (size_t)ndkwedges; (*i)++) {
		struct dkwedge_softc *sc;
		if ((sc = dkwedges[*i]) == NULL || sc->sc_dev == NULL)
			continue;
		if (strcmp(sc->sc_parent->dk_name, name) != 0)
			continue;
		device_acquire(sc->sc_dev);
		rw_exit(&dkwedges_lock);
		return sc->sc_dev;
	}
	rw_exit(&dkwedges_lock);
	return NULL;
}

/* XXX unsafe */
device_t
dkwedge_find_by_wname(const char *wname)
{
	device_t dv;

	if ((dv = dkwedge_find_by_wname_acquire(wname)) == NULL)
		return NULL;
	device_release(dv);
	return dv;
}

/* XXX unsafe */
device_t
dkwedge_find_by_parent(const char *name, size_t *i)
{
	device_t dv;

	if ((dv = dkwedge_find_by_parent_acquire(name, i)) == NULL)
		return NULL;
	device_release(dv);
	return dv;
}

void
dkwedge_print_wnames(void)
{
	struct dkwedge_softc *sc;
	int i;

	rw_enter(&dkwedges_lock, RW_READER);
	for (i = 0; i < ndkwedges; i++) {
		if ((sc = dkwedges[i]) == NULL || sc->sc_dev == NULL)
			continue;
		printf(" wedge:%s", sc->sc_wname);
	}
	rw_exit(&dkwedges_lock);
}

/*
 * We need a dummy object to stuff into the dkwedge discovery method link
 * set to ensure that there is always at least one object in the set.
 */
static struct dkwedge_discovery_method dummy_discovery_method;
__link_set_add_bss(dkwedge_methods, dummy_discovery_method);

/*
 * dkwedge_init:
 *
 *	Initialize the disk wedge subsystem.
 */
void
dkwedge_init(void)
{
	__link_set_decl(dkwedge_methods, struct dkwedge_discovery_method);
	struct dkwedge_discovery_method * const *ddmp;
	struct dkwedge_discovery_method *lddm, *ddm;

	rw_init(&dkwedges_lock);
	rw_init(&dkwedge_discovery_methods_lock);

	if (config_cfdriver_attach(&dk_cd) != 0)
		panic("dkwedge: unable to attach cfdriver");
	if (config_cfattach_attach(dk_cd.cd_name, &dk_ca) != 0)
		panic("dkwedge: unable to attach cfattach");

	rw_enter(&dkwedge_discovery_methods_lock, RW_WRITER);

	LIST_INIT(&dkwedge_discovery_methods);

	/* Insert each link-set method, keeping the list sorted by priority. */
	__link_set_foreach(ddmp, dkwedge_methods) {
		ddm = *ddmp;
		if (ddm == &dummy_discovery_method)
			continue;
		if (LIST_EMPTY(&dkwedge_discovery_methods)) {
			LIST_INSERT_HEAD(&dkwedge_discovery_methods,
			    ddm, ddm_list);
			continue;
		}
		LIST_FOREACH(lddm, &dkwedge_discovery_methods, ddm_list) {
			if (ddm->ddm_priority == lddm->ddm_priority) {
				aprint_error("dk-method-%s: method \"%s\" "
				    "already exists at priority %d\n",
				    ddm->ddm_name, lddm->ddm_name,
				    lddm->ddm_priority);
				/* Not inserted. */
				break;
			}
			if (ddm->ddm_priority < lddm->ddm_priority) {
				/* Higher priority; insert before. */
				LIST_INSERT_BEFORE(lddm, ddm, ddm_list);
				break;
			}
			if (LIST_NEXT(lddm, ddm_list) == NULL) {
				/* Last one; insert after. */
				KASSERT(lddm->ddm_priority < ddm->ddm_priority);
				LIST_INSERT_AFTER(lddm, ddm, ddm_list);
				break;
			}
		}
	}

	rw_exit(&dkwedge_discovery_methods_lock);
}

#ifdef DKWEDGE_AUTODISCOVER
int	dkwedge_autodiscover = 1;
#else
int	dkwedge_autodiscover = 0;
#endif

/*
 * dkwedge_discover:	[exported function]
 *
 *	Discover the wedges on a newly attached disk.
 *	Remove all unused wedges on the disk first.
 */
void
dkwedge_discover(struct disk *pdk)
{
	struct dkwedge_discovery_method *ddm;
	struct vnode *vp;
	int error;
	dev_t pdev;

	/*
	 * Require people playing with wedges to enable this explicitly.
	 */
	if (dkwedge_autodiscover == 0)
		return;

	rw_enter(&dkwedge_discovery_methods_lock, RW_READER);

	/*
	 * Use the character device for scanning, the block device
	 * is busy if there are already wedges attached.
	 */
	error = dkwedge_compute_pdev(pdk->dk_name, &pdev, VCHR);
	if (error) {
		aprint_error("%s: unable to compute pdev, error = %d\n",
		    pdk->dk_name, error);
		goto out;
	}

	error = cdevvp(pdev, &vp);
	if (error) {
		aprint_error("%s: unable to find vnode for pdev, error = %d\n",
		    pdk->dk_name, error);
		goto out;
	}

	error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (error) {
		aprint_error("%s: unable to lock vnode for pdev, error = %d\n",
		    pdk->dk_name, error);
		vrele(vp);
		goto out;
	}

	error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);
	if (error) {
		if (error != ENXIO)
			aprint_error("%s: unable to open device, error = %d\n",
			    pdk->dk_name, error);
		vput(vp);
		goto out;
	}
	VOP_UNLOCK(vp);

	/*
	 * Remove unused wedges
	 */
	dkwedge_delidle(pdk);

	/*
	 * For each supported partition map type, look to see if
	 * this map type exists.  If so, parse it and add the
	 * corresponding wedges.
	 */
	LIST_FOREACH(ddm, &dkwedge_discovery_methods, ddm_list) {
		error = (*ddm->ddm_discover)(pdk, vp);
		if (error == 0) {
			/* Successfully created wedges; we're done. */
			break;
		}
	}

	error = vn_close(vp, FREAD, NOCRED);
	if (error) {
		aprint_error("%s: unable to close device, error = %d\n",
		    pdk->dk_name, error);
		/* We'll just assume the vnode has been cleaned up. */
	}

out:
	rw_exit(&dkwedge_discovery_methods_lock);
}

/*
 * dkwedge_read:
 *
 *	Read some data from the specified disk, used for
 *	partition discovery.
1171 */ 1172int 1173dkwedge_read(struct disk *pdk, struct vnode *vp, daddr_t blkno, 1174 void *tbuf, size_t len) 1175{ 1176 buf_t *bp; 1177 int error; 1178 bool isopen; 1179 dev_t bdev; 1180 struct vnode *bdvp; 1181 1182 /* 1183 * The kernel cannot read from a character device vnode 1184 * as physio() only handles user memory. 1185 * 1186 * If the block device has already been opened by a wedge 1187 * use that vnode and temporarily bump the open counter. 1188 * 1189 * Otherwise try to open the block device. 1190 */ 1191 1192 bdev = devsw_chr2blk(vp->v_rdev); 1193 1194 mutex_enter(&pdk->dk_rawlock); 1195 if (pdk->dk_rawopens != 0) { 1196 KASSERT(pdk->dk_rawvp != NULL); 1197 isopen = true; 1198 ++pdk->dk_rawopens; 1199 bdvp = pdk->dk_rawvp; 1200 error = 0; 1201 } else { 1202 isopen = false; 1203 error = dk_open_parent(bdev, FREAD, &bdvp); 1204 } 1205 mutex_exit(&pdk->dk_rawlock); 1206 1207 if (error) 1208 return error; 1209 1210 bp = getiobuf(bdvp, true); 1211 bp->b_flags = B_READ; 1212 bp->b_cflags = BC_BUSY; 1213 bp->b_dev = bdev; 1214 bp->b_data = tbuf; 1215 bp->b_bufsize = bp->b_bcount = len; 1216 bp->b_blkno = blkno; 1217 bp->b_cylinder = 0; 1218 bp->b_error = 0; 1219 1220 VOP_STRATEGY(bdvp, bp); 1221 error = biowait(bp); 1222 putiobuf(bp); 1223 1224 mutex_enter(&pdk->dk_rawlock); 1225 if (isopen) { 1226 --pdk->dk_rawopens; 1227 } else { 1228 dk_close_parent(bdvp, FREAD); 1229 } 1230 mutex_exit(&pdk->dk_rawlock); 1231 1232 return error; 1233} 1234 1235/* 1236 * dkwedge_lookup: 1237 * 1238 * Look up a dkwedge_softc based on the provided dev_t. 1239 * 1240 * Caller must guarantee the wedge is referenced. 
 */
static struct dkwedge_softc *
dkwedge_lookup(dev_t dev)
{

	return device_lookup_private(&dk_cd, minor(dev));
}

/*
 * dkwedge_lookup_acquire:
 *
 *	Look up a dkwedge_softc based on the provided dev_t and take a
 *	device reference; the caller must drop it with device_release().
 *	Returns NULL if there is no such device.
 */
static struct dkwedge_softc *
dkwedge_lookup_acquire(dev_t dev)
{
	device_t dv = device_lookup_acquire(&dk_cd, minor(dev));

	if (dv == NULL)
		return NULL;
	return device_private(dv);
}

/*
 * dk_open_parent:
 *
 *	Open the parent block device by dev_t with the given mode and
 *	return an unlocked, referenced vnode in *vpp.  On error no
 *	vnode is returned.
 */
static int
dk_open_parent(dev_t dev, int mode, struct vnode **vpp)
{
	struct vnode *vp;
	int error;

	error = bdevvp(dev, &vp);
	if (error)
		return error;

	error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (error) {
		vrele(vp);
		return error;
	}
	error = VOP_OPEN(vp, mode, NOCRED);
	if (error) {
		vput(vp);
		return error;
	}

	/* VOP_OPEN() doesn't do this for us. */
	if (mode & FWRITE) {
		mutex_enter(vp->v_interlock);
		vp->v_writecount++;
		mutex_exit(vp->v_interlock);
	}

	VOP_UNLOCK(vp);

	*vpp = vp;

	return 0;
}

/*
 * dk_close_parent:
 *
 *	Close a parent vnode obtained from dk_open_parent().  vn_close()
 *	drops both the open count and the vnode reference.
 */
static int
dk_close_parent(struct vnode *vp, int mode)
{
	int error;

	error = vn_close(vp, mode, NOCRED);
	return error;
}

/*
 * dkopen: [devsw entry point]
 *
 *	Open a wedge.
 */
static int
dkopen(dev_t dev, int flags, int fmt, struct lwp *l)
{
	struct dkwedge_softc *sc = dkwedge_lookup(dev);
	int error = 0;

	if (sc == NULL)
		return ENXIO;
	KASSERT(sc->sc_dev != NULL);
	KASSERT(sc->sc_state == DKW_STATE_RUNNING);

	/*
	 * We go through a complicated little dance to only open the parent
	 * vnode once per wedge, no matter how many times the wedge is
	 * opened.  The reason?  We see one dkopen() per open call, but
	 * only dkclose() on the last close.
	 */
	mutex_enter(&sc->sc_dk.dk_openlock);
	mutex_enter(&sc->sc_parent->dk_rawlock);
	if (sc->sc_dk.dk_openmask == 0) {
		error = dkfirstopen(sc, flags);
		if (error)
			goto out;
	} else if (flags & ~sc->sc_mode & FWRITE) {
		/*
		 * The parent is already open, but the previous attempt
		 * to open it read/write failed and fell back to
		 * read-only.  In that case, we assume the medium is
		 * read-only and fail to open the wedge read/write.
		 */
		error = EROFS;
		goto out;
	}
	KASSERT(sc->sc_mode != 0);
	KASSERTMSG(sc->sc_mode & FREAD, "%s: sc_mode=%x",
	    device_xname(sc->sc_dev), sc->sc_mode);
	KASSERTMSG((flags & FWRITE) ? (sc->sc_mode & FWRITE) : 1,
	    "%s: flags=%x sc_mode=%x",
	    device_xname(sc->sc_dev), flags, sc->sc_mode);
	/* Record this opener in the char/block open masks. */
	if (fmt == S_IFCHR)
		sc->sc_dk.dk_copenmask |= 1;
	else
		sc->sc_dk.dk_bopenmask |= 1;
	sc->sc_dk.dk_openmask =
	    sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;

out:	mutex_exit(&sc->sc_parent->dk_rawlock);
	mutex_exit(&sc->sc_dk.dk_openlock);
	return error;
}

/*
 * dkfirstopen:
 *
 *	First open of a wedge: open the parent device if no other wedge
 *	has it open yet, otherwise share the existing parent vnode and
 *	inherit the open mode from a sibling wedge.
 *
 *	Called with sc_dk.dk_openlock and the parent's dk_rawlock held.
 */
static int
dkfirstopen(struct dkwedge_softc *sc, int flags)
{
	struct dkwedge_softc *nsc;
	struct vnode *vp;
	int mode;
	int error;

	KASSERT(mutex_owned(&sc->sc_dk.dk_openlock));
	KASSERT(mutex_owned(&sc->sc_parent->dk_rawlock));

	if (sc->sc_parent->dk_rawopens == 0) {
		KASSERT(sc->sc_parent->dk_rawvp == NULL);
		/*
		 * Try open read-write. If this fails for EROFS
		 * and wedge is read-only, retry to open read-only.
		 */
		mode = FREAD | FWRITE;
		error = dk_open_parent(sc->sc_pdev, mode, &vp);
		if (error == EROFS && (flags & FWRITE) == 0) {
			mode &= ~FWRITE;
			error = dk_open_parent(sc->sc_pdev, mode, &vp);
		}
		if (error)
			return error;
		KASSERT(vp != NULL);
		sc->sc_parent->dk_rawvp = vp;
	} else {
		/*
		 * Retrieve mode from an already opened wedge.
		 *
		 * At this point, dk_rawopens is bounded by the number
		 * of dkwedge devices in the system, which is limited
		 * by autoconf device numbering to INT_MAX.  Since
		 * dk_rawopens is unsigned, this can't overflow.
		 */
		KASSERT(sc->sc_parent->dk_rawopens < UINT_MAX);
		KASSERT(sc->sc_parent->dk_rawvp != NULL);
		mode = 0;
		mutex_enter(&sc->sc_parent->dk_openlock);
		LIST_FOREACH(nsc, &sc->sc_parent->dk_wedges, sc_plink) {
			if (nsc == sc || nsc->sc_dk.dk_openmask == 0)
				continue;
			mode = nsc->sc_mode;
			break;
		}
		mutex_exit(&sc->sc_parent->dk_openlock);
	}
	sc->sc_mode = mode;
	sc->sc_parent->dk_rawopens++;

	return 0;
}

/*
 * dklastclose:
 *
 *	Last close of a wedge: drop our share of the parent's raw open
 *	count, and close the parent vnode if we were its last user.
 *
 *	Called with sc_dk.dk_openlock and the parent's dk_rawlock held.
 */
static void
dklastclose(struct dkwedge_softc *sc)
{

	KASSERT(mutex_owned(&sc->sc_dk.dk_openlock));
	KASSERT(mutex_owned(&sc->sc_parent->dk_rawlock));
	KASSERT(sc->sc_parent->dk_rawopens > 0);
	KASSERT(sc->sc_parent->dk_rawvp != NULL);

	if (--sc->sc_parent->dk_rawopens == 0) {
		struct vnode *const vp = sc->sc_parent->dk_rawvp;
		const int mode = sc->sc_mode;

		/* Clear our bookkeeping before the potentially-sleeping close. */
		sc->sc_parent->dk_rawvp = NULL;
		sc->sc_mode = 0;

		dk_close_parent(vp, mode);
	}
}

/*
 * dkclose: [devsw entry point]
 *
 *	Close a wedge.
 */
static int
dkclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	struct dkwedge_softc *sc = dkwedge_lookup(dev);

	/*
	 * dkclose can be called even if dkopen didn't succeed, so we
	 * have to handle the same possibility that the wedge may not
	 * exist.
	 */
	if (sc == NULL)
		return ENXIO;
	KASSERT(sc->sc_dev != NULL);
	KASSERT(sc->sc_state != DKW_STATE_LARVAL);
	KASSERT(sc->sc_state != DKW_STATE_DEAD);

	mutex_enter(&sc->sc_dk.dk_openlock);
	mutex_enter(&sc->sc_parent->dk_rawlock);

	KASSERT(sc->sc_dk.dk_openmask != 0);

	/* Clear this opener's bit and recompute the combined mask. */
	if (fmt == S_IFCHR)
		sc->sc_dk.dk_copenmask &= ~1;
	else
		sc->sc_dk.dk_bopenmask &= ~1;
	sc->sc_dk.dk_openmask =
	    sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;

	if (sc->sc_dk.dk_openmask == 0) {
		dklastclose(sc);
	}

	mutex_exit(&sc->sc_parent->dk_rawlock);
	mutex_exit(&sc->sc_dk.dk_openlock);

	return 0;
}

/*
 * dkcancel: [devsw entry point]
 *
 *	Cancel any pending I/O operations waiting on a wedge.
 */
static int
dkcancel(dev_t dev, int flags, int fmt, struct lwp *l)
{
	struct dkwedge_softc *sc = dkwedge_lookup(dev);

	KASSERT(sc != NULL);
	KASSERT(sc->sc_dev != NULL);
	KASSERT(sc->sc_state != DKW_STATE_LARVAL);
	KASSERT(sc->sc_state != DKW_STATE_DEAD);

	/*
	 * Disk I/O is expected to complete or fail within a reasonable
	 * timeframe -- it's storage, not communication.  Further, the
	 * character and block device interface guarantees that prior
	 * reads and writes have completed or failed by the time close
	 * returns -- we are not to cancel them here.  If the parent
	 * device's hardware is gone, the parent driver can make them
	 * fail.  Nothing for dk(4) itself to do.
	 */

	return 0;
}

/*
 * dkstrategy: [devsw entry point]
 *
 *	Perform I/O based on the wedge I/O strategy.
 */
static void
dkstrategy(struct buf *bp)
{
	struct dkwedge_softc *sc = dkwedge_lookup(bp->b_dev);
	uint64_t p_size, p_offset;

	KASSERT(sc != NULL);
	KASSERT(sc->sc_dev != NULL);
	KASSERT(sc->sc_state != DKW_STATE_LARVAL);
	KASSERT(sc->sc_state != DKW_STATE_DEAD);
	KASSERT(sc->sc_parent->dk_rawvp != NULL);

	/* If it's an empty transfer, wake up the top half now. */
	if (bp->b_bcount == 0)
		goto done;

	/* Wedge offset/size, converted to DEV_BSIZE units. */
	p_offset = sc->sc_offset << sc->sc_parent->dk_blkshift;
	p_size = dkwedge_size(sc) << sc->sc_parent->dk_blkshift;

	/* Make sure it's in-range. */
	if (bounds_check_with_mediasize(bp, DEV_BSIZE, p_size) <= 0)
		goto done;

	/* Translate it to the parent's raw LBA. */
	bp->b_rawblkno = bp->b_blkno + p_offset;

	/* Place it in the queue and start I/O on the unit. */
	mutex_enter(&sc->sc_iolock);
	disk_wait(&sc->sc_dk);
	bufq_put(sc->sc_bufq, bp);
	mutex_exit(&sc->sc_iolock);

	dkstart(sc);
	return;

done:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
}

/*
 * dkstart:
 *
 *	Start I/O that has been enqueued on the wedge.
 */
static void
dkstart(struct dkwedge_softc *sc)
{
	struct vnode *vp;
	struct buf *bp, *nbp;

	mutex_enter(&sc->sc_iolock);

	/* Do as much work as has been enqueued. */
	while ((bp = bufq_peek(sc->sc_bufq)) != NULL) {
		if (sc->sc_iostop) {
			/* I/O stopped: fail everything still queued. */
			(void) bufq_get(sc->sc_bufq);
			mutex_exit(&sc->sc_iolock);
			bp->b_error = ENXIO;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			mutex_enter(&sc->sc_iolock);
			continue;
		}

		/* fetch an I/O buf with sc_iolock dropped */
		mutex_exit(&sc->sc_iolock);
		nbp = getiobuf(sc->sc_parent->dk_rawvp, false);
		mutex_enter(&sc->sc_iolock);
		if (nbp == NULL) {
			/*
			 * No resources to run this request; leave the
			 * buffer queued up, and schedule a timer to
			 * restart the queue in 1/2 a second.
			 */
			if (!sc->sc_iostop)
				callout_schedule(&sc->sc_restart_ch, hz/2);
			break;
		}

		/*
		 * fetch buf, this can fail if another thread
		 * has already processed the queue, it can also
		 * return a completely different buf.
		 */
		bp = bufq_get(sc->sc_bufq);
		if (bp == NULL) {
			mutex_exit(&sc->sc_iolock);
			putiobuf(nbp);
			mutex_enter(&sc->sc_iolock);
			continue;
		}

		/* Instrumentation. */
		disk_busy(&sc->sc_dk);

		/* release lock for VOP_STRATEGY */
		mutex_exit(&sc->sc_iolock);

		/*
		 * Clone the caller's buf into the nested I/O buf aimed
		 * at the parent's raw device; dkiodone() will complete
		 * the original (stashed in b_private).
		 */
		nbp->b_data = bp->b_data;
		nbp->b_flags = bp->b_flags;
		nbp->b_oflags = bp->b_oflags;
		nbp->b_cflags = bp->b_cflags;
		nbp->b_iodone = dkiodone;
		nbp->b_proc = bp->b_proc;
		nbp->b_blkno = bp->b_rawblkno;
		nbp->b_dev = sc->sc_parent->dk_rawvp->v_rdev;
		nbp->b_bcount = bp->b_bcount;
		nbp->b_private = bp;
		BIO_COPYPRIO(nbp, bp);

		vp = nbp->b_vp;
		if ((nbp->b_flags & B_READ) == 0) {
			/* Writes must account for a pending output on the vnode. */
			mutex_enter(vp->v_interlock);
			vp->v_numoutput++;
			mutex_exit(vp->v_interlock);
		}
		VOP_STRATEGY(vp, nbp);

		mutex_enter(&sc->sc_iolock);
	}

	mutex_exit(&sc->sc_iolock);
}

/*
 * dkiodone:
 *
 *	I/O to a wedge has completed; alert the top half.
 */
static void
dkiodone(struct buf *bp)
{
	struct buf *obp = bp->b_private;
	struct dkwedge_softc *sc = dkwedge_lookup(obp->b_dev);

	KASSERT(sc != NULL);
	KASSERT(sc->sc_dev != NULL);

	/* Propagate status from the nested buf to the original. */
	if (bp->b_error != 0)
		obp->b_error = bp->b_error;
	obp->b_resid = bp->b_resid;
	putiobuf(bp);

	mutex_enter(&sc->sc_iolock);
	disk_unbusy(&sc->sc_dk, obp->b_bcount - obp->b_resid,
	    obp->b_flags & B_READ);
	mutex_exit(&sc->sc_iolock);

	biodone(obp);

	/* Kick the queue in case there is more work we can do. */
	dkstart(sc);
}

/*
 * dkrestart:
 *
 *	Restart the work queue after it was stalled due to
 *	a resource shortage.  Invoked via a callout.
 */
static void
dkrestart(void *v)
{
	struct dkwedge_softc *sc = v;

	dkstart(sc);
}

/*
 * dkminphys:
 *
 *	Call parent's minphys function.
 */
static void
dkminphys(struct buf *bp)
{
	struct dkwedge_softc *sc = dkwedge_lookup(bp->b_dev);
	dev_t dev;

	KASSERT(sc != NULL);
	KASSERT(sc->sc_dev != NULL);

	/* Temporarily masquerade as the parent so its minphys sees its dev. */
	dev = bp->b_dev;
	bp->b_dev = sc->sc_pdev;
	if (sc->sc_parent->dk_driver && sc->sc_parent->dk_driver->d_minphys)
		(*sc->sc_parent->dk_driver->d_minphys)(bp);
	else
		minphys(bp);
	bp->b_dev = dev;
}

/*
 * dkread: [devsw entry point]
 *
 *	Read from a wedge.
 */
static int
dkread(dev_t dev, struct uio *uio, int flags)
{
	struct dkwedge_softc *sc __diagused = dkwedge_lookup(dev);

	KASSERT(sc != NULL);
	KASSERT(sc->sc_dev != NULL);
	KASSERT(sc->sc_state != DKW_STATE_LARVAL);
	KASSERT(sc->sc_state != DKW_STATE_DEAD);

	return physio(dkstrategy, NULL, dev, B_READ, dkminphys, uio);
}

/*
 * dkwrite: [devsw entry point]
 *
 *	Write to a wedge.
 */
static int
dkwrite(dev_t dev, struct uio *uio, int flags)
{
	struct dkwedge_softc *sc __diagused = dkwedge_lookup(dev);

	KASSERT(sc != NULL);
	KASSERT(sc->sc_dev != NULL);
	KASSERT(sc->sc_state != DKW_STATE_LARVAL);
	KASSERT(sc->sc_state != DKW_STATE_DEAD);

	return physio(dkstrategy, NULL, dev, B_WRITE, dkminphys, uio);
}

/*
 * dkioctl: [devsw entry point]
 *
 *	Perform an ioctl request on a wedge.
 */
static int
dkioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	struct dkwedge_softc *sc = dkwedge_lookup(dev);
	int error = 0;

	KASSERT(sc != NULL);
	KASSERT(sc->sc_dev != NULL);
	KASSERT(sc->sc_state != DKW_STATE_LARVAL);
	KASSERT(sc->sc_state != DKW_STATE_DEAD);
	KASSERT(sc->sc_parent->dk_rawvp != NULL);

	/*
	 * We pass NODEV instead of our device to indicate we don't
	 * want to handle disklabel ioctls
	 */
	error = disk_ioctl(&sc->sc_dk, NODEV, cmd, data, flag, l);
	if (error != EPASSTHROUGH)
		return error;

	error = 0;

	switch (cmd) {
	case DIOCGSTRATEGY:
	case DIOCGCACHE:
	case DIOCCACHESYNC:
		/* Forward cache/strategy queries straight to the parent. */
		error = VOP_IOCTL(sc->sc_parent->dk_rawvp, cmd, data, flag,
		    l != NULL ? l->l_cred : NOCRED);
		break;
	case DIOCGWEDGEINFO: {
		struct dkwedge_info *dkw = data;

		/* Fill in our identity: device, wedge name, parent, extent. */
		strlcpy(dkw->dkw_devname, device_xname(sc->sc_dev),
		    sizeof(dkw->dkw_devname));
		memcpy(dkw->dkw_wname, sc->sc_wname, sizeof(dkw->dkw_wname));
		dkw->dkw_wname[sizeof(dkw->dkw_wname) - 1] = '\0';
		strlcpy(dkw->dkw_parent, sc->sc_parent->dk_name,
		    sizeof(dkw->dkw_parent));
		dkw->dkw_offset = sc->sc_offset;
		dkw->dkw_size = dkwedge_size(sc);
		strlcpy(dkw->dkw_ptype, sc->sc_ptype, sizeof(dkw->dkw_ptype));

		break;
	}
	case DIOCGSECTORALIGN: {
		struct disk_sectoralign *dsa = data;
		uint32_t r;

		/* Ask the parent, then rebias the answer by our offset. */
		error = VOP_IOCTL(sc->sc_parent->dk_rawvp, cmd, dsa, flag,
		    l != NULL ? l->l_cred : NOCRED);
		if (error)
			break;

		r = sc->sc_offset % dsa->dsa_alignment;
		if (r < dsa->dsa_firstaligned)
			dsa->dsa_firstaligned = dsa->dsa_firstaligned - r;
		else
			dsa->dsa_firstaligned = (dsa->dsa_firstaligned +
			    dsa->dsa_alignment) - r;
		break;
	}
	default:
		error = ENOTTY;
	}

	return error;
}

/*
 * dkdiscard: [devsw entry point]
 *
 *	Perform a discard-range request on a wedge.
1814 */ 1815static int 1816dkdiscard(dev_t dev, off_t pos, off_t len) 1817{ 1818 struct dkwedge_softc *sc = dkwedge_lookup(dev); 1819 uint64_t size = dkwedge_size(sc); 1820 unsigned shift; 1821 off_t offset, maxlen; 1822 int error; 1823 1824 KASSERT(sc != NULL); 1825 KASSERT(sc->sc_dev != NULL); 1826 KASSERT(sc->sc_state != DKW_STATE_LARVAL); 1827 KASSERT(sc->sc_state != DKW_STATE_DEAD); 1828 KASSERT(sc->sc_parent->dk_rawvp != NULL); 1829 1830 /* XXX check bounds on size/offset up front */ 1831 shift = (sc->sc_parent->dk_blkshift + DEV_BSHIFT); 1832 KASSERT(__type_fit(off_t, size)); 1833 KASSERT(__type_fit(off_t, sc->sc_offset)); 1834 KASSERT(0 <= sc->sc_offset); 1835 KASSERT(size <= (__type_max(off_t) >> shift)); 1836 KASSERT(sc->sc_offset <= ((__type_max(off_t) >> shift) - size)); 1837 offset = ((off_t)sc->sc_offset << shift); 1838 maxlen = ((off_t)size << shift); 1839 1840 if (len > maxlen) 1841 return EINVAL; 1842 if (pos > (maxlen - len)) 1843 return EINVAL; 1844 1845 pos += offset; 1846 1847 vn_lock(sc->sc_parent->dk_rawvp, LK_EXCLUSIVE | LK_RETRY); 1848 error = VOP_FDISCARD(sc->sc_parent->dk_rawvp, pos, len); 1849 VOP_UNLOCK(sc->sc_parent->dk_rawvp); 1850 1851 return error; 1852} 1853 1854/* 1855 * dksize: [devsw entry point] 1856 * 1857 * Query the size of a wedge for the purpose of performing a dump 1858 * or for swapping to. 1859 */ 1860static int 1861dksize(dev_t dev) 1862{ 1863 /* 1864 * Don't bother taking a reference because this is only used 1865 * either (a) while the device is open (for swap), or (b) while 1866 * any multiprocessing is quiescent (for crash dumps). 1867 */ 1868 struct dkwedge_softc *sc = dkwedge_lookup(dev); 1869 uint64_t p_size; 1870 int rv = -1; 1871 1872 if (sc == NULL) 1873 return -1; 1874 if (sc->sc_state != DKW_STATE_RUNNING) 1875 return -1; 1876 1877 /* Our content type is static, no need to open the device. 
*/ 1878 1879 p_size = dkwedge_size(sc) << sc->sc_parent->dk_blkshift; 1880 if (strcmp(sc->sc_ptype, DKW_PTYPE_SWAP) == 0) { 1881 /* Saturate if we are larger than INT_MAX. */ 1882 if (p_size > INT_MAX) 1883 rv = INT_MAX; 1884 else 1885 rv = (int)p_size; 1886 } 1887 1888 return rv; 1889} 1890 1891/* 1892 * dkdump: [devsw entry point] 1893 * 1894 * Perform a crash dump to a wedge. 1895 */ 1896static int 1897dkdump(dev_t dev, daddr_t blkno, void *va, size_t size) 1898{ 1899 /* 1900 * Don't bother taking a reference because this is only used 1901 * while any multiprocessing is quiescent. 1902 */ 1903 struct dkwedge_softc *sc = dkwedge_lookup(dev); 1904 const struct bdevsw *bdev; 1905 uint64_t p_size, p_offset; 1906 1907 if (sc == NULL) 1908 return ENXIO; 1909 if (sc->sc_state != DKW_STATE_RUNNING) 1910 return ENXIO; 1911 1912 /* Our content type is static, no need to open the device. */ 1913 1914 if (strcmp(sc->sc_ptype, DKW_PTYPE_SWAP) != 0 && 1915 strcmp(sc->sc_ptype, DKW_PTYPE_RAID) != 0 && 1916 strcmp(sc->sc_ptype, DKW_PTYPE_CGD) != 0) 1917 return ENXIO; 1918 if (size % DEV_BSIZE != 0) 1919 return EINVAL; 1920 1921 p_offset = sc->sc_offset << sc->sc_parent->dk_blkshift; 1922 p_size = dkwedge_size(sc) << sc->sc_parent->dk_blkshift; 1923 1924 if (blkno < 0 || blkno + size/DEV_BSIZE > p_size) { 1925 printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > " 1926 "p_size (%" PRIu64 ")\n", __func__, blkno, 1927 size/DEV_BSIZE, p_size); 1928 return EINVAL; 1929 } 1930 1931 bdev = bdevsw_lookup(sc->sc_pdev); 1932 return (*bdev->d_dump)(sc->sc_pdev, blkno + p_offset, va, size); 1933} 1934 1935/* 1936 * config glue 1937 */ 1938 1939/* 1940 * dkwedge_find_partition 1941 * 1942 * Find wedge corresponding to the specified parent name 1943 * and offset/length. 
1944 */ 1945static device_t 1946dkwedge_find_partition_acquire(device_t parent, daddr_t startblk, 1947 uint64_t nblks) 1948{ 1949 struct dkwedge_softc *sc; 1950 int i; 1951 device_t wedge = NULL; 1952 1953 rw_enter(&dkwedges_lock, RW_READER); 1954 for (i = 0; i < ndkwedges; i++) { 1955 if ((sc = dkwedges[i]) == NULL || sc->sc_dev == NULL) 1956 continue; 1957 if (strcmp(sc->sc_parent->dk_name, device_xname(parent)) == 0 && 1958 sc->sc_offset == startblk && 1959 dkwedge_size(sc) == nblks) { 1960 if (wedge) { 1961 printf("WARNING: double match for boot wedge " 1962 "(%s, %s)\n", 1963 device_xname(wedge), 1964 device_xname(sc->sc_dev)); 1965 continue; 1966 } 1967 wedge = sc->sc_dev; 1968 device_acquire(wedge); 1969 } 1970 } 1971 rw_exit(&dkwedges_lock); 1972 1973 return wedge; 1974} 1975 1976/* XXX unsafe */ 1977device_t 1978dkwedge_find_partition(device_t parent, daddr_t startblk, 1979 uint64_t nblks) 1980{ 1981 device_t dv; 1982 1983 if ((dv = dkwedge_find_partition_acquire(parent, startblk, nblks)) 1984 == NULL) 1985 return NULL; 1986 device_release(dv); 1987 return dv; 1988} 1989 1990const char * 1991dkwedge_get_parent_name(dev_t dev) 1992{ 1993 /* XXX: perhaps do this in lookup? */ 1994 int bmaj = bdevsw_lookup_major(&dk_bdevsw); 1995 int cmaj = cdevsw_lookup_major(&dk_cdevsw); 1996 1997 if (major(dev) != bmaj && major(dev) != cmaj) 1998 return NULL; 1999 2000 struct dkwedge_softc *const sc = dkwedge_lookup_acquire(dev); 2001 if (sc == NULL) 2002 return NULL; 2003 const char *const name = sc->sc_parent->dk_name; 2004 device_release(sc->sc_dev); 2005 return name; 2006} 2007