geom_disk.c revision 133318
160786Sps/*- 2170259Sdelphij * Copyright (c) 2002 Poul-Henning Kamp 360786Sps * Copyright (c) 2002 Networks Associates Technology, Inc. 460786Sps * All rights reserved. 560786Sps * 660786Sps * This software was developed for the FreeBSD Project by Poul-Henning Kamp 760786Sps * and NAI Labs, the Security Research Division of Network Associates, Inc. 860786Sps * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the 960786Sps * DARPA CHATS research program. 1060786Sps * 1163120Sps * Redistribution and use in source and binary forms, with or without 1260786Sps * modification, are permitted provided that the following conditions 1360786Sps * are met: 1460786Sps * 1. Redistributions of source code must retain the above copyright 1560786Sps * notice, this list of conditions and the following disclaimer. 1660786Sps * 2. Redistributions in binary form must reproduce the above copyright 1760786Sps * notice, this list of conditions and the following disclaimer in the 1860786Sps * documentation and/or other materials provided with the distribution. 1960786Sps * 3. The names of the authors may not be used to endorse or promote 2060786Sps * products derived from this software without specific prior written 2160786Sps * permission. 2260786Sps * 2360786Sps * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 2460786Sps * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2560786Sps * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2660786Sps * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 2760786Sps * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2860786Sps * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2960786Sps * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 3060786Sps * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 3160786Sps * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 3260786Sps * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 3360786Sps * SUCH DAMAGE. 3460786Sps */ 3560786Sps 3660786Sps#include <sys/cdefs.h> 37161478Sdelphij__FBSDID("$FreeBSD: head/sys/geom/geom_disk.c 133318 2004-08-08 07:57:53Z phk $"); 38170259Sdelphij 39170259Sdelphij#include "opt_geom.h" 4060786Sps 4160786Sps#include <sys/param.h> 4260786Sps#include <sys/systm.h> 4360786Sps#include <sys/kernel.h> 4460786Sps#include <sys/sysctl.h> 4560786Sps#include <sys/bio.h> 4660786Sps#include <sys/conf.h> 4760786Sps#include <sys/fcntl.h> 4860786Sps#include <sys/malloc.h> 4960786Sps#include <sys/sysctl.h> 5060786Sps#include <sys/devicestat.h> 5160786Sps#include <machine/md_var.h> 5260786Sps 5360786Sps#include <sys/lock.h> 5460786Sps#include <sys/mutex.h> 5560786Sps#include <geom/geom.h> 5660786Sps#include <geom/geom_disk.h> 5760786Sps#include <geom/geom_int.h> 5860786Sps 5960786Spsstatic struct mtx g_disk_done_mtx; 6060786Sps 6160786Spsstatic g_access_t g_disk_access; 6260786Spsstatic g_init_t g_disk_init; 63170259Sdelphijstatic g_fini_t g_disk_fini; 6463120Spsstatic g_start_t g_disk_start; 6560786Spsstatic g_ioctl_t g_disk_ioctl; 6660786Spsstatic g_dumpconf_t g_disk_dumpconf; 6760786Sps 6860786Spsstruct g_class g_disk_class = { 6960786Sps .name = "DISK", 7060786Sps .version = G_VERSION, 7160786Sps .init = g_disk_init, 7260786Sps .fini = g_disk_fini, 7360786Sps .start = g_disk_start, 7460786Sps .access = g_disk_access, 7560786Sps .ioctl = g_disk_ioctl, 7660786Sps .dumpconf = g_disk_dumpconf, 7760786Sps}; 7860786Sps 7960786Spsstatic void 8060786Spsg_disk_init(struct g_class *mp __unused) 8160786Sps{ 8260786Sps 8360786Sps mtx_init(&g_disk_done_mtx, "g_disk_done", NULL, MTX_DEF); 8460786Sps} 8560786Sps 8660786Spsstatic void 8760786Spsg_disk_fini(struct g_class *mp __unused) 8860786Sps{ 8960786Sps 9060786Sps mtx_destroy(&g_disk_done_mtx); 9160786Sps} 9260786Sps 9360786SpsDECLARE_GEOM_CLASS(g_disk_class, g_disk); 9460786Sps 9560786Spsstatic void __inline 96172471Sdelphijg_disk_lock_giant(struct disk *dp) 97172471Sdelphij{ 9860786Sps if (dp->d_flags & DISKFLAG_NEEDSGIANT) 9960786Sps mtx_lock(&Giant); 10060786Sps} 10160786Sps 10260786Spsstatic void __inline 10360786Spsg_disk_unlock_giant(struct disk *dp) 10460786Sps{ 10560786Sps if (dp->d_flags & DISKFLAG_NEEDSGIANT) 10660786Sps mtx_unlock(&Giant); 10760786Sps} 10860786Sps 10960786Spsstatic int 11060786Spsg_disk_access(struct g_provider *pp, int r, int w, int e) 11160786Sps{ 11260786Sps struct disk *dp; 11360786Sps int error; 11460786Sps 11560786Sps g_trace(G_T_ACCESS, "g_disk_access(%s, %d, %d, %d)", 11660786Sps pp->name, r, w, e); 11760786Sps g_topology_assert(); 11860786Sps dp = pp->geom->softc; 11960786Sps if (dp == NULL || dp->d_destroyed) { 12060786Sps /* 12160786Sps * Allow decreasing access count even if disk is not 12260786Sps * avaliable anymore. 12360786Sps */ 12460786Sps if (r <= 0 && w <= 0 && e <= 0) 12560786Sps return (0); 12660786Sps return (ENXIO); 12760786Sps } 12860786Sps r += pp->acr; 12960786Sps w += pp->acw; 13060786Sps e += pp->ace; 13160786Sps error = 0; 13260786Sps if ((pp->acr + pp->acw + pp->ace) == 0 && (r + w + e) > 0) { 13360786Sps if (dp->d_open != NULL) { 13460786Sps g_disk_lock_giant(dp); 13560786Sps error = dp->d_open(dp); 13660786Sps if (error != 0) 13760786Sps printf("Opened disk %s -> %d\n", 13860786Sps pp->name, error); 13960786Sps g_disk_unlock_giant(dp); 14060786Sps } 14160786Sps pp->mediasize = dp->d_mediasize; 14260786Sps pp->sectorsize = dp->d_sectorsize; 14360786Sps dp->d_flags |= DISKFLAG_OPEN; 14460786Sps if (dp->d_maxsize == 0) { 14560786Sps printf("WARNING: Disk drive %s%d has no d_maxsize\n", 14660786Sps dp->d_name, dp->d_unit); 14760786Sps dp->d_maxsize = DFLTPHYS; 14860786Sps } 14960786Sps } else if ((pp->acr + pp->acw + pp->ace) > 0 && (r + w + e) == 0) { 15060786Sps if (dp->d_close != NULL) { 15160786Sps g_disk_lock_giant(dp); 15260786Sps error = dp->d_close(dp); 15360786Sps if (error != 0) 15460786Sps printf("Closed disk %s -> %d\n", 15560786Sps pp->name, error); 15660786Sps g_disk_unlock_giant(dp); 15760786Sps } 15860786Sps dp->d_flags &= ~DISKFLAG_OPEN; 15960786Sps } 16060786Sps return (error); 161170259Sdelphij} 16260786Sps 16360786Spsstatic void 16460786Spsg_disk_kerneldump(struct bio *bp, struct disk *dp) 16560786Sps{ 16660786Sps int error; 16760786Sps struct g_kerneldump *gkd; 16860786Sps struct dumperinfo di; 16960786Sps struct g_geom *gp; 17060786Sps 17160786Sps gkd = (struct g_kerneldump*)bp->bio_data; 17260786Sps gp = bp->bio_to->geom; 17360786Sps g_trace(G_T_TOPOLOGY, "g_disk_kernedump(%s, %jd, %jd)", 17460786Sps gp->name, (intmax_t)gkd->offset, (intmax_t)gkd->length); 17560786Sps if (dp->d_dump == NULL) { 17660786Sps g_io_deliver(bp, ENODEV); 17760786Sps return; 17860786Sps } 17960786Sps di.dumper = dp->d_dump; 18060786Sps di.priv = dp; 18160786Sps di.blocksize = dp->d_sectorsize; 18260786Sps di.mediaoffset = gkd->offset; 18360786Sps di.mediasize = gkd->length; 18460786Sps error = set_dumper(&di); 18560786Sps g_io_deliver(bp, error); 18660786Sps} 18760786Sps 18860786Spsstatic void 18960786Spsg_disk_done(struct bio *bp) 19060786Sps{ 19160786Sps struct bio *bp2; 19260786Sps struct disk *dp; 19360786Sps 19460786Sps /* See "notes" for why we need a mutex here */ 19560786Sps /* XXX: will witness accept a mix of Giant/unGiant drivers here ? */ 19660786Sps mtx_lock(&g_disk_done_mtx); 19760786Sps bp->bio_completed = bp->bio_length - bp->bio_resid; 19860786Sps 19960786Sps bp2 = bp->bio_parent; 20060786Sps if (bp2->bio_error == 0) 20160786Sps bp2->bio_error = bp->bio_error; 20260786Sps bp2->bio_completed += bp->bio_completed; 20360786Sps g_destroy_bio(bp); 20460786Sps bp2->bio_inbed++; 20560786Sps if (bp2->bio_children == bp2->bio_inbed) { 20660786Sps bp2->bio_resid = bp2->bio_bcount - bp2->bio_completed; 20760786Sps if ((dp = bp2->bio_to->geom->softc)) 20860786Sps devstat_end_transaction_bio(dp->d_devstat, bp2); 20960786Sps g_io_deliver(bp2, bp2->bio_error); 21060786Sps } 21160786Sps mtx_unlock(&g_disk_done_mtx); 21260786Sps} 21360786Sps 21460786Spsstatic int 21560786Spsg_disk_ioctl(struct g_provider *pp, u_long cmd, void * data, struct thread *td) 21660786Sps{ 21760786Sps struct g_geom *gp; 21860786Sps struct disk *dp; 21960786Sps int error; 22060786Sps 22160786Sps gp = pp->geom; 22260786Sps dp = gp->softc; 22360786Sps 22460786Sps if (dp->d_ioctl == NULL) 22560786Sps return (ENOIOCTL); 22660786Sps g_disk_lock_giant(dp); 22760786Sps error = dp->d_ioctl(dp, cmd, data, 0, td); 22860786Sps g_disk_unlock_giant(dp); 22960786Sps return(error); 23060786Sps} 23160786Sps 23260786Spsstatic void 23360786Spsg_disk_start(struct bio *bp) 23460786Sps{ 23560786Sps struct bio *bp2, *bp3; 23660786Sps struct disk *dp; 23760786Sps int error; 23860786Sps off_t off; 23960786Sps 24060786Sps dp = bp->bio_to->geom->softc; 24160786Sps if (dp == NULL || dp->d_destroyed) 24260786Sps g_io_deliver(bp, ENXIO); 24360786Sps error = EJUSTRETURN; 24460786Sps switch(bp->bio_cmd) { 24560786Sps case BIO_DELETE: 24660786Sps if (!(dp->d_flags & DISKFLAG_CANDELETE)) { 24760786Sps error = 0; 24860786Sps break; 249170259Sdelphij } 25060786Sps /* fall-through */ 25160786Sps case BIO_READ: 25260786Sps case BIO_WRITE: 25360786Sps off = 0; 25460786Sps bp3 = NULL; 25560786Sps bp2 = g_clone_bio(bp); 256161478Sdelphij if (bp2 == NULL) { 257161478Sdelphij error = ENOMEM; 25860786Sps break; 25960786Sps } 26060786Sps devstat_start_transaction_bio(dp->d_devstat, bp); 26160786Sps do { 26260786Sps bp2->bio_offset += off; 26360786Sps bp2->bio_length -= off; 26460786Sps bp2->bio_data += off; 26560786Sps if (bp2->bio_length > dp->d_maxsize) { 26660786Sps /* 26760786Sps * XXX: If we have a stripesize we should really 26860786Sps * use it here. 26960786Sps */ 27060786Sps bp2->bio_length = dp->d_maxsize; 27160786Sps off += dp->d_maxsize; 27260786Sps /* 27360786Sps * To avoid a race, we need to grab the next bio 27460786Sps * before we schedule this one. See "notes". 27560786Sps */ 27660786Sps bp3 = g_clone_bio(bp); 27760786Sps if (bp3 == NULL) 27860786Sps bp->bio_error = ENOMEM; 279 } 280 bp2->bio_done = g_disk_done; 281 bp2->bio_pblkno = bp2->bio_offset / dp->d_sectorsize; 282 bp2->bio_bcount = bp2->bio_length; 283 bp2->bio_disk = dp; 284 g_disk_lock_giant(dp); 285 dp->d_strategy(bp2); 286 g_disk_unlock_giant(dp); 287 bp2 = bp3; 288 bp3 = NULL; 289 } while (bp2 != NULL); 290 break; 291 case BIO_GETATTR: 292 if (g_handleattr_int(bp, "GEOM::fwsectors", dp->d_fwsectors)) 293 break; 294 else if (g_handleattr_int(bp, "GEOM::fwheads", dp->d_fwheads)) 295 break; 296 else if (g_handleattr_off_t(bp, "GEOM::frontstuff", 0)) 297 break; 298 else if (!strcmp(bp->bio_attribute, "GEOM::kerneldump")) 299 g_disk_kerneldump(bp, dp); 300 else 301 error = ENOIOCTL; 302 break; 303 default: 304 error = EOPNOTSUPP; 305 break; 306 } 307 if (error != EJUSTRETURN) 308 g_io_deliver(bp, error); 309 return; 310} 311 312static void 313g_disk_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) 314{ 315 struct disk *dp; 316 317 dp = gp->softc; 318 if (dp == NULL) 319 return; 320 if (indent == NULL) { 321 sbuf_printf(sb, " hd %u", dp->d_fwheads); 322 sbuf_printf(sb, " sc %u", dp->d_fwsectors); 323 return; 324 } 325 if (pp != NULL) { 326 sbuf_printf(sb, "%s<fwheads>%u</fwheads>\n", 327 indent, dp->d_fwheads); 328 sbuf_printf(sb, "%s<fwsectors>%u</fwsectors>\n", 329 indent, dp->d_fwsectors); 330 } 331} 332 333static void 334g_disk_create(void *arg, int flag) 335{ 336 struct g_geom *gp; 337 struct g_provider *pp; 338 struct disk *dp; 339 340 if (flag == EV_CANCEL) 341 return; 342 g_topology_assert(); 343 dp = arg; 344 gp = g_new_geomf(&g_disk_class, "%s%d", dp->d_name, dp->d_unit); 345 gp->softc = dp; 346 pp = g_new_providerf(gp, "%s", gp->name); 347 pp->mediasize = dp->d_mediasize; 348 pp->sectorsize = dp->d_sectorsize; 349 if (dp->d_flags & DISKFLAG_CANDELETE) 350 pp->flags |= G_PF_CANDELETE; 351 pp->stripeoffset = dp->d_stripeoffset; 352 pp->stripesize = dp->d_stripesize; 353 if (bootverbose) 354 printf("GEOM: new disk %s\n", gp->name); 355 dp->d_geom = gp; 356 g_error_provider(pp, 0); 357} 358 359static void 360g_disk_destroy(void *ptr, int flag) 361{ 362 struct disk *dp; 363 struct g_geom *gp; 364 365 g_topology_assert(); 366 dp = ptr; 367 gp = dp->d_geom; 368 gp->softc = NULL; 369 g_wither_geom(gp, ENXIO); 370 g_free(dp); 371} 372 373struct disk * 374disk_alloc() 375{ 376 struct disk *dp; 377 378 dp = g_malloc(sizeof *dp, M_WAITOK | M_ZERO); 379 return (dp); 380} 381 382void 383disk_create(struct disk *dp, int version) 384{ 385 if (version != DISK_VERSION_00) { 386 printf("WARNING: Attempt to add disk %s%d %s", 387 dp->d_name, dp->d_unit, 388 " using incompatible ABI version of disk(9)\n"); 389 printf("WARNING: Ignoring disk %s%d\n", 390 dp->d_name, dp->d_unit); 391 return; 392 } 393 KASSERT(dp->d_strategy != NULL, ("disk_create need d_strategy")); 394 KASSERT(dp->d_name != NULL, ("disk_create need d_name")); 395 KASSERT(*dp->d_name != 0, ("disk_create need d_name")); 396 KASSERT(strlen(dp->d_name) < SPECNAMELEN - 4, ("disk name too long")); 397 if (dp->d_devstat == NULL) 398 dp->d_devstat = devstat_new_entry(dp->d_name, dp->d_unit, 399 dp->d_sectorsize, DEVSTAT_ALL_SUPPORTED, 400 DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX); 401 dp->d_geom = NULL; 402 g_post_event(g_disk_create, dp, M_WAITOK, dp, NULL); 403} 404 405void 406disk_destroy(struct disk *dp) 407{ 408 409 g_cancel_event(dp); 410 dp->d_destroyed = 1; 411 if (dp->d_devstat != NULL) 412 devstat_remove_entry(dp->d_devstat); 413 g_post_event(g_disk_destroy, dp, M_WAITOK, NULL); 414} 415 416static void 417g_kern_disks(void *p, int flag __unused) 418{ 419 struct sbuf *sb; 420 struct g_geom *gp; 421 char *sp; 422 423 sb = p; 424 sp = ""; 425 g_topology_assert(); 426 LIST_FOREACH(gp, &g_disk_class.geom, geom) { 427 sbuf_printf(sb, "%s%s", sp, gp->name); 428 sp = " "; 429 } 430 sbuf_finish(sb); 431} 432 433static int 434sysctl_disks(SYSCTL_HANDLER_ARGS) 435{ 436 int error; 437 struct sbuf *sb; 438 439 sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND); 440 g_waitfor_event(g_kern_disks, sb, M_WAITOK, NULL); 441 error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); 442 sbuf_delete(sb); 443 return error; 444} 445 446SYSCTL_PROC(_kern, OID_AUTO, disks, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NOLOCK, 0, 0, 447 sysctl_disks, "A", "names of available disks"); 448 449