subr_disk.c revision 103714
150565Sphk/* 250565Sphk * ---------------------------------------------------------------------------- 350565Sphk * "THE BEER-WARE LICENSE" (Revision 42): 450565Sphk * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you 550565Sphk * can do whatever you want with this stuff. If we meet some day, and you think 650565Sphk * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp 750565Sphk * ---------------------------------------------------------------------------- 850565Sphk * 950565Sphk * $FreeBSD: head/sys/kern/subr_disk.c 103714 2002-09-20 19:36:05Z phk $ 1050565Sphk * 1150565Sphk */ 1250565Sphk 1392074Sphk#include "opt_geom.h" 1492074Sphk 1550565Sphk#include <sys/param.h> 1650565Sphk#include <sys/systm.h> 17103675Sphk#include <sys/stdint.h> 1860041Sphk#include <sys/bio.h> 1950565Sphk#include <sys/conf.h> 2050565Sphk#include <sys/disk.h> 21103714Sphk#include <sys/diskslice.h> 22103714Sphk#include <sys/disklabel.h> 23103675Sphk#ifndef GEOM 24103675Sphk#include <sys/kernel.h> 25103675Sphk#include <sys/sysctl.h> 2650565Sphk#include <sys/malloc.h> 2761953Snbm#include <sys/sysctl.h> 2850728Sphk#include <machine/md_var.h> 2964880Sphk#include <sys/ctype.h> 3064880Sphk 3169774Sphkstatic MALLOC_DEFINE(M_DISK, "disk", "disk data"); 3250565Sphk 3350565Sphkstatic d_strategy_t diskstrategy; 3450565Sphkstatic d_open_t diskopen; 3550565Sphkstatic d_close_t diskclose; 3650565Sphkstatic d_ioctl_t diskioctl; 3750565Sphkstatic d_psize_t diskpsize; 3861717Sphk 3961717Sphkstatic LIST_HEAD(, disk) disklist = LIST_HEAD_INITIALIZER(&disklist); 4064880Sphk 4185603Sphkvoid disk_dev_synth(dev_t dev); 4285603Sphk 4385603Sphkvoid 4485603Sphkdisk_dev_synth(dev_t dev) 4585603Sphk{ 4685603Sphk struct disk *dp; 4785603Sphk int u, s, p; 4885603Sphk dev_t pdev; 4985603Sphk 5086012Sphk if (dksparebits(dev)) 5185996Sphk return; 5285603Sphk LIST_FOREACH(dp, &disklist, d_list) { 5385603Sphk if (major(dev) != dp->d_devsw->d_maj) 5485603Sphk continue; 5585603Sphk u = dkunit(dev); 5685603Sphk p = RAW_PART; 5785603Sphk s = WHOLE_DISK_SLICE; 5885603Sphk pdev = makedev(dp->d_devsw->d_maj, dkmakeminor(u, s, p)); 5985624Sphk if (pdev->si_devsw == NULL) 6085624Sphk return; /* Probably a unit we don't have */ 6185603Sphk s = dkslice(dev); 6285603Sphk p = dkpart(dev); 6385603Sphk if (s == WHOLE_DISK_SLICE && p == RAW_PART) { 6485603Sphk /* XXX: actually should not happen */ 6585603Sphk dev = make_dev(pdev->si_devsw, dkmakeminor(u, s, p), 6685603Sphk UID_ROOT, GID_OPERATOR, 0640, "%s%d", 6785603Sphk dp->d_devsw->d_name, u); 6885603Sphk dev_depends(pdev, dev); 6985603Sphk return; 7085603Sphk } 7185603Sphk if (s == COMPATIBILITY_SLICE) { 7285603Sphk dev = make_dev(pdev->si_devsw, dkmakeminor(u, s, p), 7385603Sphk UID_ROOT, GID_OPERATOR, 0640, "%s%d%c", 7485603Sphk dp->d_devsw->d_name, u, 'a' + p); 7585603Sphk dev_depends(pdev, dev); 7685603Sphk return; 7785603Sphk } 7885858Sphk if (p != RAW_PART) { 7985858Sphk dev = make_dev(pdev->si_devsw, dkmakeminor(u, s, p), 8085858Sphk UID_ROOT, GID_OPERATOR, 0640, "%s%ds%d%c", 8185858Sphk dp->d_devsw->d_name, u, s - BASE_SLICE + 1, 8285858Sphk 'a' + p); 8385858Sphk } else { 8485858Sphk dev = make_dev(pdev->si_devsw, dkmakeminor(u, s, p), 8585858Sphk UID_ROOT, GID_OPERATOR, 0640, "%s%ds%d", 8685858Sphk dp->d_devsw->d_name, u, s - BASE_SLICE + 1); 8785858Sphk make_dev_alias(dev, "%s%ds%dc", 8885858Sphk dp->d_devsw->d_name, u, s - BASE_SLICE + 1); 8985858Sphk } 9085603Sphk dev_depends(pdev, dev); 9185603Sphk return; 9285603Sphk } 9385603Sphk} 9485603Sphk 9562617Simpstatic void 9664880Sphkdisk_clone(void *arg, char *name, int namelen, dev_t *dev) 9764880Sphk{ 9864880Sphk struct disk *dp; 9964880Sphk char const *d; 10092074Sphk char *e; 10192074Sphk int j, u, s, p; 10264880Sphk dev_t pdev; 10364880Sphk 10464880Sphk if (*dev != NODEV) 10564880Sphk return; 10664880Sphk 10764880Sphk LIST_FOREACH(dp, &disklist, d_list) { 10864880Sphk d = dp->d_devsw->d_name; 10992074Sphk j = dev_stdclone(name, &e, d, &u); 11092074Sphk if (j == 0) 11164880Sphk continue; 11270058Sphk if (u > DKMAXUNIT) 11370058Sphk continue; 11464880Sphk p = RAW_PART; 11564880Sphk s = WHOLE_DISK_SLICE; 11664880Sphk pdev = makedev(dp->d_devsw->d_maj, dkmakeminor(u, s, p)); 11764880Sphk if (pdev->si_disk == NULL) 11864880Sphk continue; 11992074Sphk if (*e != '\0') { 12092074Sphk j = dev_stdclone(e, &e, "s", &s); 12192074Sphk if (j == 0) 12292074Sphk s = COMPATIBILITY_SLICE; 12392074Sphk else if (j == 1 || j == 2) 12464880Sphk s += BASE_SLICE - 1; 12592074Sphk if (!*e) 12692074Sphk ; /* ad0s1 case */ 12792074Sphk else if (e[1] != '\0') 12892074Sphk return; /* can never be a disk name */ 12992074Sphk else if (*e < 'a' || *e > 'h') 13092074Sphk return; /* can never be a disk name */ 13164880Sphk else 13292074Sphk p = *e - 'a'; 13364880Sphk } 13492074Sphk if (s == WHOLE_DISK_SLICE && p == RAW_PART) { 13592074Sphk return; 13692074Sphk } else if (s >= BASE_SLICE && p != RAW_PART) { 13785603Sphk *dev = make_dev(pdev->si_devsw, dkmakeminor(u, s, p), 13885603Sphk UID_ROOT, GID_OPERATOR, 0640, "%s%ds%d%c", 13985858Sphk pdev->si_devsw->d_name, u, s - BASE_SLICE + 1, 14085858Sphk p + 'a'); 14185858Sphk } else if (s >= BASE_SLICE) { 14285603Sphk *dev = make_dev(pdev->si_devsw, dkmakeminor(u, s, p), 14385858Sphk UID_ROOT, GID_OPERATOR, 0640, "%s%ds%d", 14485858Sphk pdev->si_devsw->d_name, u, s - BASE_SLICE + 1); 14585858Sphk make_dev_alias(*dev, "%s%ds%dc", 14685858Sphk pdev->si_devsw->d_name, u, s - BASE_SLICE + 1); 14785858Sphk } else { 14885858Sphk *dev = make_dev(pdev->si_devsw, dkmakeminor(u, s, p), 14992074Sphk UID_ROOT, GID_OPERATOR, 0640, "%s%d%c", 15092074Sphk pdev->si_devsw->d_name, u, p + 'a'); 15185858Sphk } 15277215Sphk dev_depends(pdev, *dev); 15364880Sphk return; 15464880Sphk } 15564880Sphk} 15664880Sphk 15764880Sphkstatic void 15862617Simpinherit_raw(dev_t pdev, dev_t dev) 15962617Simp{ 16062617Simp dev->si_disk = pdev->si_disk; 16162617Simp dev->si_drv1 = pdev->si_drv1; 16262617Simp dev->si_drv2 = pdev->si_drv2; 16362617Simp dev->si_iosize_max = pdev->si_iosize_max; 16462617Simp dev->si_bsize_phys = pdev->si_bsize_phys; 16562617Simp dev->si_bsize_best = pdev->si_bsize_best; 16662617Simp} 16762617Simp 16850565Sphkdev_t 16951215Sphkdisk_create(int unit, struct disk *dp, int flags, struct cdevsw *cdevsw, struct cdevsw *proto) 17050565Sphk{ 17164880Sphk static int once; 17277408Sphk dev_t dev; 17350565Sphk 17477215Sphk if (!once) { 17577215Sphk EVENTHANDLER_REGISTER(dev_clone, disk_clone, 0, 1000); 17677215Sphk once++; 17777215Sphk } 17877215Sphk 17951198Sphk bzero(dp, sizeof(*dp)); 180103714Sphk dp->d_label = malloc(sizeof *dp->d_label, M_DEVBUF, M_WAITOK|M_ZERO); 18151198Sphk 18277147Sphk if (proto->d_open != diskopen) { 18351215Sphk *proto = *cdevsw; 18451215Sphk proto->d_open = diskopen; 18551215Sphk proto->d_close = diskclose; 18651215Sphk proto->d_ioctl = diskioctl; 18751215Sphk proto->d_strategy = diskstrategy; 18851215Sphk proto->d_psize = diskpsize; 18950565Sphk } 19050565Sphk 19153437Sjkh if (bootverbose) 19253437Sjkh printf("Creating DISK %s%d\n", cdevsw->d_name, unit); 19351243Sphk dev = make_dev(proto, dkmakeminor(unit, WHOLE_DISK_SLICE, RAW_PART), 19464880Sphk UID_ROOT, GID_OPERATOR, 0640, "%s%d", cdevsw->d_name, unit); 19550565Sphk 19650565Sphk dev->si_disk = dp; 19750565Sphk dp->d_dev = dev; 19852917Sphk dp->d_dsflags = flags; 19951215Sphk dp->d_devsw = cdevsw; 20061717Sphk LIST_INSERT_HEAD(&disklist, dp, d_list); 20177215Sphk 20250565Sphk return (dev); 20350565Sphk} 20450565Sphk 20593496Sphkstatic int 20693496Sphkdiskdumpconf(u_int onoff, dev_t dev, struct disk *dp) 20750728Sphk{ 20893496Sphk struct dumperinfo di; 20950728Sphk struct disklabel *dl; 21050728Sphk 21193496Sphk if (!onoff) 21293496Sphk return(set_dumper(NULL)); 21350728Sphk dl = dsgetlabel(dev, dp->d_slice); 21450728Sphk if (!dl) 21550728Sphk return (ENXIO); 21693496Sphk bzero(&di, sizeof di); 21793496Sphk di.dumper = (dumper_t *)dp->d_devsw->d_dump; 21893496Sphk di.priv = dp->d_dev; 21993496Sphk di.blocksize = dl->d_secsize; 22093496Sphk di.mediaoffset = (off_t)(dl->d_partitions[dkpart(dev)].p_offset + 22193496Sphk dp->d_slice->dss_slices[dkslice(dev)].ds_offset) * DEV_BSIZE; 22293496Sphk di.mediasize = 22393496Sphk (off_t)(dl->d_partitions[dkpart(dev)].p_size) * DEV_BSIZE; 22493496Sphk return(set_dumper(&di)); 22550728Sphk} 22650728Sphk 22750728Sphkvoid 22850728Sphkdisk_invalidate (struct disk *disk) 22950728Sphk{ 23057325Ssos if (disk->d_slice) 23157325Ssos dsgone(&disk->d_slice); 23250728Sphk} 23350728Sphk 23450565Sphkvoid 23556767Sphkdisk_destroy(dev_t dev) 23650565Sphk{ 23761717Sphk LIST_REMOVE(dev->si_disk, d_list); 238103714Sphk free(dev->si_disk->d_label, M_DEVBUF); 23961717Sphk bzero(dev->si_disk, sizeof(*dev->si_disk)); 24057325Ssos dev->si_disk = NULL; 24157325Ssos destroy_dev(dev); 24250565Sphk return; 24350565Sphk} 24450565Sphk 24561717Sphkstruct disk * 24661717Sphkdisk_enumerate(struct disk *disk) 24761717Sphk{ 24861717Sphk if (!disk) 24961717Sphk return (LIST_FIRST(&disklist)); 25061717Sphk else 25161717Sphk return (LIST_NEXT(disk, d_list)); 25261717Sphk} 25361717Sphk 25461953Snbmstatic int 25562573Sphksysctl_disks(SYSCTL_HANDLER_ARGS) 25661953Snbm{ 25761953Snbm struct disk *disk; 25861953Snbm int error, first; 25961953Snbm 26061953Snbm disk = NULL; 26161953Snbm first = 1; 26261953Snbm 26361953Snbm while ((disk = disk_enumerate(disk))) { 26461953Snbm if (!first) { 26561953Snbm error = SYSCTL_OUT(req, " ", 1); 26661953Snbm if (error) 26761953Snbm return error; 26861953Snbm } else { 26961953Snbm first = 0; 27061953Snbm } 27161953Snbm error = SYSCTL_OUT(req, disk->d_dev->si_name, strlen(disk->d_dev->si_name)); 27261953Snbm if (error) 27361953Snbm return error; 27461953Snbm } 27561953Snbm error = SYSCTL_OUT(req, "", 1); 27661953Snbm return error; 27761953Snbm} 27861953Snbm 279102241SarchieSYSCTL_PROC(_kern, OID_AUTO, disks, CTLTYPE_STRING | CTLFLAG_RD, 0, 0, 28061953Snbm sysctl_disks, "A", "names of available disks"); 28161953Snbm 28250728Sphk/* 28350728Sphk * The cdevsw functions 28450728Sphk */ 28550728Sphk 28650565Sphkstatic int 28783366Sjuliandiskopen(dev_t dev, int oflags, int devtype, struct thread *td) 28850565Sphk{ 28950565Sphk dev_t pdev; 29050565Sphk struct disk *dp; 29150565Sphk int error; 29250565Sphk 29350728Sphk error = 0; 29450565Sphk pdev = dkmodpart(dkmodslice(dev, WHOLE_DISK_SLICE), RAW_PART); 29550728Sphk 29650565Sphk dp = pdev->si_disk; 29750565Sphk if (!dp) 29850565Sphk return (ENXIO); 29950728Sphk 30052917Sphk while (dp->d_flags & DISKFLAG_LOCK) { 30152917Sphk dp->d_flags |= DISKFLAG_WANTED; 30254815Sphk error = tsleep(dp, PRIBIO | PCATCH, "diskopen", hz); 30354815Sphk if (error) 30454815Sphk return (error); 30552917Sphk } 30652917Sphk dp->d_flags |= DISKFLAG_LOCK; 30752917Sphk 30851860Sphk if (!dsisopen(dp->d_slice)) { 30951878Ssos if (!pdev->si_iosize_max) 31051878Ssos pdev->si_iosize_max = dev->si_iosize_max; 31183366Sjulian error = dp->d_devsw->d_open(pdev, oflags, devtype, td); 312103714Sphk dp->d_label->d_secsize = dp->d_sectorsize; 313103714Sphk dp->d_label->d_secperunit = dp->d_mediasize / dp->d_sectorsize; 31451860Sphk } 31551826Sphk 31651826Sphk /* Inherit properties from the whole/raw dev_t */ 31762617Simp inherit_raw(pdev, dev); 31850728Sphk 31950728Sphk if (error) 32052917Sphk goto out; 32150728Sphk 322103714Sphk error = dsopen(dev, devtype, dp->d_dsflags, &dp->d_slice, dp->d_label); 32350565Sphk 32450728Sphk if (!dsisopen(dp->d_slice)) 32583366Sjulian dp->d_devsw->d_close(pdev, oflags, devtype, td); 32652917Sphkout: 32752917Sphk dp->d_flags &= ~DISKFLAG_LOCK; 32852917Sphk if (dp->d_flags & DISKFLAG_WANTED) { 32952917Sphk dp->d_flags &= ~DISKFLAG_WANTED; 33052917Sphk wakeup(dp); 33152917Sphk } 33250728Sphk 33350565Sphk return(error); 33450565Sphk} 33550565Sphk 33650565Sphkstatic int 33783366Sjuliandiskclose(dev_t dev, int fflag, int devtype, struct thread *td) 33850565Sphk{ 33950565Sphk struct disk *dp; 34050565Sphk int error; 34162617Simp dev_t pdev; 34250565Sphk 34350565Sphk error = 0; 34462617Simp pdev = dkmodpart(dkmodslice(dev, WHOLE_DISK_SLICE), RAW_PART); 34562617Simp dp = pdev->si_disk; 34674206Ssos if (!dp) 34774206Ssos return (ENXIO); 34851822Sphk dsclose(dev, devtype, dp->d_slice); 34974206Ssos if (!dsisopen(dp->d_slice)) 35083366Sjulian error = dp->d_devsw->d_close(dp->d_dev, fflag, devtype, td); 35150565Sphk return (error); 35250565Sphk} 35350565Sphk 35450565Sphkstatic void 35559249Sphkdiskstrategy(struct bio *bp) 35650565Sphk{ 35750565Sphk dev_t pdev; 35850565Sphk struct disk *dp; 35950565Sphk 36062617Simp pdev = dkmodpart(dkmodslice(bp->bio_dev, WHOLE_DISK_SLICE), RAW_PART); 36162617Simp dp = pdev->si_disk; 36276361Sphk bp->bio_resid = bp->bio_bcount; 36362617Simp if (dp != bp->bio_dev->si_disk) 36462617Simp inherit_raw(pdev, bp->bio_dev); 36550565Sphk 36650565Sphk if (!dp) { 36776322Sphk biofinish(bp, NULL, ENXIO); 36850565Sphk return; 36950565Sphk } 37050565Sphk 37155763Sphk if (dscheck(bp, dp->d_slice) <= 0) { 37250565Sphk biodone(bp); 37350565Sphk return; 37450565Sphk } 37550565Sphk 37676324Sphk if (bp->bio_bcount == 0) { 37776324Sphk biodone(bp); 37876324Sphk return; 37976324Sphk } 38076324Sphk 38159623Sphk KASSERT(dp->d_devsw != NULL, ("NULL devsw")); 38259623Sphk KASSERT(dp->d_devsw->d_strategy != NULL, ("NULL d_strategy")); 38351215Sphk dp->d_devsw->d_strategy(bp); 38450565Sphk return; 38550565Sphk 38650565Sphk} 38750565Sphk 38850565Sphkstatic int 38983366Sjuliandiskioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, struct thread *td) 39050565Sphk{ 39150565Sphk struct disk *dp; 39250565Sphk int error; 39393496Sphk u_int u; 39462617Simp dev_t pdev; 39550565Sphk 39662617Simp pdev = dkmodpart(dkmodslice(dev, WHOLE_DISK_SLICE), RAW_PART); 39762617Simp dp = pdev->si_disk; 39874206Ssos if (!dp) 39974206Ssos return (ENXIO); 40094272Sphk if (cmd == DIOCSKERNELDUMP) { 40193496Sphk u = *(u_int *)data; 40293496Sphk return (diskdumpconf(u, dev, dp)); 40393496Sphk } 40494287Sphk if (cmd == DIOCGFRONTSTUFF) { 40594287Sphk *(off_t *)data = 8192; /* XXX: crude but enough) */ 40694287Sphk return (0); 40794287Sphk } 40850565Sphk error = dsioctl(dev, cmd, data, fflag, &dp->d_slice); 40950565Sphk if (error == ENOIOCTL) 41083366Sjulian error = dp->d_devsw->d_ioctl(dev, cmd, data, fflag, td); 41150565Sphk return (error); 41250565Sphk} 41350565Sphk 41450565Sphkstatic int 41550565Sphkdiskpsize(dev_t dev) 41650565Sphk{ 41750565Sphk struct disk *dp; 41850728Sphk dev_t pdev; 41950565Sphk 42062617Simp pdev = dkmodpart(dkmodslice(dev, WHOLE_DISK_SLICE), RAW_PART); 42162617Simp dp = pdev->si_disk; 42262617Simp if (!dp) 42362617Simp return (-1); 42462617Simp if (dp != dev->si_disk) { 42550728Sphk dev->si_drv1 = pdev->si_drv1; 42650728Sphk dev->si_drv2 = pdev->si_drv2; 42750728Sphk /* XXX: don't set bp->b_dev->si_disk (?) */ 42850728Sphk } 42950565Sphk return (dssize(dev, &dp->d_slice)); 43050565Sphk} 43151111Sjulian 43251111SjulianSYSCTL_INT(_debug_sizeof, OID_AUTO, disklabel, CTLFLAG_RD, 43351111Sjulian 0, sizeof(struct disklabel), "sizeof(struct disklabel)"); 43451111Sjulian 43551111SjulianSYSCTL_INT(_debug_sizeof, OID_AUTO, diskslices, CTLFLAG_RD, 43651111Sjulian 0, sizeof(struct diskslices), "sizeof(struct diskslices)"); 43751111Sjulian 43851111SjulianSYSCTL_INT(_debug_sizeof, OID_AUTO, disk, CTLFLAG_RD, 43951111Sjulian 0, sizeof(struct disk), "sizeof(struct disk)"); 44092074Sphk 44192074Sphk#endif 442103675Sphk 443103675Sphk/*- 444103675Sphk * Disk error is the preface to plaintive error messages 445103675Sphk * about failing disk transfers. It prints messages of the form 446103675Sphk * "hp0g: BLABLABLA cmd=read fsbn 12345 of 12344-12347" 447103675Sphk * blkdone should be -1 if the position of the error is unknown. 448103675Sphk * The message is printed with printf. 449103675Sphk */ 450103675Sphkvoid 451103675Sphkdisk_err(struct bio *bp, const char *what, int blkdone, int nl) 452103675Sphk{ 453103675Sphk daddr_t sn; 454103675Sphk 455103675Sphk printf("%s: %s", devtoname(bp->bio_dev), what); 456103675Sphk switch(bp->bio_cmd) { 457103675Sphk case BIO_READ: printf("cmd=read"); break; 458103675Sphk case BIO_WRITE: printf("cmd=write"); break; 459103675Sphk case BIO_DELETE: printf("cmd=delete"); break; 460103675Sphk case BIO_GETATTR: printf("cmd=getattr"); break; 461103675Sphk case BIO_SETATTR: printf("cmd=setattr"); break; 462103675Sphk default: printf("cmd=%x", bp->bio_cmd); break; 463103675Sphk } 464103675Sphk sn = bp->bio_blkno; 465103675Sphk if (bp->bio_bcount <= DEV_BSIZE) { 466103675Sphk printf("fsbn %jd%s", (intmax_t)sn, nl ? "\n" : ""); 467103675Sphk return; 468103675Sphk } 469103675Sphk if (blkdone >= 0) { 470103675Sphk sn += blkdone; 471103675Sphk printf("fsbn %jd of ", (intmax_t)sn); 472103675Sphk } 473103675Sphk printf("%jd-%jd", (intmax_t)bp->bio_blkno, 474103675Sphk (intmax_t)(bp->bio_blkno + (bp->bio_bcount - 1) / DEV_BSIZE)); 475103675Sphk if (nl) 476103675Sphk printf("\n"); 477103675Sphk} 478103683Sphk 479103683Sphk#ifdef notquite 480103683Sphk/* 481103683Sphk * Mutex to use when delaying niced I/O bound processes in bioq_disksort(). 482103683Sphk */ 483103683Sphkstatic struct mtx dksort_mtx; 484103683Sphkstatic void 485103683Sphkdksort_init(void) 486103683Sphk{ 487103683Sphk 488103683Sphk mtx_init(&dksort_mtx, "dksort", NULL, MTX_DEF); 489103683Sphk} 490103683SphkSYSINIT(dksort, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, dksort_init, NULL) 491103683Sphk#endif 492103683Sphk 493103683Sphk/* 494103683Sphk * Seek sort for disks. 495103683Sphk * 496103683Sphk * The buf_queue keep two queues, sorted in ascending block order. The first 497103683Sphk * queue holds those requests which are positioned after the current block 498103683Sphk * (in the first request); the second, which starts at queue->switch_point, 499103683Sphk * holds requests which came in after their block number was passed. Thus 500103683Sphk * we implement a one way scan, retracting after reaching the end of the drive 501103683Sphk * to the first request on the second queue, at which time it becomes the 502103683Sphk * first queue. 503103683Sphk * 504103683Sphk * A one-way scan is natural because of the way UNIX read-ahead blocks are 505103683Sphk * allocated. 506103683Sphk */ 507103683Sphk 508103683Sphkvoid 509103683Sphkbioq_disksort(bioq, bp) 510103683Sphk struct bio_queue_head *bioq; 511103683Sphk struct bio *bp; 512103683Sphk{ 513103683Sphk struct bio *bq; 514103683Sphk struct bio *bn; 515103683Sphk struct bio *be; 516103683Sphk 517103683Sphk#ifdef notquite 518103683Sphk struct thread *td = curthread; 519103683Sphk 520103683Sphk if (td && td->td_ksegrp->kg_nice > 0) { 521103683Sphk TAILQ_FOREACH(bn, &bioq->queue, bio_queue) 522103683Sphk if (BIOTOBUF(bp)->b_vp != BIOTOBUF(bn)->b_vp) 523103683Sphk break; 524103683Sphk if (bn != NULL) { 525103683Sphk mtx_lock(&dksort_mtx); 526103683Sphk msleep(&dksort_mtx, &dksort_mtx, 527103683Sphk PPAUSE | PCATCH | PDROP, "ioslow", 528103683Sphk td->td_ksegrp->kg_nice); 529103683Sphk } 530103683Sphk } 531103683Sphk#endif 532103683Sphk if (!atomic_cmpset_int(&bioq->busy, 0, 1)) 533103683Sphk panic("Recursing in bioq_disksort()"); 534103683Sphk be = TAILQ_LAST(&bioq->queue, bio_queue); 535103683Sphk /* 536103683Sphk * If the queue is empty or we are an 537103683Sphk * ordered transaction, then it's easy. 538103683Sphk */ 539103683Sphk if ((bq = bioq_first(bioq)) == NULL) { 540103683Sphk bioq_insert_tail(bioq, bp); 541103683Sphk bioq->busy = 0; 542103683Sphk return; 543103683Sphk } else if (bioq->insert_point != NULL) { 544103683Sphk 545103683Sphk /* 546103683Sphk * A certain portion of the list is 547103683Sphk * "locked" to preserve ordering, so 548103683Sphk * we can only insert after the insert 549103683Sphk * point. 550103683Sphk */ 551103683Sphk bq = bioq->insert_point; 552103683Sphk } else { 553103683Sphk 554103683Sphk /* 555103683Sphk * If we lie before the last removed (currently active) 556103683Sphk * request, and are not inserting ourselves into the 557103683Sphk * "locked" portion of the list, then we must add ourselves 558103683Sphk * to the second request list. 559103683Sphk */ 560103683Sphk if (bp->bio_pblkno < bioq->last_pblkno) { 561103683Sphk 562103683Sphk bq = bioq->switch_point; 563103683Sphk /* 564103683Sphk * If we are starting a new secondary list, 565103683Sphk * then it's easy. 566103683Sphk */ 567103683Sphk if (bq == NULL) { 568103683Sphk bioq->switch_point = bp; 569103683Sphk bioq_insert_tail(bioq, bp); 570103683Sphk bioq->busy = 0; 571103683Sphk return; 572103683Sphk } 573103683Sphk /* 574103683Sphk * If we lie ahead of the current switch point, 575103683Sphk * insert us before the switch point and move 576103683Sphk * the switch point. 577103683Sphk */ 578103683Sphk if (bp->bio_pblkno < bq->bio_pblkno) { 579103683Sphk bioq->switch_point = bp; 580103683Sphk TAILQ_INSERT_BEFORE(bq, bp, bio_queue); 581103683Sphk bioq->busy = 0; 582103683Sphk return; 583103683Sphk } 584103683Sphk } else { 585103683Sphk if (bioq->switch_point != NULL) 586103683Sphk be = TAILQ_PREV(bioq->switch_point, 587103683Sphk bio_queue, bio_queue); 588103683Sphk /* 589103683Sphk * If we lie between last_pblkno and bq, 590103683Sphk * insert before bq. 591103683Sphk */ 592103683Sphk if (bp->bio_pblkno < bq->bio_pblkno) { 593103683Sphk TAILQ_INSERT_BEFORE(bq, bp, bio_queue); 594103683Sphk bioq->busy = 0; 595103683Sphk return; 596103683Sphk } 597103683Sphk } 598103683Sphk } 599103683Sphk 600103683Sphk /* 601103683Sphk * Request is at/after our current position in the list. 602103683Sphk * Optimize for sequential I/O by seeing if we go at the tail. 603103683Sphk */ 604103683Sphk if (bp->bio_pblkno > be->bio_pblkno) { 605103683Sphk TAILQ_INSERT_AFTER(&bioq->queue, be, bp, bio_queue); 606103683Sphk bioq->busy = 0; 607103683Sphk return; 608103683Sphk } 609103683Sphk 610103683Sphk /* Otherwise, insertion sort */ 611103683Sphk while ((bn = TAILQ_NEXT(bq, bio_queue)) != NULL) { 612103683Sphk 613103683Sphk /* 614103683Sphk * We want to go after the current request if it is the end 615103683Sphk * of the first request list, or if the next request is a 616103683Sphk * larger cylinder than our request. 617103683Sphk */ 618103683Sphk if (bn == bioq->switch_point 619103683Sphk || bp->bio_pblkno < bn->bio_pblkno) 620103683Sphk break; 621103683Sphk bq = bn; 622103683Sphk } 623103683Sphk TAILQ_INSERT_AFTER(&bioq->queue, bq, bp, bio_queue); 624103683Sphk bioq->busy = 0; 625103683Sphk} 626103683Sphk 627103683Sphk 628