/* blkfront.c — FreeBSD head revision 189699 */
1/*- 2 * All rights reserved. 3 * 4 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 5 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 6 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 7 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 8 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 9 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 10 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 11 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 12 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 13 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 14 * SUCH DAMAGE. 15 * 16 */ 17 18/* 19 * XenoBSD block device driver 20 */ 21 22#include <sys/cdefs.h> 23__FBSDID("$FreeBSD: head/sys/dev/xen/blkfront/blkfront.c 189699 2009-03-11 15:30:12Z dfr $"); 24 25#include <sys/param.h> 26#include <sys/systm.h> 27#include <sys/malloc.h> 28#include <sys/kernel.h> 29#include <vm/vm.h> 30#include <vm/pmap.h> 31 32#include <sys/bio.h> 33#include <sys/bus.h> 34#include <sys/conf.h> 35#include <sys/module.h> 36 37#include <machine/bus.h> 38#include <sys/rman.h> 39#include <machine/resource.h> 40#include <machine/intr_machdep.h> 41#include <machine/vmparam.h> 42 43#include <machine/xen/xen-os.h> 44#include <machine/xen/xenfunc.h> 45#include <xen/hypervisor.h> 46#include <xen/xen_intr.h> 47#include <xen/evtchn.h> 48#include <xen/gnttab.h> 49#include <xen/interface/grant_table.h> 50#include <xen/interface/io/protocols.h> 51#include <xen/xenbus/xenbusvar.h> 52 53#include <geom/geom_disk.h> 54 55#include <dev/xen/blkfront/block.h> 56 57#include "xenbus_if.h" 58 59#define ASSERT(S) KASSERT(S, (#S)) 60/* prototypes */ 61struct xb_softc; 62static void xb_startio(struct xb_softc *sc); 63static void connect(device_t, struct blkfront_info *); 64static 
void blkfront_closing(device_t); 65static int blkfront_detach(device_t); 66static int talk_to_backend(device_t, struct blkfront_info *); 67static int setup_blkring(device_t, struct blkfront_info *); 68static void blkif_int(void *); 69#if 0 70static void blkif_restart_queue(void *arg); 71#endif 72static void blkif_recover(struct blkfront_info *); 73static void blkif_completion(struct blk_shadow *); 74static void blkif_free(struct blkfront_info *, int); 75 76#define GRANT_INVALID_REF 0 77#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE) 78 79LIST_HEAD(xb_softc_list_head, xb_softc) xbsl_head; 80 81/* Control whether runtime update of vbds is enabled. */ 82#define ENABLE_VBD_UPDATE 0 83 84#if ENABLE_VBD_UPDATE 85static void vbd_update(void); 86#endif 87 88 89#define BLKIF_STATE_DISCONNECTED 0 90#define BLKIF_STATE_CONNECTED 1 91#define BLKIF_STATE_SUSPENDED 2 92 93#ifdef notyet 94static char *blkif_state_name[] = { 95 [BLKIF_STATE_DISCONNECTED] = "disconnected", 96 [BLKIF_STATE_CONNECTED] = "connected", 97 [BLKIF_STATE_SUSPENDED] = "closed", 98}; 99 100static char * blkif_status_name[] = { 101 [BLKIF_INTERFACE_STATUS_CLOSED] = "closed", 102 [BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected", 103 [BLKIF_INTERFACE_STATUS_CONNECTED] = "connected", 104 [BLKIF_INTERFACE_STATUS_CHANGED] = "changed", 105}; 106#endif 107#define WPRINTK(fmt, args...) printf("[XEN] " fmt, ##args) 108#if 0 109#define DPRINTK(fmt, args...) printf("[XEN] %s:%d: " fmt ".\n", __func__, __LINE__, ##args) 110#else 111#define DPRINTK(fmt, args...) 
112#endif 113 114static grant_ref_t gref_head; 115#define MAXIMUM_OUTSTANDING_BLOCK_REQS \ 116 (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE) 117 118static void kick_pending_request_queues(struct blkfront_info *); 119static int blkif_open(struct disk *dp); 120static int blkif_close(struct disk *dp); 121static int blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td); 122static int blkif_queue_request(struct bio *bp); 123static void xb_strategy(struct bio *bp); 124 125 126 127/* XXX move to xb_vbd.c when VBD update support is added */ 128#define MAX_VBDS 64 129 130#define XBD_SECTOR_SIZE 512 /* XXX: assume for now */ 131#define XBD_SECTOR_SHFT 9 132 133static struct mtx blkif_io_lock; 134 135static vm_paddr_t 136pfn_to_mfn(vm_paddr_t pfn) 137{ 138 return (phystomach(pfn << PAGE_SHIFT) >> PAGE_SHIFT); 139} 140 141/* 142 * Translate Linux major/minor to an appropriate name and unit 143 * number. For HVM guests, this allows us to use the same drive names 144 * with blkfront as the emulated drives, easing transition slightly. 
145 */ 146static void 147blkfront_vdevice_to_unit(int vdevice, int *unit, const char **name) 148{ 149 static struct vdev_info { 150 int major; 151 int shift; 152 int base; 153 const char *name; 154 } info[] = { 155 {3, 6, 0, "ad"}, /* ide0 */ 156 {22, 6, 2, "ad"}, /* ide1 */ 157 {33, 6, 4, "ad"}, /* ide2 */ 158 {34, 6, 6, "ad"}, /* ide3 */ 159 {56, 6, 8, "ad"}, /* ide4 */ 160 {57, 6, 10, "ad"}, /* ide5 */ 161 {88, 6, 12, "ad"}, /* ide6 */ 162 {89, 6, 14, "ad"}, /* ide7 */ 163 {90, 6, 16, "ad"}, /* ide8 */ 164 {91, 6, 18, "ad"}, /* ide9 */ 165 166 {8, 4, 0, "da"}, /* scsi disk0 */ 167 {65, 4, 16, "da"}, /* scsi disk1 */ 168 {66, 4, 32, "da"}, /* scsi disk2 */ 169 {67, 4, 48, "da"}, /* scsi disk3 */ 170 {68, 4, 64, "da"}, /* scsi disk4 */ 171 {69, 4, 80, "da"}, /* scsi disk5 */ 172 {70, 4, 96, "da"}, /* scsi disk6 */ 173 {71, 4, 112, "da"}, /* scsi disk7 */ 174 {128, 4, 128, "da"}, /* scsi disk8 */ 175 {129, 4, 144, "da"}, /* scsi disk9 */ 176 {130, 4, 160, "da"}, /* scsi disk10 */ 177 {131, 4, 176, "da"}, /* scsi disk11 */ 178 {132, 4, 192, "da"}, /* scsi disk12 */ 179 {133, 4, 208, "da"}, /* scsi disk13 */ 180 {134, 4, 224, "da"}, /* scsi disk14 */ 181 {135, 4, 240, "da"}, /* scsi disk15 */ 182 183 {202, 4, 0, "xbd"}, /* xbd */ 184 185 {0, 0, 0, NULL}, 186 }; 187 int major = vdevice >> 8; 188 int minor = vdevice & 0xff; 189 int i; 190 191 if (vdevice & (1 << 28)) { 192 *unit = (vdevice & ((1 << 28) - 1)) >> 8; 193 *name = "xbd"; 194 } 195 196 for (i = 0; info[i].major; i++) { 197 if (info[i].major == major) { 198 *unit = info[i].base + (minor >> info[i].shift); 199 *name = info[i].name; 200 return; 201 } 202 } 203 204 *unit = minor >> 4; 205 *name = "xbd"; 206} 207 208int 209xlvbd_add(device_t dev, blkif_sector_t capacity, 210 int vdevice, uint16_t vdisk_info, uint16_t sector_size, 211 struct blkfront_info *info) 212{ 213 struct xb_softc *sc; 214 int unit, error = 0; 215 const char *name; 216 217 blkfront_vdevice_to_unit(vdevice, &unit, &name); 218 219 sc = (struct 
xb_softc *)malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO); 220 sc->xb_unit = unit; 221 sc->xb_info = info; 222 info->sc = sc; 223 224 if (strcmp(name, "xbd")) 225 device_printf(dev, "attaching as %s%d\n", name, unit); 226 227 memset(&sc->xb_disk, 0, sizeof(sc->xb_disk)); 228 sc->xb_disk = disk_alloc(); 229 sc->xb_disk->d_unit = sc->xb_unit; 230 sc->xb_disk->d_open = blkif_open; 231 sc->xb_disk->d_close = blkif_close; 232 sc->xb_disk->d_ioctl = blkif_ioctl; 233 sc->xb_disk->d_strategy = xb_strategy; 234 sc->xb_disk->d_name = name; 235 sc->xb_disk->d_drv1 = sc; 236 sc->xb_disk->d_sectorsize = sector_size; 237 238 /* XXX */ 239 sc->xb_disk->d_mediasize = capacity << XBD_SECTOR_SHFT; 240#if 0 241 sc->xb_disk->d_maxsize = DFLTPHYS; 242#else /* XXX: xen can't handle large single i/o requests */ 243 sc->xb_disk->d_maxsize = 4096; 244#endif 245#ifdef notyet 246 XENPRINTF("attaching device 0x%x unit %d capacity %llu\n", 247 xb_diskinfo[sc->xb_unit].device, sc->xb_unit, 248 sc->xb_disk->d_mediasize); 249#endif 250 sc->xb_disk->d_flags = 0; 251 disk_create(sc->xb_disk, DISK_VERSION_00); 252 bioq_init(&sc->xb_bioq); 253 254 return error; 255} 256 257void 258xlvbd_del(struct blkfront_info *info) 259{ 260 struct xb_softc *sc; 261 262 sc = info->sc; 263 disk_destroy(sc->xb_disk); 264} 265/************************ end VBD support *****************/ 266 267/* 268 * Read/write routine for a buffer. Finds the proper unit, place it on 269 * the sortq and kick the controller. 270 */ 271static void 272xb_strategy(struct bio *bp) 273{ 274 struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1; 275 276 /* bogus disk? 
*/ 277 if (sc == NULL) { 278 bp->bio_error = EINVAL; 279 bp->bio_flags |= BIO_ERROR; 280 goto bad; 281 } 282 283 DPRINTK(""); 284 285 /* 286 * Place it in the queue of disk activities for this disk 287 */ 288 mtx_lock(&blkif_io_lock); 289 bioq_disksort(&sc->xb_bioq, bp); 290 291 xb_startio(sc); 292 mtx_unlock(&blkif_io_lock); 293 return; 294 295 bad: 296 /* 297 * Correctly set the bio to indicate a failed tranfer. 298 */ 299 bp->bio_resid = bp->bio_bcount; 300 biodone(bp); 301 return; 302} 303 304static int 305blkfront_probe(device_t dev) 306{ 307 308 if (!strcmp(xenbus_get_type(dev), "vbd")) { 309 device_set_desc(dev, "Virtual Block Device"); 310 device_quiet(dev); 311 return (0); 312 } 313 314 return (ENXIO); 315} 316 317/* 318 * Setup supplies the backend dir, virtual device. We place an event 319 * channel and shared frame entries. We watch backend to wait if it's 320 * ok. 321 */ 322static int 323blkfront_attach(device_t dev) 324{ 325 int error, vdevice, i, unit; 326 struct blkfront_info *info; 327 const char *name; 328 329 /* FIXME: Use dynamic device id if this is not set. */ 330 error = xenbus_scanf(XBT_NIL, xenbus_get_node(dev), 331 "virtual-device", NULL, "%i", &vdevice); 332 if (error) { 333 xenbus_dev_fatal(dev, error, "reading virtual-device"); 334 printf("couldn't find virtual device"); 335 return (error); 336 } 337 338 blkfront_vdevice_to_unit(vdevice, &unit, &name); 339 if (!strcmp(name, "xbd")) 340 device_set_unit(dev, unit); 341 342 info = device_get_softc(dev); 343 344 /* 345 * XXX debug only 346 */ 347 for (i = 0; i < sizeof(*info); i++) 348 if (((uint8_t *)info)[i] != 0) 349 panic("non-null memory"); 350 351 info->shadow_free = 0; 352 info->xbdev = dev; 353 info->vdevice = vdevice; 354 info->connected = BLKIF_STATE_DISCONNECTED; 355 356 /* work queue needed ? 
*/ 357 for (i = 0; i < BLK_RING_SIZE; i++) 358 info->shadow[i].req.id = i+1; 359 info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; 360 361 /* Front end dir is a number, which is used as the id. */ 362 info->handle = strtoul(strrchr(xenbus_get_node(dev),'/')+1, NULL, 0); 363 364 error = talk_to_backend(dev, info); 365 if (error) 366 return (error); 367 368 return (0); 369} 370 371static int 372blkfront_suspend(device_t dev) 373{ 374 struct blkfront_info *info = device_get_softc(dev); 375 376 /* Prevent new requests being issued until we fix things up. */ 377 mtx_lock(&blkif_io_lock); 378 info->connected = BLKIF_STATE_SUSPENDED; 379 mtx_unlock(&blkif_io_lock); 380 381 return (0); 382} 383 384static int 385blkfront_resume(device_t dev) 386{ 387 struct blkfront_info *info = device_get_softc(dev); 388 int err; 389 390 DPRINTK("blkfront_resume: %s\n", xenbus_get_node(dev)); 391 392 blkif_free(info, 1); 393 err = talk_to_backend(dev, info); 394 if (info->connected == BLKIF_STATE_SUSPENDED && !err) 395 blkif_recover(info); 396 397 return (err); 398} 399 400/* Common code used when first setting up, and when resuming. */ 401static int 402talk_to_backend(device_t dev, struct blkfront_info *info) 403{ 404 const char *message = NULL; 405 struct xenbus_transaction xbt; 406 int err; 407 408 /* Create shared ring, alloc event channel. 
*/ 409 err = setup_blkring(dev, info); 410 if (err) 411 goto out; 412 413 again: 414 err = xenbus_transaction_start(&xbt); 415 if (err) { 416 xenbus_dev_fatal(dev, err, "starting transaction"); 417 goto destroy_blkring; 418 } 419 420 err = xenbus_printf(xbt, xenbus_get_node(dev), 421 "ring-ref","%u", info->ring_ref); 422 if (err) { 423 message = "writing ring-ref"; 424 goto abort_transaction; 425 } 426 err = xenbus_printf(xbt, xenbus_get_node(dev), 427 "event-channel", "%u", irq_to_evtchn_port(info->irq)); 428 if (err) { 429 message = "writing event-channel"; 430 goto abort_transaction; 431 } 432 err = xenbus_printf(xbt, xenbus_get_node(dev), 433 "protocol", "%s", XEN_IO_PROTO_ABI_NATIVE); 434 if (err) { 435 message = "writing protocol"; 436 goto abort_transaction; 437 } 438 439 err = xenbus_transaction_end(xbt, 0); 440 if (err) { 441 if (err == EAGAIN) 442 goto again; 443 xenbus_dev_fatal(dev, err, "completing transaction"); 444 goto destroy_blkring; 445 } 446 xenbus_set_state(dev, XenbusStateInitialised); 447 448 return 0; 449 450 abort_transaction: 451 xenbus_transaction_end(xbt, 1); 452 if (message) 453 xenbus_dev_fatal(dev, err, "%s", message); 454 destroy_blkring: 455 blkif_free(info, 0); 456 out: 457 return err; 458} 459 460static int 461setup_blkring(device_t dev, struct blkfront_info *info) 462{ 463 blkif_sring_t *sring; 464 int error; 465 466 info->ring_ref = GRANT_INVALID_REF; 467 468 sring = (blkif_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO); 469 if (sring == NULL) { 470 xenbus_dev_fatal(dev, ENOMEM, "allocating shared ring"); 471 return ENOMEM; 472 } 473 SHARED_RING_INIT(sring); 474 FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); 475 476 error = xenbus_grant_ring(dev, 477 (vtomach(info->ring.sring) >> PAGE_SHIFT), &info->ring_ref); 478 if (error) { 479 free(sring, M_DEVBUF); 480 info->ring.sring = NULL; 481 goto fail; 482 } 483 484 error = bind_listening_port_to_irqhandler(xenbus_get_otherend_id(dev), 485 "xbd", (driver_intr_t *)blkif_int, 
info, 486 INTR_TYPE_BIO | INTR_MPSAFE, &info->irq); 487 if (error) { 488 xenbus_dev_fatal(dev, error, 489 "bind_evtchn_to_irqhandler failed"); 490 goto fail; 491 } 492 493 return (0); 494 fail: 495 blkif_free(info, 0); 496 return (error); 497} 498 499 500/** 501 * Callback received when the backend's state changes. 502 */ 503static void 504blkfront_backend_changed(device_t dev, XenbusState backend_state) 505{ 506 struct blkfront_info *info = device_get_softc(dev); 507 508 DPRINTK("backend_state=%d\n", backend_state); 509 510 switch (backend_state) { 511 case XenbusStateUnknown: 512 case XenbusStateInitialising: 513 case XenbusStateInitWait: 514 case XenbusStateInitialised: 515 case XenbusStateClosed: 516 case XenbusStateReconfigured: 517 case XenbusStateReconfiguring: 518 break; 519 520 case XenbusStateConnected: 521 connect(dev, info); 522 break; 523 524 case XenbusStateClosing: 525 if (info->users > 0) 526 xenbus_dev_error(dev, -EBUSY, 527 "Device in use; refusing to close"); 528 else 529 blkfront_closing(dev); 530#ifdef notyet 531 bd = bdget(info->dev); 532 if (bd == NULL) 533 xenbus_dev_fatal(dev, -ENODEV, "bdget failed"); 534 535 down(&bd->bd_sem); 536 if (info->users > 0) 537 xenbus_dev_error(dev, -EBUSY, 538 "Device in use; refusing to close"); 539 else 540 blkfront_closing(dev); 541 up(&bd->bd_sem); 542 bdput(bd); 543#endif 544 } 545} 546 547/* 548** Invoked when the backend is finally 'ready' (and has told produced 549** the details about the physical device - #sectors, size, etc). 
550*/ 551static void 552connect(device_t dev, struct blkfront_info *info) 553{ 554 unsigned long sectors, sector_size; 555 unsigned int binfo; 556 int err; 557 558 if( (info->connected == BLKIF_STATE_CONNECTED) || 559 (info->connected == BLKIF_STATE_SUSPENDED) ) 560 return; 561 562 DPRINTK("blkfront.c:connect:%s.\n", xenbus_get_otherend_path(dev)); 563 564 err = xenbus_gather(XBT_NIL, xenbus_get_otherend_path(dev), 565 "sectors", "%lu", §ors, 566 "info", "%u", &binfo, 567 "sector-size", "%lu", §or_size, 568 NULL); 569 if (err) { 570 xenbus_dev_fatal(dev, err, 571 "reading backend fields at %s", 572 xenbus_get_otherend_path(dev)); 573 return; 574 } 575 err = xenbus_gather(XBT_NIL, xenbus_get_otherend_path(dev), 576 "feature-barrier", "%lu", &info->feature_barrier, 577 NULL); 578 if (err) 579 info->feature_barrier = 0; 580 581 device_printf(dev, "%juMB <%s> at %s", 582 (uintmax_t) sectors / (1048576 / sector_size), 583 device_get_desc(dev), 584 xenbus_get_node(dev)); 585 bus_print_child_footer(device_get_parent(dev), dev); 586 587 xlvbd_add(dev, sectors, info->vdevice, binfo, sector_size, info); 588 589 (void)xenbus_set_state(dev, XenbusStateConnected); 590 591 /* Kick pending requests. */ 592 mtx_lock(&blkif_io_lock); 593 info->connected = BLKIF_STATE_CONNECTED; 594 kick_pending_request_queues(info); 595 mtx_unlock(&blkif_io_lock); 596 info->is_ready = 1; 597 598#if 0 599 add_disk(info->gd); 600#endif 601} 602 603/** 604 * Handle the change of state of the backend to Closing. We must delete our 605 * device-layer structures now, to ensure that writes are flushed through to 606 * the backend. Once is this done, we can switch to Closed in 607 * acknowledgement. 
608 */ 609static void 610blkfront_closing(device_t dev) 611{ 612 struct blkfront_info *info = device_get_softc(dev); 613 614 DPRINTK("blkfront_closing: %s removed\n", xenbus_get_node(dev)); 615 616 if (info->mi) { 617 DPRINTK("Calling xlvbd_del\n"); 618 xlvbd_del(info); 619 info->mi = NULL; 620 } 621 622 xenbus_set_state(dev, XenbusStateClosed); 623} 624 625 626static int 627blkfront_detach(device_t dev) 628{ 629 struct blkfront_info *info = device_get_softc(dev); 630 631 DPRINTK("blkfront_remove: %s removed\n", xenbus_get_node(dev)); 632 633 blkif_free(info, 0); 634 635 return 0; 636} 637 638 639static inline int 640GET_ID_FROM_FREELIST(struct blkfront_info *info) 641{ 642 unsigned long nfree = info->shadow_free; 643 644 KASSERT(nfree <= BLK_RING_SIZE, ("free %lu > RING_SIZE", nfree)); 645 info->shadow_free = info->shadow[nfree].req.id; 646 info->shadow[nfree].req.id = 0x0fffffee; /* debug */ 647 return nfree; 648} 649 650static inline void 651ADD_ID_TO_FREELIST(struct blkfront_info *info, unsigned long id) 652{ 653 info->shadow[id].req.id = info->shadow_free; 654 info->shadow[id].request = 0; 655 info->shadow_free = id; 656} 657 658static inline void 659flush_requests(struct blkfront_info *info) 660{ 661 int notify; 662 663 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify); 664 665 if (notify) 666 notify_remote_via_irq(info->irq); 667} 668 669static void 670kick_pending_request_queues(struct blkfront_info *info) 671{ 672 /* XXX check if we can't simplify */ 673#if 0 674 if (!RING_FULL(&info->ring)) { 675 /* Re-enable calldowns. */ 676 blk_start_queue(info->rq); 677 /* Kick things off immediately. */ 678 do_blkif_request(info->rq); 679 } 680#endif 681 if (!RING_FULL(&info->ring)) { 682#if 0 683 sc = LIST_FIRST(&xbsl_head); 684 LIST_REMOVE(sc, entry); 685 /* Re-enable calldowns. */ 686 blk_start_queue(di->rq); 687#endif 688 /* Kick things off immediately. 
*/ 689 xb_startio(info->sc); 690 } 691} 692 693#if 0 694/* XXX */ 695static void blkif_restart_queue(void *arg) 696{ 697 struct blkfront_info *info = (struct blkfront_info *)arg; 698 699 mtx_lock(&blkif_io_lock); 700 kick_pending_request_queues(info); 701 mtx_unlock(&blkif_io_lock); 702} 703#endif 704 705static void blkif_restart_queue_callback(void *arg) 706{ 707#if 0 708 struct blkfront_info *info = (struct blkfront_info *)arg; 709 /* XXX BSD equiv ? */ 710 711 schedule_work(&info->work); 712#endif 713} 714 715static int 716blkif_open(struct disk *dp) 717{ 718 struct xb_softc *sc = (struct xb_softc *)dp->d_drv1; 719 720 if (sc == NULL) { 721 printf("xb%d: not found", sc->xb_unit); 722 return (ENXIO); 723 } 724 725 sc->xb_flags |= XB_OPEN; 726 sc->xb_info->users++; 727 return (0); 728} 729 730static int 731blkif_close(struct disk *dp) 732{ 733 struct xb_softc *sc = (struct xb_softc *)dp->d_drv1; 734 735 if (sc == NULL) 736 return (ENXIO); 737 sc->xb_flags &= ~XB_OPEN; 738 if (--(sc->xb_info->users) == 0) { 739 /* Check whether we have been instructed to close. We will 740 have ignored this request initially, as the device was 741 still mounted. */ 742 device_t dev = sc->xb_info->xbdev; 743 XenbusState state = 744 xenbus_read_driver_state(xenbus_get_otherend_path(dev)); 745 746 if (state == XenbusStateClosing) 747 blkfront_closing(dev); 748 } 749 return (0); 750} 751 752static int 753blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td) 754{ 755 struct xb_softc *sc = (struct xb_softc *)dp->d_drv1; 756 757 if (sc == NULL) 758 return (ENXIO); 759 760 return (ENOTTY); 761} 762 763 764/* 765 * blkif_queue_request 766 * 767 * request block io 768 * 769 * id: for guest use only. 770 * operation: BLKIF_OP_{READ,WRITE,PROBE} 771 * buffer: buffer to read/write into. this should be a 772 * virtual address in the guest os. 
773 */ 774static int blkif_queue_request(struct bio *bp) 775{ 776 caddr_t alignbuf; 777 vm_paddr_t buffer_ma; 778 blkif_request_t *ring_req; 779 unsigned long id; 780 uint64_t fsect, lsect; 781 struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1; 782 struct blkfront_info *info = sc->xb_info; 783 int ref; 784 785 if (unlikely(sc->xb_info->connected != BLKIF_STATE_CONNECTED)) 786 return 1; 787 788 if (gnttab_alloc_grant_references( 789 BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) { 790 gnttab_request_free_callback( 791 &info->callback, 792 blkif_restart_queue_callback, 793 info, 794 BLKIF_MAX_SEGMENTS_PER_REQUEST); 795 return 1; 796 } 797 798 /* Check if the buffer is properly aligned */ 799 if ((vm_offset_t)bp->bio_data & PAGE_MASK) { 800 int align = (bp->bio_bcount < PAGE_SIZE/2) ? XBD_SECTOR_SIZE : 801 PAGE_SIZE; 802 caddr_t newbuf = malloc(bp->bio_bcount + align, M_DEVBUF, 803 M_NOWAIT); 804 805 alignbuf = (char *)roundup2((u_long)newbuf, align); 806 807 /* save a copy of the current buffer */ 808 bp->bio_driver1 = newbuf; 809 bp->bio_driver2 = alignbuf; 810 811 /* Copy the data for a write */ 812 if (bp->bio_cmd == BIO_WRITE) 813 bcopy(bp->bio_data, alignbuf, bp->bio_bcount); 814 } else 815 alignbuf = bp->bio_data; 816 817 /* Fill out a communications ring structure. */ 818 ring_req = RING_GET_REQUEST(&info->ring, 819 info->ring.req_prod_pvt); 820 id = GET_ID_FROM_FREELIST(info); 821 info->shadow[id].request = (unsigned long)bp; 822 823 ring_req->id = id; 824 ring_req->operation = (bp->bio_cmd == BIO_READ) ? BLKIF_OP_READ : 825 BLKIF_OP_WRITE; 826 827 ring_req->sector_number= (blkif_sector_t)bp->bio_pblkno; 828 ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xb_disk; 829 830 ring_req->nr_segments = 0; /* XXX not doing scatter/gather since buffer 831 * chaining is not supported. 
832 */ 833 834 buffer_ma = vtomach(alignbuf); 835 fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT; 836 lsect = fsect + (bp->bio_bcount >> XBD_SECTOR_SHFT) - 1; 837 /* install a grant reference. */ 838 ref = gnttab_claim_grant_reference(&gref_head); 839 KASSERT( ref != -ENOSPC, ("grant_reference failed") ); 840 841 gnttab_grant_foreign_access_ref( 842 ref, 843 xenbus_get_otherend_id(info->xbdev), 844 buffer_ma >> PAGE_SHIFT, 845 ring_req->operation & 1 ); /* ??? */ 846 info->shadow[id].frame[ring_req->nr_segments] = 847 buffer_ma >> PAGE_SHIFT; 848 849 ring_req->seg[ring_req->nr_segments] = 850 (struct blkif_request_segment) { 851 .gref = ref, 852 .first_sect = fsect, 853 .last_sect = lsect }; 854 855 ring_req->nr_segments++; 856 KASSERT((buffer_ma & (XBD_SECTOR_SIZE-1)) == 0, 857 ("XEN buffer must be sector aligned")); 858 KASSERT(lsect <= 7, 859 ("XEN disk driver data cannot cross a page boundary")); 860 861 buffer_ma &= ~PAGE_MASK; 862 863 info->ring.req_prod_pvt++; 864 865 /* Keep a private copy so we can reissue requests when recovering. */ 866 info->shadow[id].req = *ring_req; 867 868 gnttab_free_grant_references(gref_head); 869 870 return 0; 871} 872 873 874 875/* 876 * Dequeue buffers and place them in the shared communication ring. 877 * Return when no more requests can be accepted or all buffers have 878 * been queued. 879 * 880 * Signal XEN once the ring has been filled out. 
881 */ 882static void 883xb_startio(struct xb_softc *sc) 884{ 885 struct bio *bp; 886 int queued = 0; 887 struct blkfront_info *info = sc->xb_info; 888 DPRINTK(""); 889 890 mtx_assert(&blkif_io_lock, MA_OWNED); 891 892 while ((bp = bioq_takefirst(&sc->xb_bioq)) != NULL) { 893 894 if (RING_FULL(&info->ring)) 895 goto wait; 896 897 if (blkif_queue_request(bp)) { 898 wait: 899 bioq_insert_head(&sc->xb_bioq, bp); 900 break; 901 } 902 queued++; 903 } 904 905 if (queued != 0) 906 flush_requests(sc->xb_info); 907} 908 909static void 910blkif_int(void *xsc) 911{ 912 struct xb_softc *sc = NULL; 913 struct bio *bp; 914 blkif_response_t *bret; 915 RING_IDX i, rp; 916 struct blkfront_info *info = xsc; 917 DPRINTK(""); 918 919 TRACE_ENTER; 920 921 mtx_lock(&blkif_io_lock); 922 923 if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) { 924 mtx_unlock(&blkif_io_lock); 925 return; 926 } 927 928 again: 929 rp = info->ring.sring->rsp_prod; 930 rmb(); /* Ensure we see queued responses up to 'rp'. */ 931 932 for (i = info->ring.rsp_cons; i != rp; i++) { 933 unsigned long id; 934 935 bret = RING_GET_RESPONSE(&info->ring, i); 936 id = bret->id; 937 bp = (struct bio *)info->shadow[id].request; 938 939 blkif_completion(&info->shadow[id]); 940 941 ADD_ID_TO_FREELIST(info, id); 942 943 switch (bret->operation) { 944 case BLKIF_OP_READ: 945 /* had an unaligned buffer that needs to be copied */ 946 if (bp->bio_driver1) 947 bcopy(bp->bio_driver2, bp->bio_data, bp->bio_bcount); 948 /* FALLTHROUGH */ 949 case BLKIF_OP_WRITE: 950 951 /* free the copy buffer */ 952 if (bp->bio_driver1) { 953 free(bp->bio_driver1, M_DEVBUF); 954 bp->bio_driver1 = NULL; 955 } 956 957 if ( unlikely(bret->status != BLKIF_RSP_OKAY) ) { 958 printf("Bad return from blkdev data request: %x\n", 959 bret->status); 960 bp->bio_flags |= BIO_ERROR; 961 } 962 963 sc = (struct xb_softc *)bp->bio_disk->d_drv1; 964 965 if (bp->bio_flags & BIO_ERROR) 966 bp->bio_error = EIO; 967 else 968 bp->bio_resid = 0; 969 970 biodone(bp); 
971 break; 972 default: 973 panic("received invalid operation"); 974 break; 975 } 976 } 977 978 info->ring.rsp_cons = i; 979 980 if (i != info->ring.req_prod_pvt) { 981 int more_to_do; 982 RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do); 983 if (more_to_do) 984 goto again; 985 } else { 986 info->ring.sring->rsp_event = i + 1; 987 } 988 989 kick_pending_request_queues(info); 990 991 mtx_unlock(&blkif_io_lock); 992} 993 994static void 995blkif_free(struct blkfront_info *info, int suspend) 996{ 997 998/* Prevent new requests being issued until we fix things up. */ 999 mtx_lock(&blkif_io_lock); 1000 info->connected = suspend ? 1001 BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED; 1002 mtx_unlock(&blkif_io_lock); 1003 1004 /* Free resources associated with old device channel. */ 1005 if (info->ring_ref != GRANT_INVALID_REF) { 1006 gnttab_end_foreign_access(info->ring_ref, 1007 info->ring.sring); 1008 info->ring_ref = GRANT_INVALID_REF; 1009 info->ring.sring = NULL; 1010 } 1011 if (info->irq) 1012 unbind_from_irqhandler(info->irq); 1013 info->irq = 0; 1014 1015} 1016 1017static void 1018blkif_completion(struct blk_shadow *s) 1019{ 1020 int i; 1021 1022 for (i = 0; i < s->req.nr_segments; i++) 1023 gnttab_end_foreign_access(s->req.seg[i].gref, 0UL); 1024} 1025 1026static void 1027blkif_recover(struct blkfront_info *info) 1028{ 1029 int i, j; 1030 blkif_request_t *req; 1031 struct blk_shadow *copy; 1032 1033 if (!info->sc) 1034 return; 1035 1036 /* Stage 1: Make a safe copy of the shadow state. */ 1037 copy = (struct blk_shadow *)malloc(sizeof(info->shadow), M_DEVBUF, M_NOWAIT|M_ZERO); 1038 memcpy(copy, info->shadow, sizeof(info->shadow)); 1039 1040 /* Stage 2: Set up free list. 
*/ 1041 memset(&info->shadow, 0, sizeof(info->shadow)); 1042 for (i = 0; i < BLK_RING_SIZE; i++) 1043 info->shadow[i].req.id = i+1; 1044 info->shadow_free = info->ring.req_prod_pvt; 1045 info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; 1046 1047 /* Stage 3: Find pending requests and requeue them. */ 1048 for (i = 0; i < BLK_RING_SIZE; i++) { 1049 /* Not in use? */ 1050 if (copy[i].request == 0) 1051 continue; 1052 1053 /* Grab a request slot and copy shadow state into it. */ 1054 req = RING_GET_REQUEST( 1055 &info->ring, info->ring.req_prod_pvt); 1056 *req = copy[i].req; 1057 1058 /* We get a new request id, and must reset the shadow state. */ 1059 req->id = GET_ID_FROM_FREELIST(info); 1060 memcpy(&info->shadow[req->id], ©[i], sizeof(copy[i])); 1061 1062 /* Rewrite any grant references invalidated by suspend/resume. */ 1063 for (j = 0; j < req->nr_segments; j++) 1064 gnttab_grant_foreign_access_ref( 1065 req->seg[j].gref, 1066 xenbus_get_otherend_id(info->xbdev), 1067 pfn_to_mfn(info->shadow[req->id].frame[j]), 1068 0 /* assume not readonly */); 1069 1070 info->shadow[req->id].req = *req; 1071 1072 info->ring.req_prod_pvt++; 1073 } 1074 1075 free(copy, M_DEVBUF); 1076 1077 xenbus_set_state(info->xbdev, XenbusStateConnected); 1078 1079 /* Now safe for us to use the shared ring */ 1080 mtx_lock(&blkif_io_lock); 1081 info->connected = BLKIF_STATE_CONNECTED; 1082 mtx_unlock(&blkif_io_lock); 1083 1084 /* Send off requeued requests */ 1085 mtx_lock(&blkif_io_lock); 1086 flush_requests(info); 1087 1088 /* Kick any other new requests queued since we resumed */ 1089 kick_pending_request_queues(info); 1090 mtx_unlock(&blkif_io_lock); 1091} 1092 1093/* ** Driver registration ** */ 1094static device_method_t blkfront_methods[] = { 1095 /* Device interface */ 1096 DEVMETHOD(device_probe, blkfront_probe), 1097 DEVMETHOD(device_attach, blkfront_attach), 1098 DEVMETHOD(device_detach, blkfront_detach), 1099 DEVMETHOD(device_shutdown, bus_generic_shutdown), 1100 
DEVMETHOD(device_suspend, blkfront_suspend), 1101 DEVMETHOD(device_resume, blkfront_resume), 1102 1103 /* Xenbus interface */ 1104 DEVMETHOD(xenbus_backend_changed, blkfront_backend_changed), 1105 1106 { 0, 0 } 1107}; 1108 1109static driver_t blkfront_driver = { 1110 "xbd", 1111 blkfront_methods, 1112 sizeof(struct blkfront_info), 1113}; 1114devclass_t blkfront_devclass; 1115 1116DRIVER_MODULE(xbd, xenbus, blkfront_driver, blkfront_devclass, 0, 0); 1117 1118MTX_SYSINIT(ioreq, &blkif_io_lock, "BIO LOCK", MTX_NOWITNESS); /* XXX how does one enroll a lock? */ 1119 1120