/* blkfront.c revision 181805 */
1/*- 2 * All rights reserved. 3 * 4 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 5 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 6 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 7 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 8 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 9 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 10 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 11 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 12 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 13 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 14 * SUCH DAMAGE. 15 * 16 */ 17 18/* 19 * XenoBSD block device driver 20 */ 21 22#include <sys/cdefs.h> 23__FBSDID("$FreeBSD: head/sys/dev/xen/blkfront/blkfront.c 181805 2008-08-17 23:33:33Z kmacy $"); 24 25#include <sys/param.h> 26#include <sys/systm.h> 27#include <sys/malloc.h> 28#include <sys/kernel.h> 29#include <vm/vm.h> 30#include <vm/pmap.h> 31 32#include <sys/bio.h> 33#include <sys/bus.h> 34#include <sys/conf.h> 35#include <sys/module.h> 36 37#include <machine/bus.h> 38#include <sys/rman.h> 39#include <machine/resource.h> 40#include <machine/intr_machdep.h> 41#include <machine/vmparam.h> 42 43#include <machine/xen/hypervisor.h> 44#include <machine/xen/xen-os.h> 45#include <machine/xen/xen_intr.h> 46#include <machine/xen/xenbus.h> 47#include <machine/xen/evtchn.h> 48#include <xen/interface/grant_table.h> 49 50#include <geom/geom_disk.h> 51#include <machine/xen/xenfunc.h> 52#include <xen/gnttab.h> 53 54#include <dev/xen/blkfront/block.h> 55 56#define ASSERT(S) KASSERT(S, (#S)) 57/* prototypes */ 58struct xb_softc; 59static void xb_startio(struct xb_softc *sc); 60static void connect(struct blkfront_info *); 61static void blkfront_closing(struct xenbus_device *); 62static 
int blkfront_remove(struct xenbus_device *); 63static int talk_to_backend(struct xenbus_device *, struct blkfront_info *); 64static int setup_blkring(struct xenbus_device *, struct blkfront_info *); 65static void blkif_int(void *); 66#if 0 67static void blkif_restart_queue(void *arg); 68#endif 69static void blkif_recover(struct blkfront_info *); 70static void blkif_completion(struct blk_shadow *); 71static void blkif_free(struct blkfront_info *, int); 72 73#define GRANT_INVALID_REF 0 74#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE) 75 76LIST_HEAD(xb_softc_list_head, xb_softc) xbsl_head; 77 78/* Control whether runtime update of vbds is enabled. */ 79#define ENABLE_VBD_UPDATE 0 80 81#if ENABLE_VBD_UPDATE 82static void vbd_update(void); 83#endif 84 85 86#define BLKIF_STATE_DISCONNECTED 0 87#define BLKIF_STATE_CONNECTED 1 88#define BLKIF_STATE_SUSPENDED 2 89 90#ifdef notyet 91static char *blkif_state_name[] = { 92 [BLKIF_STATE_DISCONNECTED] = "disconnected", 93 [BLKIF_STATE_CONNECTED] = "connected", 94 [BLKIF_STATE_SUSPENDED] = "closed", 95}; 96 97static char * blkif_status_name[] = { 98 [BLKIF_INTERFACE_STATUS_CLOSED] = "closed", 99 [BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected", 100 [BLKIF_INTERFACE_STATUS_CONNECTED] = "connected", 101 [BLKIF_INTERFACE_STATUS_CHANGED] = "changed", 102}; 103#endif 104#define WPRINTK(fmt, args...) printf("[XEN] " fmt, ##args) 105#if 0 106#define DPRINTK(fmt, args...) printf("[XEN] %s:%d" fmt ".\n", __FUNCTION__, __LINE__,##args) 107#else 108#define DPRINTK(fmt, args...) 
109#endif 110 111static grant_ref_t gref_head; 112#define MAXIMUM_OUTSTANDING_BLOCK_REQS \ 113 (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE) 114 115static void kick_pending_request_queues(struct blkfront_info *); 116static int blkif_open(struct disk *dp); 117static int blkif_close(struct disk *dp); 118static int blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td); 119static int blkif_queue_request(struct bio *bp); 120static void xb_strategy(struct bio *bp); 121 122 123 124/* XXX move to xb_vbd.c when VBD update support is added */ 125#define MAX_VBDS 64 126 127#define XBD_SECTOR_SIZE 512 /* XXX: assume for now */ 128#define XBD_SECTOR_SHFT 9 129 130static struct mtx blkif_io_lock; 131 132static vm_paddr_t 133pfn_to_mfn(vm_paddr_t pfn) 134{ 135 return (phystomach(pfn << PAGE_SHIFT) >> PAGE_SHIFT); 136} 137 138 139int 140xlvbd_add(blkif_sector_t capacity, int unit, uint16_t vdisk_info, uint16_t sector_size, 141 struct blkfront_info *info) 142{ 143 struct xb_softc *sc; 144 int error = 0; 145 146 sc = (struct xb_softc *)malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO); 147 sc->xb_unit = unit; 148 sc->xb_info = info; 149 info->sc = sc; 150 151 memset(&sc->xb_disk, 0, sizeof(sc->xb_disk)); 152 sc->xb_disk = disk_alloc(); 153 sc->xb_disk->d_unit = unit; 154 sc->xb_disk->d_open = blkif_open; 155 sc->xb_disk->d_close = blkif_close; 156 sc->xb_disk->d_ioctl = blkif_ioctl; 157 sc->xb_disk->d_strategy = xb_strategy; 158 sc->xb_disk->d_name = "xbd"; 159 sc->xb_disk->d_drv1 = sc; 160 sc->xb_disk->d_sectorsize = sector_size; 161 162 /* XXX */ 163 sc->xb_disk->d_mediasize = capacity << XBD_SECTOR_SHFT; 164#if 0 165 sc->xb_disk->d_maxsize = DFLTPHYS; 166#else /* XXX: xen can't handle large single i/o requests */ 167 sc->xb_disk->d_maxsize = 4096; 168#endif 169#ifdef notyet 170 XENPRINTF("attaching device 0x%x unit %d capacity %llu\n", 171 xb_diskinfo[sc->xb_unit].device, sc->xb_unit, 172 sc->xb_disk->d_mediasize); 173#endif 174 sc->xb_disk->d_flags = 
0; 175 disk_create(sc->xb_disk, DISK_VERSION_00); 176 bioq_init(&sc->xb_bioq); 177 178 return error; 179} 180 181void 182xlvbd_del(struct blkfront_info *info) 183{ 184 struct xb_softc *sc; 185 186 sc = info->sc; 187 disk_destroy(sc->xb_disk); 188} 189/************************ end VBD support *****************/ 190 191/* 192 * Read/write routine for a buffer. Finds the proper unit, place it on 193 * the sortq and kick the controller. 194 */ 195static void 196xb_strategy(struct bio *bp) 197{ 198 struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1; 199 200 /* bogus disk? */ 201 if (sc == NULL) { 202 bp->bio_error = EINVAL; 203 bp->bio_flags |= BIO_ERROR; 204 goto bad; 205 } 206 207 DPRINTK(""); 208 209 /* 210 * Place it in the queue of disk activities for this disk 211 */ 212 mtx_lock(&blkif_io_lock); 213 bioq_disksort(&sc->xb_bioq, bp); 214 215 xb_startio(sc); 216 mtx_unlock(&blkif_io_lock); 217 return; 218 219 bad: 220 /* 221 * Correctly set the bio to indicate a failed tranfer. 222 */ 223 bp->bio_resid = bp->bio_bcount; 224 biodone(bp); 225 return; 226} 227 228 229/* Setup supplies the backend dir, virtual device. 230 231We place an event channel and shared frame entries. 232We watch backend to wait if it's ok. */ 233static int blkfront_probe(struct xenbus_device *dev, 234 const struct xenbus_device_id *id) 235{ 236 int err, vdevice, i; 237 struct blkfront_info *info; 238 239 /* FIXME: Use dynamic device id if this is not set. 
*/ 240 err = xenbus_scanf(XBT_NIL, dev->nodename, 241 "virtual-device", "%i", &vdevice); 242 if (err != 1) { 243 xenbus_dev_fatal(dev, err, "reading virtual-device"); 244 printf("couldn't find virtual device"); 245 return (err); 246 } 247 248 info = malloc(sizeof(*info), M_DEVBUF, M_NOWAIT|M_ZERO); 249 if (info == NULL) { 250 xenbus_dev_fatal(dev, ENOMEM, "allocating info structure"); 251 return ENOMEM; 252 } 253 254 /* 255 * XXX debug only 256 */ 257 for (i = 0; i < sizeof(*info); i++) 258 if (((uint8_t *)info)[i] != 0) 259 panic("non-null memory"); 260 261 info->shadow_free = 0; 262 info->xbdev = dev; 263 info->vdevice = vdevice; 264 info->connected = BLKIF_STATE_DISCONNECTED; 265 266 /* work queue needed ? */ 267 for (i = 0; i < BLK_RING_SIZE; i++) 268 info->shadow[i].req.id = i+1; 269 info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; 270 271 /* Front end dir is a number, which is used as the id. */ 272 info->handle = strtoul(strrchr(dev->nodename,'/')+1, NULL, 0); 273 dev->dev_driver_data = info; 274 275 err = talk_to_backend(dev, info); 276 if (err) { 277 free(info, M_DEVBUF); 278 dev->dev_driver_data = NULL; 279 return err; 280 } 281 282 return 0; 283} 284 285 286static int blkfront_resume(struct xenbus_device *dev) 287{ 288 struct blkfront_info *info = dev->dev_driver_data; 289 int err; 290 291 DPRINTK("blkfront_resume: %s\n", dev->nodename); 292 293 blkif_free(info, 1); 294 295 err = talk_to_backend(dev, info); 296 if (!err) 297 blkif_recover(info); 298 299 return err; 300} 301 302/* Common code used when first setting up, and when resuming. */ 303static int talk_to_backend(struct xenbus_device *dev, 304 struct blkfront_info *info) 305{ 306 const char *message = NULL; 307 struct xenbus_transaction xbt; 308 int err; 309 310 /* Create shared ring, alloc event channel. 
*/ 311 err = setup_blkring(dev, info); 312 if (err) 313 goto out; 314 315 again: 316 err = xenbus_transaction_start(&xbt); 317 if (err) { 318 xenbus_dev_fatal(dev, err, "starting transaction"); 319 goto destroy_blkring; 320 } 321 322 err = xenbus_printf(xbt, dev->nodename, 323 "ring-ref","%u", info->ring_ref); 324 if (err) { 325 message = "writing ring-ref"; 326 goto abort_transaction; 327 } 328 err = xenbus_printf(xbt, dev->nodename, 329 "event-channel", "%u", irq_to_evtchn_port(info->irq)); 330 if (err) { 331 message = "writing event-channel"; 332 goto abort_transaction; 333 } 334 335 err = xenbus_transaction_end(xbt, 0); 336 if (err) { 337 if (err == -EAGAIN) 338 goto again; 339 xenbus_dev_fatal(dev, err, "completing transaction"); 340 goto destroy_blkring; 341 } 342 xenbus_switch_state(dev, XenbusStateInitialised); 343 344 return 0; 345 346 abort_transaction: 347 xenbus_transaction_end(xbt, 1); 348 if (message) 349 xenbus_dev_fatal(dev, err, "%s", message); 350 destroy_blkring: 351 blkif_free(info, 0); 352 out: 353 return err; 354} 355 356static int 357setup_blkring(struct xenbus_device *dev, struct blkfront_info *info) 358{ 359 blkif_sring_t *sring; 360 int err; 361 362 info->ring_ref = GRANT_INVALID_REF; 363 364 sring = (blkif_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO); 365 if (sring == NULL) { 366 xenbus_dev_fatal(dev, ENOMEM, "allocating shared ring"); 367 return ENOMEM; 368 } 369 SHARED_RING_INIT(sring); 370 FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); 371 372 err = xenbus_grant_ring(dev, (vtomach(info->ring.sring) >> PAGE_SHIFT)); 373 if (err < 0) { 374 free(sring, M_DEVBUF); 375 info->ring.sring = NULL; 376 goto fail; 377 } 378 info->ring_ref = err; 379 380 err = bind_listening_port_to_irqhandler(dev->otherend_id, 381 "xbd", (driver_intr_t *)blkif_int, info, 382 INTR_TYPE_BIO | INTR_MPSAFE, NULL); 383 if (err <= 0) { 384 xenbus_dev_fatal(dev, err, 385 "bind_evtchn_to_irqhandler failed"); 386 goto fail; 387 } 388 info->irq = err; 389 390 
return 0; 391 fail: 392 blkif_free(info, 0); 393 return err; 394} 395 396 397/** 398 * Callback received when the backend's state changes. 399 */ 400static void backend_changed(struct xenbus_device *dev, 401 XenbusState backend_state) 402{ 403 struct blkfront_info *info = dev->dev_driver_data; 404 405 DPRINTK("blkfront:backend_changed.\n"); 406 407 switch (backend_state) { 408 case XenbusStateUnknown: 409 case XenbusStateInitialising: 410 case XenbusStateInitWait: 411 case XenbusStateInitialised: 412 case XenbusStateClosed: 413 break; 414 415 case XenbusStateConnected: 416 connect(info); 417 break; 418 419 case XenbusStateClosing: 420 if (info->users > 0) 421 xenbus_dev_error(dev, -EBUSY, 422 "Device in use; refusing to close"); 423 else 424 blkfront_closing(dev); 425#ifdef notyet 426 bd = bdget(info->dev); 427 if (bd == NULL) 428 xenbus_dev_fatal(dev, -ENODEV, "bdget failed"); 429 430 down(&bd->bd_sem); 431 if (info->users > 0) 432 xenbus_dev_error(dev, -EBUSY, 433 "Device in use; refusing to close"); 434 else 435 blkfront_closing(dev); 436 up(&bd->bd_sem); 437 bdput(bd); 438#endif 439 } 440} 441 442/* 443** Invoked when the backend is finally 'ready' (and has told produced 444** the details about the physical device - #sectors, size, etc). 
445*/ 446static void 447connect(struct blkfront_info *info) 448{ 449 unsigned long sectors, sector_size; 450 unsigned int binfo; 451 int err; 452 453 if( (info->connected == BLKIF_STATE_CONNECTED) || 454 (info->connected == BLKIF_STATE_SUSPENDED) ) 455 return; 456 457 DPRINTK("blkfront.c:connect:%s.\n", info->xbdev->otherend); 458 459 err = xenbus_gather(XBT_NIL, info->xbdev->otherend, 460 "sectors", "%lu", §ors, 461 "info", "%u", &binfo, 462 "sector-size", "%lu", §or_size, 463 NULL); 464 if (err) { 465 xenbus_dev_fatal(info->xbdev, err, 466 "reading backend fields at %s", 467 info->xbdev->otherend); 468 return; 469 } 470 err = xenbus_gather(XBT_NIL, info->xbdev->otherend, 471 "feature-barrier", "%lu", &info->feature_barrier, 472 NULL); 473 if (err) 474 info->feature_barrier = 0; 475 476 xlvbd_add(sectors, info->vdevice, binfo, sector_size, info); 477 478 (void)xenbus_switch_state(info->xbdev, XenbusStateConnected); 479 480 /* Kick pending requests. */ 481 mtx_lock(&blkif_io_lock); 482 info->connected = BLKIF_STATE_CONNECTED; 483 kick_pending_request_queues(info); 484 mtx_unlock(&blkif_io_lock); 485 486#if 0 487 add_disk(info->gd); 488#endif 489} 490 491/** 492 * Handle the change of state of the backend to Closing. We must delete our 493 * device-layer structures now, to ensure that writes are flushed through to 494 * the backend. Once is this done, we can switch to Closed in 495 * acknowledgement. 
496 */ 497static void blkfront_closing(struct xenbus_device *dev) 498{ 499 struct blkfront_info *info = dev->dev_driver_data; 500 501 DPRINTK("blkfront_closing: %s removed\n", dev->nodename); 502 503 if (info->mi) { 504 DPRINTK("Calling xlvbd_del\n"); 505 xlvbd_del(info); 506 info->mi = NULL; 507 } 508 509 xenbus_switch_state(dev, XenbusStateClosed); 510} 511 512 513static int blkfront_remove(struct xenbus_device *dev) 514{ 515 struct blkfront_info *info = dev->dev_driver_data; 516 517 DPRINTK("blkfront_remove: %s removed\n", dev->nodename); 518 519 blkif_free(info, 0); 520 521 free(info, M_DEVBUF); 522 523 return 0; 524} 525 526 527static inline int 528GET_ID_FROM_FREELIST(struct blkfront_info *info) 529{ 530 unsigned long nfree = info->shadow_free; 531 532 KASSERT(nfree <= BLK_RING_SIZE, ("free %lu > RING_SIZE", nfree)); 533 info->shadow_free = info->shadow[nfree].req.id; 534 info->shadow[nfree].req.id = 0x0fffffee; /* debug */ 535 return nfree; 536} 537 538static inline void 539ADD_ID_TO_FREELIST(struct blkfront_info *info, unsigned long id) 540{ 541 info->shadow[id].req.id = info->shadow_free; 542 info->shadow[id].request = 0; 543 info->shadow_free = id; 544} 545 546static inline void 547flush_requests(struct blkfront_info *info) 548{ 549 int notify; 550 551 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify); 552 553 if (notify) 554 notify_remote_via_irq(info->irq); 555} 556 557static void 558kick_pending_request_queues(struct blkfront_info *info) 559{ 560 /* XXX check if we can't simplify */ 561#if 0 562 if (!RING_FULL(&info->ring)) { 563 /* Re-enable calldowns. */ 564 blk_start_queue(info->rq); 565 /* Kick things off immediately. */ 566 do_blkif_request(info->rq); 567 } 568#endif 569 if (!RING_FULL(&info->ring)) { 570#if 0 571 sc = LIST_FIRST(&xbsl_head); 572 LIST_REMOVE(sc, entry); 573 /* Re-enable calldowns. */ 574 blk_start_queue(di->rq); 575#endif 576 /* Kick things off immediately. 
*/ 577 xb_startio(info->sc); 578 } 579} 580 581#if 0 582/* XXX */ 583static void blkif_restart_queue(void *arg) 584{ 585 struct blkfront_info *info = (struct blkfront_info *)arg; 586 587 mtx_lock(&blkif_io_lock); 588 kick_pending_request_queues(info); 589 mtx_unlock(&blkif_io_lock); 590} 591#endif 592 593static void blkif_restart_queue_callback(void *arg) 594{ 595#if 0 596 struct blkfront_info *info = (struct blkfront_info *)arg; 597 /* XXX BSD equiv ? */ 598 599 schedule_work(&info->work); 600#endif 601} 602 603static int 604blkif_open(struct disk *dp) 605{ 606 struct xb_softc *sc = (struct xb_softc *)dp->d_drv1; 607 608 if (sc == NULL) { 609 printk("xb%d: not found", sc->xb_unit); 610 return (ENXIO); 611 } 612 613 sc->xb_flags |= XB_OPEN; 614 sc->xb_info->users++; 615 return (0); 616} 617 618static int 619blkif_close(struct disk *dp) 620{ 621 struct xb_softc *sc = (struct xb_softc *)dp->d_drv1; 622 623 if (sc == NULL) 624 return (ENXIO); 625 sc->xb_flags &= ~XB_OPEN; 626 if (--(sc->xb_info->users) == 0) { 627 /* Check whether we have been instructed to close. We will 628 have ignored this request initially, as the device was 629 still mounted. */ 630 struct xenbus_device * dev = sc->xb_info->xbdev; 631 XenbusState state = xenbus_read_driver_state(dev->otherend); 632 633 if (state == XenbusStateClosing) 634 blkfront_closing(dev); 635 } 636 return (0); 637} 638 639static int 640blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td) 641{ 642 struct xb_softc *sc = (struct xb_softc *)dp->d_drv1; 643 644 if (sc == NULL) 645 return (ENXIO); 646 647 return (ENOTTY); 648} 649 650 651/* 652 * blkif_queue_request 653 * 654 * request block io 655 * 656 * id: for guest use only. 657 * operation: BLKIF_OP_{READ,WRITE,PROBE} 658 * buffer: buffer to read/write into. this should be a 659 * virtual address in the guest os. 
660 */ 661static int blkif_queue_request(struct bio *bp) 662{ 663 caddr_t alignbuf; 664 vm_paddr_t buffer_ma; 665 blkif_request_t *ring_req; 666 unsigned long id; 667 uint64_t fsect, lsect; 668 struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1; 669 struct blkfront_info *info = sc->xb_info; 670 int ref; 671 672 if (unlikely(sc->xb_info->connected != BLKIF_STATE_CONNECTED)) 673 return 1; 674 675 if (gnttab_alloc_grant_references( 676 BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) { 677 gnttab_request_free_callback( 678 &info->callback, 679 blkif_restart_queue_callback, 680 info, 681 BLKIF_MAX_SEGMENTS_PER_REQUEST); 682 return 1; 683 } 684 685 /* Check if the buffer is properly aligned */ 686 if ((vm_offset_t)bp->bio_data & PAGE_MASK) { 687 int align = (bp->bio_bcount < PAGE_SIZE/2) ? XBD_SECTOR_SIZE : 688 PAGE_SIZE; 689 caddr_t newbuf = malloc(bp->bio_bcount + align, M_DEVBUF, 690 M_NOWAIT); 691 692 alignbuf = (char *)roundup2((u_long)newbuf, align); 693 694 /* save a copy of the current buffer */ 695 bp->bio_driver1 = newbuf; 696 bp->bio_driver2 = alignbuf; 697 698 /* Copy the data for a write */ 699 if (bp->bio_cmd == BIO_WRITE) 700 bcopy(bp->bio_data, alignbuf, bp->bio_bcount); 701 } else 702 alignbuf = bp->bio_data; 703 704 /* Fill out a communications ring structure. */ 705 ring_req = RING_GET_REQUEST(&info->ring, 706 info->ring.req_prod_pvt); 707 id = GET_ID_FROM_FREELIST(info); 708 info->shadow[id].request = (unsigned long)bp; 709 710 ring_req->id = id; 711 ring_req->operation = (bp->bio_cmd == BIO_READ) ? BLKIF_OP_READ : 712 BLKIF_OP_WRITE; 713 714 ring_req->sector_number= (blkif_sector_t)bp->bio_pblkno; 715 ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xb_disk; 716 717 ring_req->nr_segments = 0; /* XXX not doing scatter/gather since buffer 718 * chaining is not supported. 
719 */ 720 721 buffer_ma = vtomach(alignbuf); 722 fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT; 723 lsect = fsect + (bp->bio_bcount >> XBD_SECTOR_SHFT) - 1; 724 /* install a grant reference. */ 725 ref = gnttab_claim_grant_reference(&gref_head); 726 KASSERT( ref != -ENOSPC, ("grant_reference failed") ); 727 728 gnttab_grant_foreign_access_ref( 729 ref, 730 info->xbdev->otherend_id, 731 buffer_ma >> PAGE_SHIFT, 732 ring_req->operation & 1 ); /* ??? */ 733 info->shadow[id].frame[ring_req->nr_segments] = 734 buffer_ma >> PAGE_SHIFT; 735 736 ring_req->seg[ring_req->nr_segments] = 737 (struct blkif_request_segment) { 738 .gref = ref, 739 .first_sect = fsect, 740 .last_sect = lsect }; 741 742 ring_req->nr_segments++; 743 KASSERT((buffer_ma & (XBD_SECTOR_SIZE-1)) == 0, 744 ("XEN buffer must be sector aligned")); 745 KASSERT(lsect <= 7, 746 ("XEN disk driver data cannot cross a page boundary")); 747 748 buffer_ma &= ~PAGE_MASK; 749 750 info->ring.req_prod_pvt++; 751 752 /* Keep a private copy so we can reissue requests when recovering. */ 753 info->shadow[id].req = *ring_req; 754 755 gnttab_free_grant_references(gref_head); 756 757 return 0; 758} 759 760 761 762/* 763 * Dequeue buffers and place them in the shared communication ring. 764 * Return when no more requests can be accepted or all buffers have 765 * been queued. 766 * 767 * Signal XEN once the ring has been filled out. 
768 */ 769static void 770xb_startio(struct xb_softc *sc) 771{ 772 struct bio *bp; 773 int queued = 0; 774 struct blkfront_info *info = sc->xb_info; 775 DPRINTK(""); 776 777 mtx_assert(&blkif_io_lock, MA_OWNED); 778 779 while ((bp = bioq_takefirst(&sc->xb_bioq)) != NULL) { 780 781 if (RING_FULL(&info->ring)) 782 goto wait; 783 784 if (blkif_queue_request(bp)) { 785 wait: 786 bioq_insert_head(&sc->xb_bioq, bp); 787 break; 788 } 789 queued++; 790 } 791 792 if (queued != 0) 793 flush_requests(sc->xb_info); 794} 795 796static void 797blkif_int(void *xsc) 798{ 799 struct xb_softc *sc = NULL; 800 struct bio *bp; 801 blkif_response_t *bret; 802 RING_IDX i, rp; 803 struct blkfront_info *info = xsc; 804 DPRINTK(""); 805 806 TRACE_ENTER; 807 808 mtx_lock(&blkif_io_lock); 809 810 if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) { 811 mtx_unlock(&blkif_io_lock); 812 return; 813 } 814 815 again: 816 rp = info->ring.sring->rsp_prod; 817 rmb(); /* Ensure we see queued responses up to 'rp'. */ 818 819 for (i = info->ring.rsp_cons; i != rp; i++) { 820 unsigned long id; 821 822 bret = RING_GET_RESPONSE(&info->ring, i); 823 id = bret->id; 824 bp = (struct bio *)info->shadow[id].request; 825 826 blkif_completion(&info->shadow[id]); 827 828 ADD_ID_TO_FREELIST(info, id); 829 830 switch (bret->operation) { 831 case BLKIF_OP_READ: 832 /* had an unaligned buffer that needs to be copied */ 833 if (bp->bio_driver1) 834 bcopy(bp->bio_driver2, bp->bio_data, bp->bio_bcount); 835 /* FALLTHROUGH */ 836 case BLKIF_OP_WRITE: 837 838 /* free the copy buffer */ 839 if (bp->bio_driver1) { 840 free(bp->bio_driver1, M_DEVBUF); 841 bp->bio_driver1 = NULL; 842 } 843 844 if ( unlikely(bret->status != BLKIF_RSP_OKAY) ) { 845 printf("Bad return from blkdev data request: %x\n", 846 bret->status); 847 bp->bio_flags |= BIO_ERROR; 848 } 849 850 sc = (struct xb_softc *)bp->bio_disk->d_drv1; 851 852 if (bp->bio_flags & BIO_ERROR) 853 bp->bio_error = EIO; 854 else 855 bp->bio_resid = 0; 856 857 biodone(bp); 
858 break; 859 default: 860 panic("received invalid operation"); 861 break; 862 } 863 } 864 865 info->ring.rsp_cons = i; 866 867 if (i != info->ring.req_prod_pvt) { 868 int more_to_do; 869 RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do); 870 if (more_to_do) 871 goto again; 872 } else { 873 info->ring.sring->rsp_event = i + 1; 874 } 875 876 kick_pending_request_queues(info); 877 878 mtx_unlock(&blkif_io_lock); 879} 880 881static void 882blkif_free(struct blkfront_info *info, int suspend) 883{ 884 885/* Prevent new requests being issued until we fix things up. */ 886 mtx_lock(&blkif_io_lock); 887 info->connected = suspend ? 888 BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED; 889 mtx_unlock(&blkif_io_lock); 890 891 /* Free resources associated with old device channel. */ 892 if (info->ring_ref != GRANT_INVALID_REF) { 893 gnttab_end_foreign_access(info->ring_ref, 0, 894 info->ring.sring); 895 info->ring_ref = GRANT_INVALID_REF; 896 info->ring.sring = NULL; 897 } 898 if (info->irq) 899 unbind_from_irqhandler(info->irq, info); 900 info->irq = 0; 901 902} 903 904static void 905blkif_completion(struct blk_shadow *s) 906{ 907 int i; 908 909 for (i = 0; i < s->req.nr_segments; i++) 910 gnttab_end_foreign_access(s->req.seg[i].gref, 0, 0UL); 911} 912 913static void 914blkif_recover(struct blkfront_info *info) 915{ 916 int i, j; 917 blkif_request_t *req; 918 struct blk_shadow *copy; 919 920 /* Stage 1: Make a safe copy of the shadow state. */ 921 copy = (struct blk_shadow *)malloc(sizeof(info->shadow), M_DEVBUF, M_NOWAIT|M_ZERO); 922 PANIC_IF(copy == NULL); 923 memcpy(copy, info->shadow, sizeof(info->shadow)); 924 925 /* Stage 2: Set up free list. */ 926 memset(&info->shadow, 0, sizeof(info->shadow)); 927 for (i = 0; i < BLK_RING_SIZE; i++) 928 info->shadow[i].req.id = i+1; 929 info->shadow_free = info->ring.req_prod_pvt; 930 info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; 931 932 /* Stage 3: Find pending requests and requeue them. 
*/ 933 for (i = 0; i < BLK_RING_SIZE; i++) { 934 /* Not in use? */ 935 if (copy[i].request == 0) 936 continue; 937 938 /* Grab a request slot and copy shadow state into it. */ 939 req = RING_GET_REQUEST( 940 &info->ring, info->ring.req_prod_pvt); 941 *req = copy[i].req; 942 943 /* We get a new request id, and must reset the shadow state. */ 944 req->id = GET_ID_FROM_FREELIST(info); 945 memcpy(&info->shadow[req->id], ©[i], sizeof(copy[i])); 946 947 /* Rewrite any grant references invalidated by suspend/resume. */ 948 for (j = 0; j < req->nr_segments; j++) 949 gnttab_grant_foreign_access_ref( 950 req->seg[j].gref, 951 info->xbdev->otherend_id, 952 pfn_to_mfn(info->shadow[req->id].frame[j]), 953 0 /* assume not readonly */); 954 955 info->shadow[req->id].req = *req; 956 957 info->ring.req_prod_pvt++; 958 } 959 960 free(copy, M_DEVBUF); 961 962 xenbus_switch_state(info->xbdev, XenbusStateConnected); 963 964 /* Now safe for us to use the shared ring */ 965 mtx_lock(&blkif_io_lock); 966 info->connected = BLKIF_STATE_CONNECTED; 967 mtx_unlock(&blkif_io_lock); 968 969 /* Send off requeued requests */ 970 mtx_lock(&blkif_io_lock); 971 flush_requests(info); 972 973 /* Kick any other new requests queued since we resumed */ 974 kick_pending_request_queues(info); 975 mtx_unlock(&blkif_io_lock); 976} 977 978static int 979blkfront_is_ready(struct xenbus_device *dev) 980{ 981 struct blkfront_info *info = dev->dev_driver_data; 982 983 return info->is_ready; 984} 985 986static struct xenbus_device_id blkfront_ids[] = { 987 { "vbd" }, 988 { "" } 989}; 990 991 992static struct xenbus_driver blkfront = { 993 .name = "vbd", 994 .ids = blkfront_ids, 995 .probe = blkfront_probe, 996 .remove = blkfront_remove, 997 .resume = blkfront_resume, 998 .otherend_changed = backend_changed, 999 .is_ready = blkfront_is_ready, 1000}; 1001 1002 1003 1004static void 1005xenbus_init(void) 1006{ 1007 xenbus_register_frontend(&blkfront); 1008} 1009 1010MTX_SYSINIT(ioreq, &blkif_io_lock, "BIO LOCK", 
MTX_NOWITNESS); /* XXX how does one enroll a lock? */ 1011SYSINIT(xbdev, SI_SUB_PSEUDO, SI_ORDER_SECOND, xenbus_init, NULL); 1012 1013 1014/* 1015 * Local variables: 1016 * mode: C 1017 * c-set-style: "BSD" 1018 * c-basic-offset: 8 1019 * tab-width: 4 1020 * indent-tabs-mode: t 1021 * End: 1022 */ 1023