blkfront.c revision 181914
/*-
 * All rights reserved.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

/*
 * XenoBSD block device driver
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/xen/blkfront/blkfront.c 181914 2008-08-20 09:22:37Z kmacy $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <vm/vm.h>
#include <vm/pmap.h>

#include <sys/bio.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/module.h>

#include <machine/bus.h>
#include <sys/rman.h>
#include <machine/resource.h>
#include <machine/intr_machdep.h>
#include <machine/vmparam.h>

#include <machine/xen/hypervisor.h>
#include <machine/xen/xen-os.h>
#include <machine/xen/xen_intr.h>
#include <machine/xen/xenbus.h>
#include <machine/xen/evtchn.h>
#include <xen/interface/grant_table.h>

#include <geom/geom_disk.h>
#include <machine/xen/xenfunc.h>
#include <xen/gnttab.h>

#include <dev/xen/blkfront/block.h>

#define ASSERT(S) KASSERT(S, (#S))

/* prototypes */
struct xb_softc;
static void xb_startio(struct xb_softc *sc);
static void connect(struct blkfront_info *);
static void blkfront_closing(struct xenbus_device *);
static int blkfront_remove(struct xenbus_device *);
static int talk_to_backend(struct xenbus_device *, struct blkfront_info *);
static int setup_blkring(struct xenbus_device *, struct blkfront_info *);
static void blkif_int(void *);
#if 0
static void blkif_restart_queue(void *arg);
#endif
static void blkif_recover(struct blkfront_info *);
static void blkif_completion(struct blk_shadow *);
static void blkif_free(struct blkfront_info *, int);

#define GRANT_INVALID_REF	0
#define BLK_RING_SIZE		__RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)

LIST_HEAD(xb_softc_list_head, xb_softc) xbsl_head;

/* Control whether runtime update of vbds is enabled. */
#define ENABLE_VBD_UPDATE 0

#if ENABLE_VBD_UPDATE
static void vbd_update(void);
#endif

#define BLKIF_STATE_DISCONNECTED 0
#define BLKIF_STATE_CONNECTED    1
#define BLKIF_STATE_SUSPENDED    2

#ifdef notyet
static char *blkif_state_name[] = {
	[BLKIF_STATE_DISCONNECTED] = "disconnected",
	[BLKIF_STATE_CONNECTED]    = "connected",
	[BLKIF_STATE_SUSPENDED]    = "closed",
};

static char *blkif_status_name[] = {
	[BLKIF_INTERFACE_STATUS_CLOSED]       = "closed",
	[BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
	[BLKIF_INTERFACE_STATUS_CONNECTED]    = "connected",
	[BLKIF_INTERFACE_STATUS_CHANGED]      = "changed",
};
#endif

#define WPRINTK(fmt, args...) printf("[XEN] " fmt, ##args)
#if 0
#define DPRINTK(fmt, args...) \
    printf("[XEN] %s:%d" fmt ".\n", __FUNCTION__, __LINE__, ##args)
#else
#define DPRINTK(fmt, args...)
#endif
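/*
 * A note on sizing (assuming the standard Xen blkif ABI): the shared ring
 * occupies a single page, so BLK_RING_SIZE evaluates to the largest power
 * of two of ring entries that fits in PAGE_SIZE -- 32 entries on x86 with
 * 4 KiB pages.  With up to BLKIF_MAX_SEGMENTS_PER_REQUEST segments per
 * request (11 in the classic blkif ABI), MAXIMUM_OUTSTANDING_BLOCK_REQS
 * below works out to 11 * 32 = 352 in-flight segments.
 */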
static grant_ref_t gref_head;
#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
    (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)

static void kick_pending_request_queues(struct blkfront_info *);
static int blkif_open(struct disk *dp);
static int blkif_close(struct disk *dp);
static int blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag,
    struct thread *td);
static int blkif_queue_request(struct bio *bp);
static void xb_strategy(struct bio *bp);

/* XXX move to xb_vbd.c when VBD update support is added */
#define MAX_VBDS 64

#define XBD_SECTOR_SIZE	512	/* XXX: assume for now */
#define XBD_SECTOR_SHFT	9

static struct mtx blkif_io_lock;

static vm_paddr_t
pfn_to_mfn(vm_paddr_t pfn)
{

	return (phystomach(pfn << PAGE_SHIFT) >> PAGE_SHIFT);
}

int
xlvbd_add(blkif_sector_t capacity, int unit, uint16_t vdisk_info,
    uint16_t sector_size, struct blkfront_info *info)
{
	struct xb_softc *sc;
	int error = 0;

	sc = (struct xb_softc *)malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	sc->xb_unit = unit;
	sc->xb_info = info;
	info->sc = sc;

	sc->xb_disk = disk_alloc();
	sc->xb_disk->d_unit = unit;
	sc->xb_disk->d_open = blkif_open;
	sc->xb_disk->d_close = blkif_close;
	sc->xb_disk->d_ioctl = blkif_ioctl;
	sc->xb_disk->d_strategy = xb_strategy;
	sc->xb_disk->d_name = "xbd";
	sc->xb_disk->d_drv1 = sc;
	sc->xb_disk->d_sectorsize = sector_size;

	/* XXX */
	sc->xb_disk->d_mediasize = capacity << XBD_SECTOR_SHFT;
#if 0
	sc->xb_disk->d_maxsize = DFLTPHYS;
#else /* XXX: xen can't handle large single i/o requests */
	sc->xb_disk->d_maxsize = 4096;
#endif
#ifdef notyet
	XENPRINTF("attaching device 0x%x unit %d capacity %llu\n",
	    xb_diskinfo[sc->xb_unit].device, sc->xb_unit,
	    sc->xb_disk->d_mediasize);
#endif
	sc->xb_disk->d_flags = 0;
	disk_create(sc->xb_disk, DISK_VERSION_00);
	bioq_init(&sc->xb_bioq);

	return (error);
}

void
xlvbd_del(struct blkfront_info *info)
{
	struct xb_softc *sc;

	sc = info->sc;
	disk_destroy(sc->xb_disk);
}
/************************ end VBD support *****************/

/*
 * Read/write routine for a buffer: finds the proper unit, places the bio
 * on the sort queue, and kicks the controller.
 */
static void
xb_strategy(struct bio *bp)
{
	struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1;

	/* bogus disk? */
	if (sc == NULL) {
		bp->bio_error = EINVAL;
		bp->bio_flags |= BIO_ERROR;
		goto bad;
	}

	DPRINTK("");

	/*
	 * Place it in the queue of disk activities for this disk.
	 */
	mtx_lock(&blkif_io_lock);
	bioq_disksort(&sc->xb_bioq, bp);

	xb_startio(sc);
	mtx_unlock(&blkif_io_lock);
	return;

 bad:
	/*
	 * Correctly set the bio to indicate a failed transfer.
	 */
	bp->bio_resid = bp->bio_bcount;
	biodone(bp);
}
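/*
 * Frontend/backend handshake, in outline (the usual xenbus protocol, as
 * implemented by the functions below): blkfront_probe() reads
 * "virtual-device" from our xenstore node, then talk_to_backend()
 * publishes "ring-ref" and "event-channel" and moves us to
 * XenbusStateInitialised.  Once backend_changed() sees the backend reach
 * XenbusStateConnected, connect() reads the disk geometry and creates the
 * disk(9) device.
 */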
/*
 * Setup supplies the backend directory and virtual device.  We allocate
 * an event channel and shared ring entries, then watch the backend to
 * learn when it is ready.
 */
static int
blkfront_probe(struct xenbus_device *dev, const struct xenbus_device_id *id)
{
	int err, vdevice, i;
	struct blkfront_info *info;

	/* FIXME: Use dynamic device id if this is not set. */
	err = xenbus_scanf(XBT_NIL, dev->nodename,
	    "virtual-device", "%i", &vdevice);
	if (err != 1) {
		xenbus_dev_fatal(dev, err, "reading virtual-device");
		printf("couldn't find virtual device\n");
		return (err);
	}

	info = malloc(sizeof(*info), M_DEVBUF, M_NOWAIT|M_ZERO);
	if (info == NULL) {
		xenbus_dev_fatal(dev, ENOMEM, "allocating info structure");
		return (ENOMEM);
	}

	/*
	 * XXX debug only: verify that M_ZERO really zeroed the allocation.
	 */
	for (i = 0; i < sizeof(*info); i++)
		if (((uint8_t *)info)[i] != 0)
			panic("non-null memory");

	info->shadow_free = 0;
	info->xbdev = dev;
	info->vdevice = vdevice;
	info->connected = BLKIF_STATE_DISCONNECTED;

	/* work queue needed? */
	for (i = 0; i < BLK_RING_SIZE; i++)
		info->shadow[i].req.id = i+1;
	info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;

	/* Front end dir is a number, which is used as the id. */
	info->handle = strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
	dev->dev_driver_data = info;

	err = talk_to_backend(dev, info);
	if (err) {
		free(info, M_DEVBUF);
		dev->dev_driver_data = NULL;
		return (err);
	}

	return (0);
}

static int
blkfront_resume(struct xenbus_device *dev)
{
	struct blkfront_info *info = dev->dev_driver_data;
	int err;

	DPRINTK("blkfront_resume: %s\n", dev->nodename);

	blkif_free(info, 1);

	err = talk_to_backend(dev, info);
	if (!err)
		blkif_recover(info);

	return (err);
}

/* Common code used when first setting up, and when resuming. */
static int
talk_to_backend(struct xenbus_device *dev, struct blkfront_info *info)
{
	const char *message = NULL;
	struct xenbus_transaction xbt;
	int err;

	/* Create shared ring, alloc event channel. */
	err = setup_blkring(dev, info);
	if (err)
		goto out;

 again:
	err = xenbus_transaction_start(&xbt);
	if (err) {
		xenbus_dev_fatal(dev, err, "starting transaction");
		goto destroy_blkring;
	}

	err = xenbus_printf(xbt, dev->nodename,
	    "ring-ref", "%u", info->ring_ref);
	if (err) {
		message = "writing ring-ref";
		goto abort_transaction;
	}
	err = xenbus_printf(xbt, dev->nodename,
	    "event-channel", "%u", irq_to_evtchn_port(info->irq));
	if (err) {
		message = "writing event-channel";
		goto abort_transaction;
	}

	err = xenbus_transaction_end(xbt, 0);
	if (err) {
		if (err == -EAGAIN)
			goto again;
		xenbus_dev_fatal(dev, err, "completing transaction");
		goto destroy_blkring;
	}
	xenbus_switch_state(dev, XenbusStateInitialised);

	return (0);

 abort_transaction:
	xenbus_transaction_end(xbt, 1);
	if (message)
		xenbus_dev_fatal(dev, err, "%s", message);
 destroy_blkring:
	blkif_free(info, 0);
 out:
	return (err);
}
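/*
 * Ring setup, sketched: setup_blkring() below allocates one page for the
 * shared ring and grants it to the backend (xenbus_grant_ring() returns
 * the grant reference we advertise as "ring-ref"), then binds an event
 * channel to blkif_int() for completion notifications.  The transaction
 * loop in talk_to_backend() above retries on -EAGAIN, which is how
 * xenstore signals that a concurrent transaction invalidated ours.
 */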
static int
setup_blkring(struct xenbus_device *dev, struct blkfront_info *info)
{
	blkif_sring_t *sring;
	int err;

	info->ring_ref = GRANT_INVALID_REF;

	sring = (blkif_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sring == NULL) {
		xenbus_dev_fatal(dev, ENOMEM, "allocating shared ring");
		return (ENOMEM);
	}
	SHARED_RING_INIT(sring);
	FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);

	err = xenbus_grant_ring(dev, (vtomach(info->ring.sring) >> PAGE_SHIFT));
	if (err < 0) {
		free(sring, M_DEVBUF);
		info->ring.sring = NULL;
		goto fail;
	}
	info->ring_ref = err;

	err = bind_listening_port_to_irqhandler(dev->otherend_id,
	    "xbd", (driver_intr_t *)blkif_int, info,
	    INTR_TYPE_BIO | INTR_MPSAFE, NULL);
	if (err <= 0) {
		xenbus_dev_fatal(dev, err,
		    "bind_evtchn_to_irqhandler failed");
		goto fail;
	}
	info->irq = err;

	return (0);
 fail:
	blkif_free(info, 0);
	return (err);
}

/**
 * Callback received when the backend's state changes.
 */
static void
backend_changed(struct xenbus_device *dev, XenbusState backend_state)
{
	struct blkfront_info *info = dev->dev_driver_data;

	DPRINTK("blkfront:backend_changed.\n");

	switch (backend_state) {
	case XenbusStateUnknown:
	case XenbusStateInitialising:
	case XenbusStateInitWait:
	case XenbusStateInitialised:
	case XenbusStateClosed:
		break;

	case XenbusStateConnected:
		connect(info);
		break;

	case XenbusStateClosing:
		if (info->users > 0)
			xenbus_dev_error(dev, -EBUSY,
			    "Device in use; refusing to close");
		else
			blkfront_closing(dev);
#ifdef notyet
		bd = bdget(info->dev);
		if (bd == NULL)
			xenbus_dev_fatal(dev, -ENODEV, "bdget failed");

		down(&bd->bd_sem);
		if (info->users > 0)
			xenbus_dev_error(dev, -EBUSY,
			    "Device in use; refusing to close");
		else
			blkfront_closing(dev);
		up(&bd->bd_sem);
		bdput(bd);
#endif
	}
}

/*
 * Invoked when the backend is finally 'ready' (and has produced the
 * details about the physical device - #sectors, size, etc).
 */
static void
connect(struct blkfront_info *info)
{
	unsigned long sectors, sector_size;
	unsigned int binfo;
	int err;

	if ((info->connected == BLKIF_STATE_CONNECTED) ||
	    (info->connected == BLKIF_STATE_SUSPENDED))
		return;

	DPRINTK("blkfront.c:connect:%s.\n", info->xbdev->otherend);

	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
	    "sectors", "%lu", &sectors,
	    "info", "%u", &binfo,
	    "sector-size", "%lu", &sector_size,
	    NULL);
	if (err) {
		xenbus_dev_fatal(info->xbdev, err,
		    "reading backend fields at %s",
		    info->xbdev->otherend);
		return;
	}
	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
	    "feature-barrier", "%lu", &info->feature_barrier,
	    NULL);
	if (err)
		info->feature_barrier = 0;

	xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);

	(void)xenbus_switch_state(info->xbdev, XenbusStateConnected);

	/* Kick pending requests. */
	mtx_lock(&blkif_io_lock);
	info->connected = BLKIF_STATE_CONNECTED;
	kick_pending_request_queues(info);
	mtx_unlock(&blkif_io_lock);
	info->is_ready = 1;

#if 0
	add_disk(info->gd);
#endif
}

/**
 * Handle the change of state of the backend to Closing.  We must delete our
 * device-layer structures now, to ensure that writes are flushed through to
 * the backend.  Once this is done, we can switch to Closed in
 * acknowledgement.
 */
static void
blkfront_closing(struct xenbus_device *dev)
{
	struct blkfront_info *info = dev->dev_driver_data;

	DPRINTK("blkfront_closing: %s removed\n", dev->nodename);

	if (info->mi) {
		DPRINTK("Calling xlvbd_del\n");
		xlvbd_del(info);
		info->mi = NULL;
	}

	xenbus_switch_state(dev, XenbusStateClosed);
}

static int
blkfront_remove(struct xenbus_device *dev)
{
	struct blkfront_info *info = dev->dev_driver_data;

	DPRINTK("blkfront_remove: %s removed\n", dev->nodename);

	blkif_free(info, 0);

	free(info, M_DEVBUF);

	return (0);
}
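/*
 * The shadow free list threads unused shadow entries through their req.id
 * fields: shadow_free holds the index of the first free entry, and each
 * free entry's req.id holds the index of the next.  GET_ID_FROM_FREELIST()
 * pops the head and ADD_ID_TO_FREELIST() pushes an entry back.  The
 * 0x0fffffee / 0x0fffffff values stored here and in blkfront_probe() are
 * debug sentinels marking in-use and end-of-list entries; the KASSERT
 * catches any attempt to allocate past the end of the list.
 */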
static inline int
GET_ID_FROM_FREELIST(struct blkfront_info *info)
{
	unsigned long nfree = info->shadow_free;

	KASSERT(nfree <= BLK_RING_SIZE, ("free %lu > RING_SIZE", nfree));
	info->shadow_free = info->shadow[nfree].req.id;
	info->shadow[nfree].req.id = 0x0fffffee;	/* debug */
	return (nfree);
}

static inline void
ADD_ID_TO_FREELIST(struct blkfront_info *info, unsigned long id)
{
	info->shadow[id].req.id = info->shadow_free;
	info->shadow[id].request = 0;
	info->shadow_free = id;
}

static inline void
flush_requests(struct blkfront_info *info)
{
	int notify;

	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);

	if (notify)
		notify_remote_via_irq(info->irq);
}

static void
kick_pending_request_queues(struct blkfront_info *info)
{
	/* XXX check if we can't simplify */
#if 0
	if (!RING_FULL(&info->ring)) {
		/* Re-enable calldowns. */
		blk_start_queue(info->rq);
		/* Kick things off immediately. */
		do_blkif_request(info->rq);
	}
#endif
	if (!RING_FULL(&info->ring)) {
#if 0
		sc = LIST_FIRST(&xbsl_head);
		LIST_REMOVE(sc, entry);
		/* Re-enable calldowns. */
		blk_start_queue(di->rq);
#endif
		/* Kick things off immediately. */
		xb_startio(info->sc);
	}
}

#if 0
/* XXX */
static void
blkif_restart_queue(void *arg)
{
	struct blkfront_info *info = (struct blkfront_info *)arg;

	mtx_lock(&blkif_io_lock);
	kick_pending_request_queues(info);
	mtx_unlock(&blkif_io_lock);
}
#endif

static void
blkif_restart_queue_callback(void *arg)
{
#if 0
	struct blkfront_info *info = (struct blkfront_info *)arg;
	/* XXX BSD equiv? */

	schedule_work(&info->work);
#endif
}
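/*
 * Open/close bookkeeping: info->users counts open instances of the disk.
 * A backend request to close that arrives while the device is open is
 * refused in backend_changed(); the deferred close then happens in
 * blkif_close() once the last user goes away and the backend is still in
 * XenbusStateClosing.
 */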
static int
blkif_open(struct disk *dp)
{
	struct xb_softc *sc = (struct xb_softc *)dp->d_drv1;

	if (sc == NULL) {
		printf("xbd: disk not found\n");
		return (ENXIO);
	}

	sc->xb_flags |= XB_OPEN;
	sc->xb_info->users++;
	return (0);
}

static int
blkif_close(struct disk *dp)
{
	struct xb_softc *sc = (struct xb_softc *)dp->d_drv1;

	if (sc == NULL)
		return (ENXIO);
	sc->xb_flags &= ~XB_OPEN;
	if (--(sc->xb_info->users) == 0) {
		/*
		 * Check whether we have been instructed to close.  We will
		 * have ignored this request initially, as the device was
		 * still mounted.
		 */
		struct xenbus_device *dev = sc->xb_info->xbdev;
		XenbusState state = xenbus_read_driver_state(dev->otherend);

		if (state == XenbusStateClosing)
			blkfront_closing(dev);
	}
	return (0);
}

static int
blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag,
    struct thread *td)
{
	struct xb_softc *sc = (struct xb_softc *)dp->d_drv1;

	if (sc == NULL)
		return (ENXIO);

	return (ENOTTY);
}

/*
 * blkif_queue_request
 *
 * request block io
 *
 * id: for guest use only.
 * operation: BLKIF_OP_{READ,WRITE,PROBE}
 * buffer: buffer to read/write into.  this should be a
 *   virtual address in the guest os.
 */
static int
blkif_queue_request(struct bio *bp)
{
	caddr_t alignbuf;
	vm_paddr_t buffer_ma;
	blkif_request_t *ring_req;
	unsigned long id;
	uint64_t fsect, lsect;
	struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1;
	struct blkfront_info *info = sc->xb_info;
	int ref;

	if (unlikely(sc->xb_info->connected != BLKIF_STATE_CONNECTED))
		return (1);

	if (gnttab_alloc_grant_references(
	    BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
		gnttab_request_free_callback(
		    &info->callback,
		    blkif_restart_queue_callback,
		    info,
		    BLKIF_MAX_SEGMENTS_PER_REQUEST);
		return (1);
	}

	/* Check if the buffer is properly aligned. */
	if ((vm_offset_t)bp->bio_data & PAGE_MASK) {
		int align = (bp->bio_bcount < PAGE_SIZE/2) ? XBD_SECTOR_SIZE :
		    PAGE_SIZE;
		caddr_t newbuf = malloc(bp->bio_bcount + align, M_DEVBUF,
		    M_NOWAIT);

		alignbuf = (char *)roundup2((u_long)newbuf, align);

		/* save a copy of the current buffer */
		bp->bio_driver1 = newbuf;
		bp->bio_driver2 = alignbuf;

		/* Copy the data for a write */
		if (bp->bio_cmd == BIO_WRITE)
			bcopy(bp->bio_data, alignbuf, bp->bio_bcount);
	} else
		alignbuf = bp->bio_data;

	/* Fill out a communications ring structure. */
	ring_req = RING_GET_REQUEST(&info->ring,
	    info->ring.req_prod_pvt);
	id = GET_ID_FROM_FREELIST(info);
	info->shadow[id].request = (unsigned long)bp;

	ring_req->id = id;
	ring_req->operation = (bp->bio_cmd == BIO_READ) ? BLKIF_OP_READ :
	    BLKIF_OP_WRITE;

	ring_req->sector_number = (blkif_sector_t)bp->bio_pblkno;
	ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xb_disk;

	ring_req->nr_segments = 0;	/* XXX not doing scatter/gather since
					 * buffer chaining is not supported.
					 */

	buffer_ma = vtomach(alignbuf);
	fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT;
	lsect = fsect + (bp->bio_bcount >> XBD_SECTOR_SHFT) - 1;
	/* Install a grant reference. */
	ref = gnttab_claim_grant_reference(&gref_head);
	KASSERT(ref != -ENOSPC, ("grant_reference failed"));

	gnttab_grant_foreign_access_ref(
	    ref,
	    info->xbdev->otherend_id,
	    buffer_ma >> PAGE_SHIFT,
	    ring_req->operation & 1);	/* read-only grant for writes: the
					 * backend only reads the buffer then */
	info->shadow[id].frame[ring_req->nr_segments] =
	    buffer_ma >> PAGE_SHIFT;

	ring_req->seg[ring_req->nr_segments] =
	    (struct blkif_request_segment) {
		.gref       = ref,
		.first_sect = fsect,
		.last_sect  = lsect };

	ring_req->nr_segments++;
	KASSERT((buffer_ma & (XBD_SECTOR_SIZE-1)) == 0,
	    ("XEN buffer must be sector aligned"));
	KASSERT(lsect <= 7,
	    ("XEN disk driver data cannot cross a page boundary"));

	buffer_ma &= ~PAGE_MASK;

	info->ring.req_prod_pvt++;

	/* Keep a private copy so we can reissue requests when recovering. */
	info->shadow[id].req = *ring_req;

	gnttab_free_grant_references(gref_head);

	return (0);
}
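/*
 * A note on backoff in the loop below: if the ring is full or
 * blkif_queue_request() cannot proceed, the bio is pushed back to the
 * head of the queue and the loop stops; the request is retried when
 * blkif_int() later calls kick_pending_request_queues().
 * flush_requests() then issues at most one event-channel notification
 * for the whole batch, and RING_PUSH_REQUESTS_AND_CHECK_NOTIFY
 * suppresses even that when the backend is still actively polling the
 * ring.
 */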
/*
 * Dequeue buffers and place them in the shared communication ring.
 * Return when no more requests can be accepted or all buffers have
 * been queued.
 *
 * Signal XEN once the ring has been filled out.
 */
static void
xb_startio(struct xb_softc *sc)
{
	struct bio *bp;
	int queued = 0;
	struct blkfront_info *info = sc->xb_info;
	DPRINTK("");

	mtx_assert(&blkif_io_lock, MA_OWNED);

	while ((bp = bioq_takefirst(&sc->xb_bioq)) != NULL) {

		if (RING_FULL(&info->ring))
			goto wait;

		if (blkif_queue_request(bp)) {
		wait:
			bioq_insert_head(&sc->xb_bioq, bp);
			break;
		}
		queued++;
	}

	if (queued != 0)
		flush_requests(sc->xb_info);
}

static void
blkif_int(void *xsc)
{
	struct xb_softc *sc = NULL;
	struct bio *bp;
	blkif_response_t *bret;
	RING_IDX i, rp;
	struct blkfront_info *info = xsc;
	DPRINTK("");

	TRACE_ENTER;

	mtx_lock(&blkif_io_lock);

	if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
		mtx_unlock(&blkif_io_lock);
		return;
	}

 again:
	rp = info->ring.sring->rsp_prod;
	rmb(); /* Ensure we see queued responses up to 'rp'. */

	for (i = info->ring.rsp_cons; i != rp; i++) {
		unsigned long id;

		bret = RING_GET_RESPONSE(&info->ring, i);
		id = bret->id;
		bp = (struct bio *)info->shadow[id].request;

		blkif_completion(&info->shadow[id]);

		ADD_ID_TO_FREELIST(info, id);

		switch (bret->operation) {
		case BLKIF_OP_READ:
			/* had an unaligned buffer that needs to be copied */
			if (bp->bio_driver1)
				bcopy(bp->bio_driver2, bp->bio_data,
				    bp->bio_bcount);
			/* FALLTHROUGH */
		case BLKIF_OP_WRITE:

			/* free the copy buffer */
			if (bp->bio_driver1) {
				free(bp->bio_driver1, M_DEVBUF);
				bp->bio_driver1 = NULL;
			}

			if (unlikely(bret->status != BLKIF_RSP_OKAY)) {
				printf("Bad return from blkdev data request: %x\n",
				    bret->status);
				bp->bio_flags |= BIO_ERROR;
			}

			sc = (struct xb_softc *)bp->bio_disk->d_drv1;

			if (bp->bio_flags & BIO_ERROR)
				bp->bio_error = EIO;
			else
				bp->bio_resid = 0;

			biodone(bp);
			break;
		default:
			panic("received invalid operation");
			break;
		}
	}

	info->ring.rsp_cons = i;

	if (i != info->ring.req_prod_pvt) {
		int more_to_do;
		RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
		if (more_to_do)
			goto again;
	} else {
		info->ring.sring->rsp_event = i + 1;
	}

	kick_pending_request_queues(info);

	mtx_unlock(&blkif_io_lock);
}
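/*
 * Suspend/resume, sketched: on suspend, blkif_free(info, 1) marks the
 * connection BLKIF_STATE_SUSPENDED and tears down the old ring and event
 * channel; after talk_to_backend() rebuilds them, blkif_recover() below
 * replays every request still recorded in the shadow array, re-granting
 * each data page, since grant references do not survive the suspend.
 */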
static void
blkif_free(struct blkfront_info *info, int suspend)
{
	/* Prevent new requests being issued until we fix things up. */
	mtx_lock(&blkif_io_lock);
	info->connected = suspend ?
	    BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
	mtx_unlock(&blkif_io_lock);

	/* Free resources associated with old device channel. */
	if (info->ring_ref != GRANT_INVALID_REF) {
		gnttab_end_foreign_access(info->ring_ref, 0,
		    info->ring.sring);
		info->ring_ref = GRANT_INVALID_REF;
		info->ring.sring = NULL;
	}
	if (info->irq)
		unbind_from_irqhandler(info->irq, info);
	info->irq = 0;
}

static void
blkif_completion(struct blk_shadow *s)
{
	int i;

	for (i = 0; i < s->req.nr_segments; i++)
		gnttab_end_foreign_access(s->req.seg[i].gref, 0, 0UL);
}

static void
blkif_recover(struct blkfront_info *info)
{
	int i, j;
	blkif_request_t *req;
	struct blk_shadow *copy;

	/* Stage 1: Make a safe copy of the shadow state. */
	copy = (struct blk_shadow *)malloc(sizeof(info->shadow), M_DEVBUF,
	    M_NOWAIT|M_ZERO);
	PANIC_IF(copy == NULL);
	memcpy(copy, info->shadow, sizeof(info->shadow));

	/* Stage 2: Set up free list. */
	memset(&info->shadow, 0, sizeof(info->shadow));
	for (i = 0; i < BLK_RING_SIZE; i++)
		info->shadow[i].req.id = i+1;
	info->shadow_free = info->ring.req_prod_pvt;
	info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;

	/* Stage 3: Find pending requests and requeue them. */
	for (i = 0; i < BLK_RING_SIZE; i++) {
		/* Not in use? */
		if (copy[i].request == 0)
			continue;

		/* Grab a request slot and copy shadow state into it. */
		req = RING_GET_REQUEST(
		    &info->ring, info->ring.req_prod_pvt);
		*req = copy[i].req;

		/* We get a new request id, and must reset the shadow state. */
		req->id = GET_ID_FROM_FREELIST(info);
		memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i]));

		/* Rewrite any grant references invalidated by suspend/resume. */
		for (j = 0; j < req->nr_segments; j++)
			gnttab_grant_foreign_access_ref(
			    req->seg[j].gref,
			    info->xbdev->otherend_id,
			    pfn_to_mfn(info->shadow[req->id].frame[j]),
			    0 /* assume not readonly */);

		info->shadow[req->id].req = *req;

		info->ring.req_prod_pvt++;
	}

	free(copy, M_DEVBUF);

	xenbus_switch_state(info->xbdev, XenbusStateConnected);

	/* Now safe for us to use the shared ring. */
	mtx_lock(&blkif_io_lock);
	info->connected = BLKIF_STATE_CONNECTED;
	mtx_unlock(&blkif_io_lock);

	/* Send off requeued requests. */
	mtx_lock(&blkif_io_lock);
	flush_requests(info);

	/* Kick any other new requests queued since we resumed. */
	kick_pending_request_queues(info);
	mtx_unlock(&blkif_io_lock);
}
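/*
 * Registration glue: blkfront hooks into the xenbus frontend bus as
 * "vbd".  The MTX_SYSINIT/SYSINIT pair at the bottom of the file
 * initializes blkif_io_lock and registers the driver at SI_SUB_PSEUDO
 * time, early in boot.
 */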
static int
blkfront_is_ready(struct xenbus_device *dev)
{
	struct blkfront_info *info = dev->dev_driver_data;

	return (info->is_ready);
}

static struct xenbus_device_id blkfront_ids[] = {
	{ "vbd" },
	{ "" }
};

static struct xenbus_driver blkfront = {
	.name             = "vbd",
	.ids              = blkfront_ids,
	.probe            = blkfront_probe,
	.remove           = blkfront_remove,
	.resume           = blkfront_resume,
	.otherend_changed = backend_changed,
	.is_ready         = blkfront_is_ready,
};

static void
xenbus_init(void)
{
	xenbus_register_frontend(&blkfront);
}

MTX_SYSINIT(ioreq, &blkif_io_lock, "BIO LOCK", MTX_NOWITNESS); /* XXX how does one enroll a lock? */
SYSINIT(xbdev, SI_SUB_PSEUDO, SI_ORDER_SECOND, xenbus_init, NULL);

/*
 * Local variables:
 *  mode: C
 *  c-set-style: "BSD"
 *  c-basic-offset: 8
 *  tab-width: 4
 *  indent-tabs-mode: t
 * End:
 */