/* blkfront.c revision 186557 */
11556Srgrimes/*- 250471Speter * All rights reserved. 31556Srgrimes * 41556Srgrimes * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 51556Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 6119837Sru * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 71556Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 81556Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 9 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 10 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 11 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 12 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 13 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 14 * SUCH DAMAGE. 15 * 16 */ 17 18/* 19 * XenoBSD block device driver 20 */ 21 22#include <sys/cdefs.h> 23__FBSDID("$FreeBSD: head/sys/dev/xen/blkfront/blkfront.c 186557 2008-12-29 06:31:03Z kmacy $"); 24 25#include <sys/param.h> 26#include <sys/systm.h> 27#include <sys/malloc.h> 28#include <sys/kernel.h> 29#include <vm/vm.h> 30#include <vm/pmap.h> 31 32#include <sys/bio.h> 33#include <sys/bus.h> 34#include <sys/conf.h> 35#include <sys/module.h> 36 37#include <machine/bus.h> 38#include <sys/rman.h> 39#include <machine/resource.h> 40#include <machine/intr_machdep.h> 41#include <machine/vmparam.h> 42 43#include <xen/hypervisor.h> 44#include <machine/xen/xen-os.h> 45#include <xen/xen_intr.h> 46#include <xen/evtchn.h> 47#include <xen/interface/grant_table.h> 48#include <xen/interface/io/protocols.h> 49#include <xen/xenbus/xenbusvar.h> 50 51#include <geom/geom_disk.h> 52#include <machine/xen/xenfunc.h> 53#include <xen/gnttab.h> 54 55#include <dev/xen/blkfront/block.h> 56 57#include "xenbus_if.h" 58 59#define ASSERT(S) KASSERT(S, (#S)) 60/* prototypes */ 61struct xb_softc; 62static void 
xb_startio(struct xb_softc *sc); 63static void connect(device_t, struct blkfront_info *); 64static void blkfront_closing(device_t); 65static int blkfront_detach(device_t); 66static int talk_to_backend(device_t, struct blkfront_info *); 67static int setup_blkring(device_t, struct blkfront_info *); 68static void blkif_int(void *); 69#if 0 70static void blkif_restart_queue(void *arg); 71#endif 72static void blkif_recover(struct blkfront_info *); 73static void blkif_completion(struct blk_shadow *); 74static void blkif_free(struct blkfront_info *, int); 75 76#define GRANT_INVALID_REF 0 77#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE) 78 79LIST_HEAD(xb_softc_list_head, xb_softc) xbsl_head; 80 81/* Control whether runtime update of vbds is enabled. */ 82#define ENABLE_VBD_UPDATE 0 83 84#if ENABLE_VBD_UPDATE 85static void vbd_update(void); 86#endif 87 88 89#define BLKIF_STATE_DISCONNECTED 0 90#define BLKIF_STATE_CONNECTED 1 91#define BLKIF_STATE_SUSPENDED 2 92 93#ifdef notyet 94static char *blkif_state_name[] = { 95 [BLKIF_STATE_DISCONNECTED] = "disconnected", 96 [BLKIF_STATE_CONNECTED] = "connected", 97 [BLKIF_STATE_SUSPENDED] = "closed", 98}; 99 100static char * blkif_status_name[] = { 101 [BLKIF_INTERFACE_STATUS_CLOSED] = "closed", 102 [BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected", 103 [BLKIF_INTERFACE_STATUS_CONNECTED] = "connected", 104 [BLKIF_INTERFACE_STATUS_CHANGED] = "changed", 105}; 106#endif 107#define WPRINTK(fmt, args...) printf("[XEN] " fmt, ##args) 108#if 0 109#define DPRINTK(fmt, args...) printf("[XEN] %s:%d" fmt ".\n", __FUNCTION__, __LINE__,##args) 110#else 111#define DPRINTK(fmt, args...) 
112#endif 113 114static grant_ref_t gref_head; 115#define MAXIMUM_OUTSTANDING_BLOCK_REQS \ 116 (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE) 117 118static void kick_pending_request_queues(struct blkfront_info *); 119static int blkif_open(struct disk *dp); 120static int blkif_close(struct disk *dp); 121static int blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td); 122static int blkif_queue_request(struct bio *bp); 123static void xb_strategy(struct bio *bp); 124 125 126 127/* XXX move to xb_vbd.c when VBD update support is added */ 128#define MAX_VBDS 64 129 130#define XBD_SECTOR_SIZE 512 /* XXX: assume for now */ 131#define XBD_SECTOR_SHFT 9 132 133static struct mtx blkif_io_lock; 134 135static vm_paddr_t 136pfn_to_mfn(vm_paddr_t pfn) 137{ 138 return (phystomach(pfn << PAGE_SHIFT) >> PAGE_SHIFT); 139} 140 141 142/* 143 * Translate Linux major/minor to an appropriate name and unit 144 * number. For HVM guests, this allows us to use the same drive names 145 * with blkfront as the emulated drives, easing transition slightly. 
146 */ 147static void 148blkfront_vdevice_to_unit(int vdevice, int *unit, const char **name) 149{ 150 static struct vdev_info { 151 int major; 152 int shift; 153 int base; 154 const char *name; 155 } info[] = { 156 {3, 6, 0, "ad"}, /* ide0 */ 157 {22, 6, 2, "ad"}, /* ide1 */ 158 {33, 6, 4, "ad"}, /* ide2 */ 159 {34, 6, 6, "ad"}, /* ide3 */ 160 {56, 6, 8, "ad"}, /* ide4 */ 161 {57, 6, 10, "ad"}, /* ide5 */ 162 {88, 6, 12, "ad"}, /* ide6 */ 163 {89, 6, 14, "ad"}, /* ide7 */ 164 {90, 6, 16, "ad"}, /* ide8 */ 165 {91, 6, 18, "ad"}, /* ide9 */ 166 167 {8, 4, 0, "da"}, /* scsi disk0 */ 168 {65, 4, 16, "da"}, /* scsi disk1 */ 169 {66, 4, 32, "da"}, /* scsi disk2 */ 170 {67, 4, 48, "da"}, /* scsi disk3 */ 171 {68, 4, 64, "da"}, /* scsi disk4 */ 172 {69, 4, 80, "da"}, /* scsi disk5 */ 173 {70, 4, 96, "da"}, /* scsi disk6 */ 174 {71, 4, 112, "da"}, /* scsi disk7 */ 175 {128, 4, 128, "da"}, /* scsi disk8 */ 176 {129, 4, 144, "da"}, /* scsi disk9 */ 177 {130, 4, 160, "da"}, /* scsi disk10 */ 178 {131, 4, 176, "da"}, /* scsi disk11 */ 179 {132, 4, 192, "da"}, /* scsi disk12 */ 180 {133, 4, 208, "da"}, /* scsi disk13 */ 181 {134, 4, 224, "da"}, /* scsi disk14 */ 182 {135, 4, 240, "da"}, /* scsi disk15 */ 183 184 {202, 4, 0, "xbd"}, /* xbd */ 185 186 {0, 0, 0, NULL}, 187 }; 188 int major = vdevice >> 8; 189 int minor = vdevice & 0xff; 190 int i; 191 192 if (vdevice & (1 << 28)) { 193 *unit = (vdevice & ((1 << 28) - 1)) >> 8; 194 *name = "xbd"; 195 } 196 197 for (i = 0; info[i].major; i++) { 198 if (info[i].major == major) { 199 *unit = info[i].base + (minor >> info[i].shift); 200 *name = info[i].name; 201 return; 202 } 203 } 204 205 *unit = minor >> 4; 206 *name = "xbd"; 207} 208 209int 210xlvbd_add(device_t dev, blkif_sector_t capacity, 211 int vdevice, uint16_t vdisk_info, uint16_t sector_size, 212 struct blkfront_info *info) 213{ 214 struct xb_softc *sc; 215 int unit, error = 0; 216 const char *name; 217 218 blkfront_vdevice_to_unit(vdevice, &unit, &name); 219 220 sc = (struct 
xb_softc *)malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO); 221 sc->xb_unit = unit; 222 sc->xb_info = info; 223 info->sc = sc; 224 225 if (strcmp(name, "xbd")) 226 device_printf(dev, "attaching as %s%d\n", name, unit); 227 228 memset(&sc->xb_disk, 0, sizeof(sc->xb_disk)); 229 sc->xb_disk = disk_alloc(); 230 sc->xb_disk->d_unit = sc->xb_unit; 231 sc->xb_disk->d_open = blkif_open; 232 sc->xb_disk->d_close = blkif_close; 233 sc->xb_disk->d_ioctl = blkif_ioctl; 234 sc->xb_disk->d_strategy = xb_strategy; 235 sc->xb_disk->d_name = name; 236 sc->xb_disk->d_drv1 = sc; 237 sc->xb_disk->d_sectorsize = sector_size; 238 239 /* XXX */ 240 sc->xb_disk->d_mediasize = capacity << XBD_SECTOR_SHFT; 241#if 0 242 sc->xb_disk->d_maxsize = DFLTPHYS; 243#else /* XXX: xen can't handle large single i/o requests */ 244 sc->xb_disk->d_maxsize = 4096; 245#endif 246#ifdef notyet 247 XENPRINTF("attaching device 0x%x unit %d capacity %llu\n", 248 xb_diskinfo[sc->xb_unit].device, sc->xb_unit, 249 sc->xb_disk->d_mediasize); 250#endif 251 sc->xb_disk->d_flags = 0; 252 disk_create(sc->xb_disk, DISK_VERSION_00); 253 bioq_init(&sc->xb_bioq); 254 255 return error; 256} 257 258void 259xlvbd_del(struct blkfront_info *info) 260{ 261 struct xb_softc *sc; 262 263 sc = info->sc; 264 disk_destroy(sc->xb_disk); 265} 266/************************ end VBD support *****************/ 267 268/* 269 * Read/write routine for a buffer. Finds the proper unit, place it on 270 * the sortq and kick the controller. 271 */ 272static void 273xb_strategy(struct bio *bp) 274{ 275 struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1; 276 277 /* bogus disk? 
*/ 278 if (sc == NULL) { 279 bp->bio_error = EINVAL; 280 bp->bio_flags |= BIO_ERROR; 281 goto bad; 282 } 283 284 DPRINTK(""); 285 286 /* 287 * Place it in the queue of disk activities for this disk 288 */ 289 mtx_lock(&blkif_io_lock); 290 bioq_disksort(&sc->xb_bioq, bp); 291 292 xb_startio(sc); 293 mtx_unlock(&blkif_io_lock); 294 return; 295 296 bad: 297 /* 298 * Correctly set the bio to indicate a failed tranfer. 299 */ 300 bp->bio_resid = bp->bio_bcount; 301 biodone(bp); 302 return; 303} 304 305static int 306blkfront_probe(device_t dev) 307{ 308 309 if (!strcmp(xenbus_get_type(dev), "vbd")) { 310 device_set_desc(dev, "Virtual Block Device"); 311 device_quiet(dev); 312 return (0); 313 } 314 315 return (ENXIO); 316} 317 318/* 319 * Setup supplies the backend dir, virtual device. We place an event 320 * channel and shared frame entries. We watch backend to wait if it's 321 * ok. 322 */ 323static int 324blkfront_attach(device_t dev) 325{ 326 int err, vdevice, i, unit; 327 struct blkfront_info *info; 328 const char *name; 329 330 /* FIXME: Use dynamic device id if this is not set. */ 331 err = xenbus_scanf(XBT_NIL, xenbus_get_node(dev), 332 "virtual-device", NULL, "%i", &vdevice); 333 if (err) { 334 xenbus_dev_fatal(dev, err, "reading virtual-device"); 335 printf("couldn't find virtual device"); 336 return (err); 337 } 338 339 blkfront_vdevice_to_unit(vdevice, &unit, &name); 340 if (!strcmp(name, "xbd")) 341 device_set_unit(dev, unit); 342 343 info = device_get_softc(dev); 344 345 /* 346 * XXX debug only 347 */ 348 for (i = 0; i < sizeof(*info); i++) 349 if (((uint8_t *)info)[i] != 0) 350 panic("non-null memory"); 351 352 info->shadow_free = 0; 353 info->xbdev = dev; 354 info->vdevice = vdevice; 355 info->connected = BLKIF_STATE_DISCONNECTED; 356 357 /* work queue needed ? */ 358 for (i = 0; i < BLK_RING_SIZE; i++) 359 info->shadow[i].req.id = i+1; 360 info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; 361 362 /* Front end dir is a number, which is used as the id. 
*/ 363 info->handle = strtoul(strrchr(xenbus_get_node(dev),'/')+1, NULL, 0); 364 365 err = talk_to_backend(dev, info); 366 if (err) 367 return (err); 368 369 return (0); 370} 371 372static int 373blkfront_resume(device_t dev) 374{ 375 struct blkfront_info *info = device_get_softc(dev); 376 int err; 377 378 DPRINTK("blkfront_resume: %s\n", dev->nodename); 379 380 blkif_free(info, 1); 381 382 err = talk_to_backend(dev, info); 383 384 if (info->connected == BLKIF_STATE_SUSPENDED && !err) 385 blkif_recover(info); 386 387 return err; 388} 389 390/* Common code used when first setting up, and when resuming. */ 391static int 392talk_to_backend(device_t dev, struct blkfront_info *info) 393{ 394 const char *message = NULL; 395 struct xenbus_transaction xbt; 396 int err; 397 398 /* Create shared ring, alloc event channel. */ 399 err = setup_blkring(dev, info); 400 if (err) 401 goto out; 402 403 again: 404 err = xenbus_transaction_start(&xbt); 405 if (err) { 406 xenbus_dev_fatal(dev, err, "starting transaction"); 407 goto destroy_blkring; 408 } 409 410 err = xenbus_printf(xbt, xenbus_get_node(dev), 411 "ring-ref","%u", info->ring_ref); 412 if (err) { 413 message = "writing ring-ref"; 414 goto abort_transaction; 415 } 416 err = xenbus_printf(xbt, xenbus_get_node(dev), 417 "event-channel", "%u", irq_to_evtchn_port(info->irq)); 418 if (err) { 419 message = "writing event-channel"; 420 goto abort_transaction; 421 } 422 err = xenbus_printf(xbt, xenbus_get_node(dev), 423 "protocol", "%s", XEN_IO_PROTO_ABI_NATIVE); 424 if (err) { 425 message = "writing protocol"; 426 goto abort_transaction; 427 } 428 err = xenbus_transaction_end(xbt, 0); 429 if (err) { 430 if (err == EAGAIN) 431 goto again; 432 xenbus_dev_fatal(dev, err, "completing transaction"); 433 goto destroy_blkring; 434 } 435 xenbus_set_state(dev, XenbusStateInitialised); 436 437 return 0; 438 439 abort_transaction: 440 xenbus_transaction_end(xbt, 1); 441 if (message) 442 xenbus_dev_fatal(dev, err, "%s", message); 443 
destroy_blkring: 444 blkif_free(info, 0); 445 out: 446 return err; 447} 448 449static int 450setup_blkring(device_t dev, struct blkfront_info *info) 451{ 452 blkif_sring_t *sring; 453 int error; 454 455 info->ring_ref = GRANT_INVALID_REF; 456 457 sring = (blkif_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO); 458 if (sring == NULL) { 459 xenbus_dev_fatal(dev, ENOMEM, "allocating shared ring"); 460 return ENOMEM; 461 } 462 SHARED_RING_INIT(sring); 463 FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); 464 465 error = xenbus_grant_ring(dev, (vtomach(info->ring.sring) >> PAGE_SHIFT), 466 &info->ring_ref); 467 if (error) { 468 free(sring, M_DEVBUF); 469 info->ring.sring = NULL; 470 goto fail; 471 } 472 473 error = bind_listening_port_to_irqhandler(xenbus_get_otherend_id(dev), 474 "xbd", (driver_intr_t *)blkif_int, info, 475 INTR_TYPE_BIO | INTR_MPSAFE, &info->irq); 476 if (error) { 477 xenbus_dev_fatal(dev, error, 478 "bind_evtchn_to_irqhandler failed"); 479 goto fail; 480 } 481 482 return (0); 483 fail: 484 blkif_free(info, 0); 485 return (error); 486} 487 488 489/** 490 * Callback received when the backend's state changes. 
491 */ 492static void 493blkfront_backend_changed(device_t dev, XenbusState backend_state) 494{ 495 struct blkfront_info *info = device_get_softc(dev); 496 497 DPRINTK("blkfront:backend_changed.\n"); 498 499 switch (backend_state) { 500 case XenbusStateUnknown: 501 case XenbusStateInitialising: 502 case XenbusStateInitWait: 503 case XenbusStateInitialised: 504 case XenbusStateClosed: 505 case XenbusStateReconfigured: 506 case XenbusStateReconfiguring: 507 break; 508 509 case XenbusStateConnected: 510 connect(dev, info); 511 break; 512 513 case XenbusStateClosing: 514 if (info->users > 0) 515 xenbus_dev_error(dev, -EBUSY, 516 "Device in use; refusing to close"); 517 else 518 blkfront_closing(dev); 519#ifdef notyet 520 bd = bdget(info->dev); 521 if (bd == NULL) 522 xenbus_dev_fatal(dev, -ENODEV, "bdget failed"); 523 524 down(&bd->bd_sem); 525 if (info->users > 0) 526 xenbus_dev_error(dev, -EBUSY, 527 "Device in use; refusing to close"); 528 else 529 blkfront_closing(dev); 530 up(&bd->bd_sem); 531 bdput(bd); 532#endif 533 } 534} 535 536/* 537** Invoked when the backend is finally 'ready' (and has told produced 538** the details about the physical device - #sectors, size, etc). 
539*/ 540static void 541connect(device_t dev, struct blkfront_info *info) 542{ 543 unsigned long sectors, sector_size; 544 unsigned int binfo; 545 int err; 546 547 if( (info->connected == BLKIF_STATE_CONNECTED) || 548 (info->connected == BLKIF_STATE_SUSPENDED) ) 549 return; 550 551 DPRINTK("blkfront.c:connect:%s.\n", xenbus_get_otherend_path(dev)); 552 553 err = xenbus_gather(XBT_NIL, xenbus_get_otherend_path(dev), 554 "sectors", "%lu", §ors, 555 "info", "%u", &binfo, 556 "sector-size", "%lu", §or_size, 557 NULL); 558 if (err) { 559 xenbus_dev_fatal(dev, err, 560 "reading backend fields at %s", 561 xenbus_get_otherend_path(dev)); 562 return; 563 } 564 err = xenbus_gather(XBT_NIL, xenbus_get_otherend_path(dev), 565 "feature-barrier", "%lu", &info->feature_barrier, 566 NULL); 567 if (err) 568 info->feature_barrier = 0; 569 570 device_printf(dev, "%juMB <%s> at %s", 571 (uintmax_t) sectors / (1048576 / sector_size), 572 device_get_desc(dev), 573 xenbus_get_node(dev)); 574 bus_print_child_footer(device_get_parent(dev), dev); 575 576 xlvbd_add(dev, sectors, info->vdevice, binfo, sector_size, info); 577 578 (void)xenbus_set_state(dev, XenbusStateConnected); 579 580 /* Kick pending requests. */ 581 mtx_lock(&blkif_io_lock); 582 info->connected = BLKIF_STATE_CONNECTED; 583 kick_pending_request_queues(info); 584 mtx_unlock(&blkif_io_lock); 585 info->is_ready = 1; 586 587#if 0 588 add_disk(info->gd); 589#endif 590} 591 592/** 593 * Handle the change of state of the backend to Closing. We must delete our 594 * device-layer structures now, to ensure that writes are flushed through to 595 * the backend. Once is this done, we can switch to Closed in 596 * acknowledgement. 
597 */ 598static void 599blkfront_closing(device_t dev) 600{ 601 struct blkfront_info *info = device_get_softc(dev); 602 603 DPRINTK("blkfront_closing: %s removed\n", xenbus_get_node(dev)); 604 605 if (info->mi) { 606 DPRINTK("Calling xlvbd_del\n"); 607 xlvbd_del(info); 608 info->mi = NULL; 609 } 610 611 xenbus_set_state(dev, XenbusStateClosed); 612} 613 614 615static int 616blkfront_detach(device_t dev) 617{ 618 struct blkfront_info *info = device_get_softc(dev); 619 620 DPRINTK("blkfront_remove: %s removed\n", xenbus_get_node(dev)); 621 622 blkif_free(info, 0); 623 624 return 0; 625} 626 627 628static inline int 629GET_ID_FROM_FREELIST(struct blkfront_info *info) 630{ 631 unsigned long nfree = info->shadow_free; 632 633 KASSERT(nfree <= BLK_RING_SIZE, ("free %lu > RING_SIZE", nfree)); 634 info->shadow_free = info->shadow[nfree].req.id; 635 info->shadow[nfree].req.id = 0x0fffffee; /* debug */ 636 return nfree; 637} 638 639static inline void 640ADD_ID_TO_FREELIST(struct blkfront_info *info, unsigned long id) 641{ 642 info->shadow[id].req.id = info->shadow_free; 643 info->shadow[id].request = 0; 644 info->shadow_free = id; 645} 646 647static inline void 648flush_requests(struct blkfront_info *info) 649{ 650 int notify; 651 652 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify); 653 654 if (notify) 655 notify_remote_via_irq(info->irq); 656} 657 658static void 659kick_pending_request_queues(struct blkfront_info *info) 660{ 661 /* XXX check if we can't simplify */ 662#if 0 663 if (!RING_FULL(&info->ring)) { 664 /* Re-enable calldowns. */ 665 blk_start_queue(info->rq); 666 /* Kick things off immediately. */ 667 do_blkif_request(info->rq); 668 } 669#endif 670 if (!RING_FULL(&info->ring)) { 671#if 0 672 sc = LIST_FIRST(&xbsl_head); 673 LIST_REMOVE(sc, entry); 674 /* Re-enable calldowns. */ 675 blk_start_queue(di->rq); 676#endif 677 /* Kick things off immediately. 
*/ 678 xb_startio(info->sc); 679 } 680} 681 682#if 0 683/* XXX */ 684static void blkif_restart_queue(void *arg) 685{ 686 struct blkfront_info *info = (struct blkfront_info *)arg; 687 688 mtx_lock(&blkif_io_lock); 689 kick_pending_request_queues(info); 690 mtx_unlock(&blkif_io_lock); 691} 692#endif 693 694static void blkif_restart_queue_callback(void *arg) 695{ 696#if 0 697 struct blkfront_info *info = (struct blkfront_info *)arg; 698 /* XXX BSD equiv ? */ 699 700 schedule_work(&info->work); 701#endif 702} 703 704static int 705blkif_open(struct disk *dp) 706{ 707 struct xb_softc *sc = (struct xb_softc *)dp->d_drv1; 708 709 if (sc == NULL) { 710 printk("xb%d: not found", sc->xb_unit); 711 return (ENXIO); 712 } 713 714 sc->xb_flags |= XB_OPEN; 715 sc->xb_info->users++; 716 return (0); 717} 718 719static int 720blkif_close(struct disk *dp) 721{ 722 struct xb_softc *sc = (struct xb_softc *)dp->d_drv1; 723 724 if (sc == NULL) 725 return (ENXIO); 726 sc->xb_flags &= ~XB_OPEN; 727 if (--(sc->xb_info->users) == 0) { 728 /* Check whether we have been instructed to close. We will 729 have ignored this request initially, as the device was 730 still mounted. */ 731 device_t dev = sc->xb_info->xbdev; 732 XenbusState state = 733 xenbus_read_driver_state(xenbus_get_otherend_path(dev)); 734 735 if (state == XenbusStateClosing) 736 blkfront_closing(dev); 737 } 738 return (0); 739} 740 741static int 742blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td) 743{ 744 struct xb_softc *sc = (struct xb_softc *)dp->d_drv1; 745 746 if (sc == NULL) 747 return (ENXIO); 748 749 return (ENOTTY); 750} 751 752 753/* 754 * blkif_queue_request 755 * 756 * request block io 757 * 758 * id: for guest use only. 759 * operation: BLKIF_OP_{READ,WRITE,PROBE} 760 * buffer: buffer to read/write into. this should be a 761 * virtual address in the guest os. 
762 */ 763static int blkif_queue_request(struct bio *bp) 764{ 765 caddr_t alignbuf; 766 vm_paddr_t buffer_ma; 767 blkif_request_t *ring_req; 768 unsigned long id; 769 uint64_t fsect, lsect; 770 struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1; 771 struct blkfront_info *info = sc->xb_info; 772 int ref; 773 774 if (unlikely(sc->xb_info->connected != BLKIF_STATE_CONNECTED)) 775 return 1; 776 777 if (gnttab_alloc_grant_references( 778 BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) { 779 gnttab_request_free_callback( 780 &info->callback, 781 blkif_restart_queue_callback, 782 info, 783 BLKIF_MAX_SEGMENTS_PER_REQUEST); 784 return 1; 785 } 786 787 /* Check if the buffer is properly aligned */ 788 if ((vm_offset_t)bp->bio_data & PAGE_MASK) { 789 int align = (bp->bio_bcount < PAGE_SIZE/2) ? XBD_SECTOR_SIZE : 790 PAGE_SIZE; 791 caddr_t newbuf = malloc(bp->bio_bcount + align, M_DEVBUF, 792 M_NOWAIT); 793 794 alignbuf = (char *)roundup2((u_long)newbuf, align); 795 796 /* save a copy of the current buffer */ 797 bp->bio_driver1 = newbuf; 798 bp->bio_driver2 = alignbuf; 799 800 /* Copy the data for a write */ 801 if (bp->bio_cmd == BIO_WRITE) 802 bcopy(bp->bio_data, alignbuf, bp->bio_bcount); 803 } else 804 alignbuf = bp->bio_data; 805 806 /* Fill out a communications ring structure. */ 807 ring_req = RING_GET_REQUEST(&info->ring, 808 info->ring.req_prod_pvt); 809 id = GET_ID_FROM_FREELIST(info); 810 info->shadow[id].request = (unsigned long)bp; 811 812 ring_req->id = id; 813 ring_req->operation = (bp->bio_cmd == BIO_READ) ? BLKIF_OP_READ : 814 BLKIF_OP_WRITE; 815 816 ring_req->sector_number= (blkif_sector_t)bp->bio_pblkno; 817 ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xb_disk; 818 819 ring_req->nr_segments = 0; /* XXX not doing scatter/gather since buffer 820 * chaining is not supported. 
821 */ 822 823 buffer_ma = vtomach(alignbuf); 824 fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT; 825 lsect = fsect + (bp->bio_bcount >> XBD_SECTOR_SHFT) - 1; 826 /* install a grant reference. */ 827 ref = gnttab_claim_grant_reference(&gref_head); 828 KASSERT( ref != -ENOSPC, ("grant_reference failed") ); 829 830 gnttab_grant_foreign_access_ref( 831 ref, 832 xenbus_get_otherend_id(info->xbdev), 833 buffer_ma >> PAGE_SHIFT, 834 ring_req->operation & 1 ); /* ??? */ 835 info->shadow[id].frame[ring_req->nr_segments] = 836 buffer_ma >> PAGE_SHIFT; 837 838 ring_req->seg[ring_req->nr_segments] = 839 (struct blkif_request_segment) { 840 .gref = ref, 841 .first_sect = fsect, 842 .last_sect = lsect }; 843 844 ring_req->nr_segments++; 845 KASSERT((buffer_ma & (XBD_SECTOR_SIZE-1)) == 0, 846 ("XEN buffer must be sector aligned")); 847 KASSERT(lsect <= 7, 848 ("XEN disk driver data cannot cross a page boundary")); 849 850 buffer_ma &= ~PAGE_MASK; 851 852 info->ring.req_prod_pvt++; 853 854 /* Keep a private copy so we can reissue requests when recovering. */ 855 info->shadow[id].req = *ring_req; 856 857 gnttab_free_grant_references(gref_head); 858 859 return 0; 860} 861 862 863 864/* 865 * Dequeue buffers and place them in the shared communication ring. 866 * Return when no more requests can be accepted or all buffers have 867 * been queued. 868 * 869 * Signal XEN once the ring has been filled out. 
870 */ 871static void 872xb_startio(struct xb_softc *sc) 873{ 874 struct bio *bp; 875 int queued = 0; 876 struct blkfront_info *info = sc->xb_info; 877 DPRINTK(""); 878 879 mtx_assert(&blkif_io_lock, MA_OWNED); 880 881 while ((bp = bioq_takefirst(&sc->xb_bioq)) != NULL) { 882 883 if (RING_FULL(&info->ring)) 884 goto wait; 885 886 if (blkif_queue_request(bp)) { 887 wait: 888 bioq_insert_head(&sc->xb_bioq, bp); 889 break; 890 } 891 queued++; 892 } 893 894 if (queued != 0) 895 flush_requests(sc->xb_info); 896} 897 898static void 899blkif_int(void *xsc) 900{ 901 struct xb_softc *sc = NULL; 902 struct bio *bp; 903 blkif_response_t *bret; 904 RING_IDX i, rp; 905 struct blkfront_info *info = xsc; 906 DPRINTK(""); 907 908 TRACE_ENTER; 909 910 mtx_lock(&blkif_io_lock); 911 912 if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) { 913 mtx_unlock(&blkif_io_lock); 914 return; 915 } 916 917 again: 918 rp = info->ring.sring->rsp_prod; 919 rmb(); /* Ensure we see queued responses up to 'rp'. */ 920 921 for (i = info->ring.rsp_cons; i != rp; i++) { 922 unsigned long id; 923 924 bret = RING_GET_RESPONSE(&info->ring, i); 925 id = bret->id; 926 bp = (struct bio *)info->shadow[id].request; 927 928 blkif_completion(&info->shadow[id]); 929 930 ADD_ID_TO_FREELIST(info, id); 931 932 switch (bret->operation) { 933 case BLKIF_OP_READ: 934 /* had an unaligned buffer that needs to be copied */ 935 if (bp->bio_driver1) 936 bcopy(bp->bio_driver2, bp->bio_data, bp->bio_bcount); 937 /* FALLTHROUGH */ 938 case BLKIF_OP_WRITE: 939 940 /* free the copy buffer */ 941 if (bp->bio_driver1) { 942 free(bp->bio_driver1, M_DEVBUF); 943 bp->bio_driver1 = NULL; 944 } 945 946 if ( unlikely(bret->status != BLKIF_RSP_OKAY) ) { 947 printf("Bad return from blkdev data request: %x\n", 948 bret->status); 949 bp->bio_flags |= BIO_ERROR; 950 } 951 952 sc = (struct xb_softc *)bp->bio_disk->d_drv1; 953 954 if (bp->bio_flags & BIO_ERROR) 955 bp->bio_error = EIO; 956 else 957 bp->bio_resid = 0; 958 959 biodone(bp); 
960 break; 961 default: 962 panic("received invalid operation"); 963 break; 964 } 965 } 966 967 info->ring.rsp_cons = i; 968 969 if (i != info->ring.req_prod_pvt) { 970 int more_to_do; 971 RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do); 972 if (more_to_do) 973 goto again; 974 } else { 975 info->ring.sring->rsp_event = i + 1; 976 } 977 978 kick_pending_request_queues(info); 979 980 mtx_unlock(&blkif_io_lock); 981} 982 983static void 984blkif_free(struct blkfront_info *info, int suspend) 985{ 986 987/* Prevent new requests being issued until we fix things up. */ 988 mtx_lock(&blkif_io_lock); 989 info->connected = suspend ? 990 BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED; 991 mtx_unlock(&blkif_io_lock); 992 993 /* Free resources associated with old device channel. */ 994 if (info->ring_ref != GRANT_INVALID_REF) { 995 gnttab_end_foreign_access(info->ring_ref, 996 info->ring.sring); 997 info->ring_ref = GRANT_INVALID_REF; 998 info->ring.sring = NULL; 999 } 1000 if (info->irq) 1001 unbind_from_irqhandler(info->irq); 1002 info->irq = 0; 1003 1004} 1005 1006static void 1007blkif_completion(struct blk_shadow *s) 1008{ 1009 int i; 1010 1011 for (i = 0; i < s->req.nr_segments; i++) 1012 gnttab_end_foreign_access(s->req.seg[i].gref, 0UL); 1013} 1014 1015static void 1016blkif_recover(struct blkfront_info *info) 1017{ 1018 int i, j; 1019 blkif_request_t *req; 1020 struct blk_shadow *copy; 1021 1022 /* Stage 1: Make a safe copy of the shadow state. */ 1023 copy = (struct blk_shadow *)malloc(sizeof(info->shadow), M_DEVBUF, M_NOWAIT|M_ZERO); 1024 PANIC_IF(copy == NULL); 1025 memcpy(copy, info->shadow, sizeof(info->shadow)); 1026 1027 /* Stage 2: Set up free list. */ 1028 memset(&info->shadow, 0, sizeof(info->shadow)); 1029 for (i = 0; i < BLK_RING_SIZE; i++) 1030 info->shadow[i].req.id = i+1; 1031 info->shadow_free = info->ring.req_prod_pvt; 1032 info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; 1033 1034 /* Stage 3: Find pending requests and requeue them. 
*/ 1035 for (i = 0; i < BLK_RING_SIZE; i++) { 1036 /* Not in use? */ 1037 if (copy[i].request == 0) 1038 continue; 1039 1040 /* Grab a request slot and copy shadow state into it. */ 1041 req = RING_GET_REQUEST( 1042 &info->ring, info->ring.req_prod_pvt); 1043 *req = copy[i].req; 1044 1045 /* We get a new request id, and must reset the shadow state. */ 1046 req->id = GET_ID_FROM_FREELIST(info); 1047 memcpy(&info->shadow[req->id], ©[i], sizeof(copy[i])); 1048 1049 /* Rewrite any grant references invalidated by suspend/resume. */ 1050 for (j = 0; j < req->nr_segments; j++) 1051 gnttab_grant_foreign_access_ref( 1052 req->seg[j].gref, 1053 xenbus_get_otherend_id(info->xbdev), 1054 pfn_to_mfn(info->shadow[req->id].frame[j]), 1055 0 /* assume not readonly */); 1056 1057 info->shadow[req->id].req = *req; 1058 1059 info->ring.req_prod_pvt++; 1060 } 1061 1062 free(copy, M_DEVBUF); 1063 1064 xenbus_set_state(info->xbdev, XenbusStateConnected); 1065 1066 /* Now safe for us to use the shared ring */ 1067 mtx_lock(&blkif_io_lock); 1068 info->connected = BLKIF_STATE_CONNECTED; 1069 mtx_unlock(&blkif_io_lock); 1070 1071 /* Send off requeued requests */ 1072 mtx_lock(&blkif_io_lock); 1073 flush_requests(info); 1074 1075 /* Kick any other new requests queued since we resumed */ 1076 kick_pending_request_queues(info); 1077 mtx_unlock(&blkif_io_lock); 1078} 1079 1080/* ** Driver registration ** */ 1081static device_method_t blkfront_methods[] = { 1082 /* Device interface */ 1083 DEVMETHOD(device_probe, blkfront_probe), 1084 DEVMETHOD(device_attach, blkfront_attach), 1085 DEVMETHOD(device_detach, blkfront_detach), 1086 DEVMETHOD(device_shutdown, bus_generic_shutdown), 1087 DEVMETHOD(device_suspend, bus_generic_suspend), 1088 DEVMETHOD(device_resume, blkfront_resume), 1089 1090 /* Xenbus interface */ 1091 DEVMETHOD(xenbus_backend_changed, blkfront_backend_changed), 1092 1093 { 0, 0 } 1094}; 1095 1096static driver_t blkfront_driver = { 1097 "xbd", 1098 blkfront_methods, 1099 
sizeof(struct blkfront_info), 1100}; 1101devclass_t blkfront_devclass; 1102 1103DRIVER_MODULE(xbd, xenbus, blkfront_driver, blkfront_devclass, 0, 0); 1104 1105MTX_SYSINIT(ioreq, &blkif_io_lock, "BIO LOCK", MTX_NOWITNESS); /* XXX how does one enroll a lock? */ 1106 1107