/* blkfront.c revision 183375 */
1139749Simp/*- 252417Sluoqi * All rights reserved. 352417Sluoqi * 452417Sluoqi * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 552417Sluoqi * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 652417Sluoqi * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 752417Sluoqi * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 852417Sluoqi * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 952417Sluoqi * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 1052417Sluoqi * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 1152417Sluoqi * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 1252417Sluoqi * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 1352417Sluoqi * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 1452417Sluoqi * SUCH DAMAGE. 1552417Sluoqi * 1652417Sluoqi */ 1752417Sluoqi 1852417Sluoqi/* 1952417Sluoqi * XenoBSD block device driver 2052417Sluoqi */ 2152417Sluoqi 2252417Sluoqi#include <sys/cdefs.h> 2352417Sluoqi__FBSDID("$FreeBSD: head/sys/dev/xen/blkfront/blkfront.c 183375 2008-09-26 05:29:39Z kmacy $"); 2452417Sluoqi 2552417Sluoqi#include <sys/param.h> 2652417Sluoqi#include <sys/systm.h> 2752417Sluoqi#include <sys/malloc.h> 2852417Sluoqi#include <sys/kernel.h> 2952417Sluoqi#include <vm/vm.h> 3052417Sluoqi#include <vm/pmap.h> 3152417Sluoqi 3252417Sluoqi#include <sys/bio.h> 3352417Sluoqi#include <sys/bus.h> 3452417Sluoqi#include <sys/conf.h> 3552417Sluoqi#include <sys/module.h> 3652417Sluoqi 3752417Sluoqi#include <machine/bus.h> 3852417Sluoqi#include <sys/rman.h> 3952417Sluoqi#include <machine/resource.h> 4052417Sluoqi#include <machine/intr_machdep.h> 4152417Sluoqi#include <machine/vmparam.h> 4252417Sluoqi 4352417Sluoqi#include <machine/xen/hypervisor.h> 4452417Sluoqi#include <machine/xen/xen-os.h> 4552417Sluoqi#include 
<machine/xen/xen_intr.h> 4652417Sluoqi#include <machine/xen/xenbus.h> 4752417Sluoqi#include <machine/xen/evtchn.h> 4852417Sluoqi#include <xen/interface/grant_table.h> 4952417Sluoqi 5052417Sluoqi#include <geom/geom_disk.h> 5152417Sluoqi#include <machine/xen/xenfunc.h> 5252417Sluoqi#include <xen/gnttab.h> 5352417Sluoqi 5452417Sluoqi#include <dev/xen/blkfront/block.h> 5552417Sluoqi 5652417Sluoqi#define ASSERT(S) KASSERT(S, (#S)) 5752417Sluoqi/* prototypes */ 5852417Sluoqistruct xb_softc; 5952417Sluoqistatic void xb_startio(struct xb_softc *sc); 6052417Sluoqistatic void connect(struct blkfront_info *); 6152417Sluoqistatic void blkfront_closing(struct xenbus_device *); 6252417Sluoqistatic int blkfront_remove(struct xenbus_device *); 6352417Sluoqistatic int talk_to_backend(struct xenbus_device *, struct blkfront_info *); 6452417Sluoqistatic int setup_blkring(struct xenbus_device *, struct blkfront_info *); 6552417Sluoqistatic void blkif_int(void *); 6652417Sluoqi#if 0 6752417Sluoqistatic void blkif_restart_queue(void *arg); 6852417Sluoqi#endif 6952417Sluoqistatic void blkif_recover(struct blkfront_info *); 7052417Sluoqistatic void blkif_completion(struct blk_shadow *); 7152417Sluoqistatic void blkif_free(struct blkfront_info *, int); 7252417Sluoqi 7352417Sluoqi#define GRANT_INVALID_REF 0 7452417Sluoqi#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE) 7592370Sluoqi 7692370SluoqiLIST_HEAD(xb_softc_list_head, xb_softc) xbsl_head; 7792370Sluoqi 7852417Sluoqi/* Control whether runtime update of vbds is enabled. 
*/ 7952417Sluoqi#define ENABLE_VBD_UPDATE 0 8052417Sluoqi 8152417Sluoqi#if ENABLE_VBD_UPDATE 8252417Sluoqistatic void vbd_update(void); 8352417Sluoqi#endif 8452417Sluoqi 8552417Sluoqi 8652417Sluoqi#define BLKIF_STATE_DISCONNECTED 0 8752417Sluoqi#define BLKIF_STATE_CONNECTED 1 8852417Sluoqi#define BLKIF_STATE_SUSPENDED 2 8952417Sluoqi 9052417Sluoqi#ifdef notyet 9152417Sluoqistatic char *blkif_state_name[] = { 9252417Sluoqi [BLKIF_STATE_DISCONNECTED] = "disconnected", 9352417Sluoqi [BLKIF_STATE_CONNECTED] = "connected", 9452417Sluoqi [BLKIF_STATE_SUSPENDED] = "closed", 9552417Sluoqi}; 9652417Sluoqi 9752417Sluoqistatic char * blkif_status_name[] = { 9852417Sluoqi [BLKIF_INTERFACE_STATUS_CLOSED] = "closed", 9952417Sluoqi [BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected", 10052417Sluoqi [BLKIF_INTERFACE_STATUS_CONNECTED] = "connected", 10152417Sluoqi [BLKIF_INTERFACE_STATUS_CHANGED] = "changed", 10252417Sluoqi}; 10352417Sluoqi#endif 10452417Sluoqi#define WPRINTK(fmt, args...) printf("[XEN] " fmt, ##args) 10552417Sluoqi#if 0 10652417Sluoqi#define DPRINTK(fmt, args...) printf("[XEN] %s:%d" fmt ".\n", __FUNCTION__, __LINE__,##args) 10752417Sluoqi#else 10852417Sluoqi#define DPRINTK(fmt, args...) 
10952417Sluoqi#endif 11052417Sluoqi 11152417Sluoqistatic grant_ref_t gref_head; 11252417Sluoqi#define MAXIMUM_OUTSTANDING_BLOCK_REQS \ 11352417Sluoqi (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE) 11452417Sluoqi 11552417Sluoqistatic void kick_pending_request_queues(struct blkfront_info *); 11652417Sluoqistatic int blkif_open(struct disk *dp); 11752417Sluoqistatic int blkif_close(struct disk *dp); 11852417Sluoqistatic int blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td); 11952417Sluoqistatic int blkif_queue_request(struct bio *bp); 12052417Sluoqistatic void xb_strategy(struct bio *bp); 12152417Sluoqi 12252417Sluoqi 12352417Sluoqi 12452417Sluoqi/* XXX move to xb_vbd.c when VBD update support is added */ 12552417Sluoqi#define MAX_VBDS 64 12652417Sluoqi 12752417Sluoqi#define XBD_SECTOR_SIZE 512 /* XXX: assume for now */ 12852417Sluoqi#define XBD_SECTOR_SHFT 9 12952417Sluoqi 13052417Sluoqistatic struct mtx blkif_io_lock; 13152417Sluoqi 13252417Sluoqistatic vm_paddr_t 13352417Sluoqipfn_to_mfn(vm_paddr_t pfn) 13452417Sluoqi{ 13552417Sluoqi return (phystomach(pfn << PAGE_SHIFT) >> PAGE_SHIFT); 13652417Sluoqi} 13752417Sluoqi 13852417Sluoqi 13952417Sluoqiint 14052417Sluoqixlvbd_add(blkif_sector_t capacity, int unit, uint16_t vdisk_info, uint16_t sector_size, 14152417Sluoqi struct blkfront_info *info) 14252417Sluoqi{ 14352417Sluoqi struct xb_softc *sc; 14452417Sluoqi int error = 0; 14552417Sluoqi int unitno = unit - 767; 14652417Sluoqi 14752417Sluoqi sc = (struct xb_softc *)malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO); 14852417Sluoqi sc->xb_unit = unitno; 14952417Sluoqi sc->xb_info = info; 15052417Sluoqi info->sc = sc; 15152417Sluoqi 15252417Sluoqi memset(&sc->xb_disk, 0, sizeof(sc->xb_disk)); 15352417Sluoqi sc->xb_disk = disk_alloc(); 15452417Sluoqi sc->xb_disk->d_unit = unitno; 15552417Sluoqi sc->xb_disk->d_open = blkif_open; 15652417Sluoqi sc->xb_disk->d_close = blkif_close; 15752417Sluoqi sc->xb_disk->d_ioctl = blkif_ioctl; 
15852417Sluoqi sc->xb_disk->d_strategy = xb_strategy; 15952417Sluoqi sc->xb_disk->d_name = "xbd"; 16052417Sluoqi sc->xb_disk->d_drv1 = sc; 16152417Sluoqi sc->xb_disk->d_sectorsize = sector_size; 16252417Sluoqi 16352417Sluoqi /* XXX */ 16452417Sluoqi sc->xb_disk->d_mediasize = capacity << XBD_SECTOR_SHFT; 16552417Sluoqi#if 0 16652417Sluoqi sc->xb_disk->d_maxsize = DFLTPHYS; 16752417Sluoqi#else /* XXX: xen can't handle large single i/o requests */ 16852417Sluoqi sc->xb_disk->d_maxsize = 4096; 16952417Sluoqi#endif 17052417Sluoqi#ifdef notyet 17152417Sluoqi XENPRINTF("attaching device 0x%x unit %d capacity %llu\n", 17252417Sluoqi xb_diskinfo[sc->xb_unit].device, sc->xb_unit, 17352417Sluoqi sc->xb_disk->d_mediasize); 17452417Sluoqi#endif 17552417Sluoqi sc->xb_disk->d_flags = 0; 17652417Sluoqi disk_create(sc->xb_disk, DISK_VERSION_00); 17752417Sluoqi bioq_init(&sc->xb_bioq); 17852417Sluoqi 17952417Sluoqi return error; 18052417Sluoqi} 18152417Sluoqi 18252417Sluoqivoid 18352417Sluoqixlvbd_del(struct blkfront_info *info) 18452417Sluoqi{ 18552417Sluoqi struct xb_softc *sc; 18652417Sluoqi 18752417Sluoqi sc = info->sc; 18852417Sluoqi disk_destroy(sc->xb_disk); 18952417Sluoqi} 19052417Sluoqi/************************ end VBD support *****************/ 19152417Sluoqi 19252417Sluoqi/* 19352417Sluoqi * Read/write routine for a buffer. Finds the proper unit, place it on 19452417Sluoqi * the sortq and kick the controller. 19552417Sluoqi */ 19652417Sluoqistatic void 19752417Sluoqixb_strategy(struct bio *bp) 19852417Sluoqi{ 19952417Sluoqi struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1; 20052417Sluoqi 20152417Sluoqi /* bogus disk? 
*/ 20252417Sluoqi if (sc == NULL) { 20352417Sluoqi bp->bio_error = EINVAL; 20452417Sluoqi bp->bio_flags |= BIO_ERROR; 20552417Sluoqi goto bad; 20652417Sluoqi } 20752417Sluoqi 20852417Sluoqi DPRINTK(""); 20952417Sluoqi 21052417Sluoqi /* 21152417Sluoqi * Place it in the queue of disk activities for this disk 21252417Sluoqi */ 21352417Sluoqi mtx_lock(&blkif_io_lock); 21452417Sluoqi bioq_disksort(&sc->xb_bioq, bp); 21552417Sluoqi 21652417Sluoqi xb_startio(sc); 21752417Sluoqi mtx_unlock(&blkif_io_lock); 21852417Sluoqi return; 21952417Sluoqi 22052417Sluoqi bad: 22152417Sluoqi /* 22252417Sluoqi * Correctly set the bio to indicate a failed tranfer. 22352417Sluoqi */ 22452417Sluoqi bp->bio_resid = bp->bio_bcount; 22552417Sluoqi biodone(bp); 22652417Sluoqi return; 22752417Sluoqi} 22852417Sluoqi 22952417Sluoqi 23052417Sluoqi/* Setup supplies the backend dir, virtual device. 23152417Sluoqi 23252417SluoqiWe place an event channel and shared frame entries. 23352417SluoqiWe watch backend to wait if it's ok. */ 23452417Sluoqistatic int blkfront_probe(struct xenbus_device *dev, 23552417Sluoqi const struct xenbus_device_id *id) 23652417Sluoqi{ 23752417Sluoqi int err, vdevice, i; 23852417Sluoqi struct blkfront_info *info; 23952417Sluoqi 24052417Sluoqi /* FIXME: Use dynamic device id if this is not set. 
*/ 24152417Sluoqi err = xenbus_scanf(XBT_NIL, dev->nodename, 24252417Sluoqi "virtual-device", "%i", &vdevice); 24352417Sluoqi if (err != 1) { 24452417Sluoqi xenbus_dev_fatal(dev, err, "reading virtual-device"); 24552417Sluoqi printf("couldn't find virtual device"); 24652417Sluoqi return (err); 24752417Sluoqi } 24852417Sluoqi 24952417Sluoqi info = malloc(sizeof(*info), M_DEVBUF, M_NOWAIT|M_ZERO); 25052417Sluoqi if (info == NULL) { 25152417Sluoqi xenbus_dev_fatal(dev, ENOMEM, "allocating info structure"); 25252417Sluoqi return ENOMEM; 25352417Sluoqi } 25452417Sluoqi 25552417Sluoqi /* 25652417Sluoqi * XXX debug only 25752417Sluoqi */ 25852417Sluoqi for (i = 0; i < sizeof(*info); i++) 25952417Sluoqi if (((uint8_t *)info)[i] != 0) 26052417Sluoqi panic("non-null memory"); 26152417Sluoqi 26252417Sluoqi info->shadow_free = 0; 26352417Sluoqi info->xbdev = dev; 26452417Sluoqi info->vdevice = vdevice; 26552417Sluoqi info->connected = BLKIF_STATE_DISCONNECTED; 26652417Sluoqi 26752417Sluoqi /* work queue needed ? */ 26852417Sluoqi for (i = 0; i < BLK_RING_SIZE; i++) 26952417Sluoqi info->shadow[i].req.id = i+1; 27052417Sluoqi info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; 27152417Sluoqi 27252417Sluoqi /* Front end dir is a number, which is used as the id. 
*/ 27352417Sluoqi info->handle = strtoul(strrchr(dev->nodename,'/')+1, NULL, 0); 27452417Sluoqi dev->dev_driver_data = info; 27552417Sluoqi 27652417Sluoqi err = talk_to_backend(dev, info); 27752417Sluoqi if (err) { 27852417Sluoqi free(info, M_DEVBUF); 27952417Sluoqi dev->dev_driver_data = NULL; 28052417Sluoqi return err; 28152417Sluoqi } 28252417Sluoqi 28352417Sluoqi return 0; 28452417Sluoqi} 28552417Sluoqi 28652417Sluoqi 28752417Sluoqistatic int blkfront_resume(struct xenbus_device *dev) 28852417Sluoqi{ 28952417Sluoqi struct blkfront_info *info = dev->dev_driver_data; 29052417Sluoqi int err; 29152417Sluoqi 29252417Sluoqi DPRINTK("blkfront_resume: %s\n", dev->nodename); 29352417Sluoqi 29452417Sluoqi blkif_free(info, 1); 29552417Sluoqi 29652417Sluoqi err = talk_to_backend(dev, info); 29752417Sluoqi if (!err) 29852417Sluoqi blkif_recover(info); 29952417Sluoqi 30052417Sluoqi return err; 30152417Sluoqi} 30252417Sluoqi 30352417Sluoqi/* Common code used when first setting up, and when resuming. */ 30452417Sluoqistatic int talk_to_backend(struct xenbus_device *dev, 30552417Sluoqi struct blkfront_info *info) 30652417Sluoqi{ 30752417Sluoqi const char *message = NULL; 30852417Sluoqi struct xenbus_transaction xbt; 30952417Sluoqi int err; 31052417Sluoqi 31152417Sluoqi /* Create shared ring, alloc event channel. 
*/ 31252417Sluoqi err = setup_blkring(dev, info); 31352417Sluoqi if (err) 31452417Sluoqi goto out; 31552417Sluoqi 31652417Sluoqi again: 31752417Sluoqi err = xenbus_transaction_start(&xbt); 31852417Sluoqi if (err) { 31952417Sluoqi xenbus_dev_fatal(dev, err, "starting transaction"); 32052417Sluoqi goto destroy_blkring; 32152417Sluoqi } 32252417Sluoqi 32352417Sluoqi err = xenbus_printf(xbt, dev->nodename, 32452417Sluoqi "ring-ref","%u", info->ring_ref); 32574370Sken if (err) { 32652417Sluoqi message = "writing ring-ref"; 32752417Sluoqi goto abort_transaction; 32874370Sken } 32952417Sluoqi err = xenbus_printf(xbt, dev->nodename, 33052417Sluoqi "event-channel", "%u", irq_to_evtchn_port(info->irq)); 33152417Sluoqi if (err) { 33252417Sluoqi message = "writing event-channel"; 33352417Sluoqi goto abort_transaction; 33452417Sluoqi } 33552417Sluoqi 336 err = xenbus_transaction_end(xbt, 0); 337 if (err) { 338 if (err == -EAGAIN) 339 goto again; 340 xenbus_dev_fatal(dev, err, "completing transaction"); 341 goto destroy_blkring; 342 } 343 xenbus_switch_state(dev, XenbusStateInitialised); 344 345 return 0; 346 347 abort_transaction: 348 xenbus_transaction_end(xbt, 1); 349 if (message) 350 xenbus_dev_fatal(dev, err, "%s", message); 351 destroy_blkring: 352 blkif_free(info, 0); 353 out: 354 return err; 355} 356 357static int 358setup_blkring(struct xenbus_device *dev, struct blkfront_info *info) 359{ 360 blkif_sring_t *sring; 361 int err; 362 363 info->ring_ref = GRANT_INVALID_REF; 364 365 sring = (blkif_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO); 366 if (sring == NULL) { 367 xenbus_dev_fatal(dev, ENOMEM, "allocating shared ring"); 368 return ENOMEM; 369 } 370 SHARED_RING_INIT(sring); 371 FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); 372 373 err = xenbus_grant_ring(dev, (vtomach(info->ring.sring) >> PAGE_SHIFT)); 374 if (err < 0) { 375 free(sring, M_DEVBUF); 376 info->ring.sring = NULL; 377 goto fail; 378 } 379 info->ring_ref = err; 380 381 err = 
bind_listening_port_to_irqhandler(dev->otherend_id, 382 "xbd", (driver_intr_t *)blkif_int, info, 383 INTR_TYPE_BIO | INTR_MPSAFE, NULL); 384 if (err <= 0) { 385 xenbus_dev_fatal(dev, err, 386 "bind_evtchn_to_irqhandler failed"); 387 goto fail; 388 } 389 info->irq = err; 390 391 return 0; 392 fail: 393 blkif_free(info, 0); 394 return err; 395} 396 397 398/** 399 * Callback received when the backend's state changes. 400 */ 401static void backend_changed(struct xenbus_device *dev, 402 XenbusState backend_state) 403{ 404 struct blkfront_info *info = dev->dev_driver_data; 405 406 DPRINTK("blkfront:backend_changed.\n"); 407 408 switch (backend_state) { 409 case XenbusStateUnknown: 410 case XenbusStateInitialising: 411 case XenbusStateInitWait: 412 case XenbusStateInitialised: 413 case XenbusStateClosed: 414 case XenbusStateReconfigured: 415 case XenbusStateReconfiguring: 416 break; 417 418 case XenbusStateConnected: 419 connect(info); 420 break; 421 422 case XenbusStateClosing: 423 if (info->users > 0) 424 xenbus_dev_error(dev, -EBUSY, 425 "Device in use; refusing to close"); 426 else 427 blkfront_closing(dev); 428#ifdef notyet 429 bd = bdget(info->dev); 430 if (bd == NULL) 431 xenbus_dev_fatal(dev, -ENODEV, "bdget failed"); 432 433 down(&bd->bd_sem); 434 if (info->users > 0) 435 xenbus_dev_error(dev, -EBUSY, 436 "Device in use; refusing to close"); 437 else 438 blkfront_closing(dev); 439 up(&bd->bd_sem); 440 bdput(bd); 441#endif 442 } 443} 444 445/* 446** Invoked when the backend is finally 'ready' (and has told produced 447** the details about the physical device - #sectors, size, etc). 
448*/ 449static void 450connect(struct blkfront_info *info) 451{ 452 unsigned long sectors, sector_size; 453 unsigned int binfo; 454 int err; 455 456 if( (info->connected == BLKIF_STATE_CONNECTED) || 457 (info->connected == BLKIF_STATE_SUSPENDED) ) 458 return; 459 460 DPRINTK("blkfront.c:connect:%s.\n", info->xbdev->otherend); 461 462 err = xenbus_gather(XBT_NIL, info->xbdev->otherend, 463 "sectors", "%lu", §ors, 464 "info", "%u", &binfo, 465 "sector-size", "%lu", §or_size, 466 NULL); 467 if (err) { 468 xenbus_dev_fatal(info->xbdev, err, 469 "reading backend fields at %s", 470 info->xbdev->otherend); 471 return; 472 } 473 err = xenbus_gather(XBT_NIL, info->xbdev->otherend, 474 "feature-barrier", "%lu", &info->feature_barrier, 475 NULL); 476 if (err) 477 info->feature_barrier = 0; 478 479 xlvbd_add(sectors, info->vdevice, binfo, sector_size, info); 480 481 (void)xenbus_switch_state(info->xbdev, XenbusStateConnected); 482 483 /* Kick pending requests. */ 484 mtx_lock(&blkif_io_lock); 485 info->connected = BLKIF_STATE_CONNECTED; 486 kick_pending_request_queues(info); 487 mtx_unlock(&blkif_io_lock); 488 info->is_ready = 1; 489 490#if 0 491 add_disk(info->gd); 492#endif 493} 494 495/** 496 * Handle the change of state of the backend to Closing. We must delete our 497 * device-layer structures now, to ensure that writes are flushed through to 498 * the backend. Once is this done, we can switch to Closed in 499 * acknowledgement. 
500 */ 501static void blkfront_closing(struct xenbus_device *dev) 502{ 503 struct blkfront_info *info = dev->dev_driver_data; 504 505 DPRINTK("blkfront_closing: %s removed\n", dev->nodename); 506 507 if (info->mi) { 508 DPRINTK("Calling xlvbd_del\n"); 509 xlvbd_del(info); 510 info->mi = NULL; 511 } 512 513 xenbus_switch_state(dev, XenbusStateClosed); 514} 515 516 517static int blkfront_remove(struct xenbus_device *dev) 518{ 519 struct blkfront_info *info = dev->dev_driver_data; 520 521 DPRINTK("blkfront_remove: %s removed\n", dev->nodename); 522 523 blkif_free(info, 0); 524 525 free(info, M_DEVBUF); 526 527 return 0; 528} 529 530 531static inline int 532GET_ID_FROM_FREELIST(struct blkfront_info *info) 533{ 534 unsigned long nfree = info->shadow_free; 535 536 KASSERT(nfree <= BLK_RING_SIZE, ("free %lu > RING_SIZE", nfree)); 537 info->shadow_free = info->shadow[nfree].req.id; 538 info->shadow[nfree].req.id = 0x0fffffee; /* debug */ 539 return nfree; 540} 541 542static inline void 543ADD_ID_TO_FREELIST(struct blkfront_info *info, unsigned long id) 544{ 545 info->shadow[id].req.id = info->shadow_free; 546 info->shadow[id].request = 0; 547 info->shadow_free = id; 548} 549 550static inline void 551flush_requests(struct blkfront_info *info) 552{ 553 int notify; 554 555 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify); 556 557 if (notify) 558 notify_remote_via_irq(info->irq); 559} 560 561static void 562kick_pending_request_queues(struct blkfront_info *info) 563{ 564 /* XXX check if we can't simplify */ 565#if 0 566 if (!RING_FULL(&info->ring)) { 567 /* Re-enable calldowns. */ 568 blk_start_queue(info->rq); 569 /* Kick things off immediately. */ 570 do_blkif_request(info->rq); 571 } 572#endif 573 if (!RING_FULL(&info->ring)) { 574#if 0 575 sc = LIST_FIRST(&xbsl_head); 576 LIST_REMOVE(sc, entry); 577 /* Re-enable calldowns. */ 578 blk_start_queue(di->rq); 579#endif 580 /* Kick things off immediately. 
*/ 581 xb_startio(info->sc); 582 } 583} 584 585#if 0 586/* XXX */ 587static void blkif_restart_queue(void *arg) 588{ 589 struct blkfront_info *info = (struct blkfront_info *)arg; 590 591 mtx_lock(&blkif_io_lock); 592 kick_pending_request_queues(info); 593 mtx_unlock(&blkif_io_lock); 594} 595#endif 596 597static void blkif_restart_queue_callback(void *arg) 598{ 599#if 0 600 struct blkfront_info *info = (struct blkfront_info *)arg; 601 /* XXX BSD equiv ? */ 602 603 schedule_work(&info->work); 604#endif 605} 606 607static int 608blkif_open(struct disk *dp) 609{ 610 struct xb_softc *sc = (struct xb_softc *)dp->d_drv1; 611 612 if (sc == NULL) { 613 printk("xb%d: not found", sc->xb_unit); 614 return (ENXIO); 615 } 616 617 sc->xb_flags |= XB_OPEN; 618 sc->xb_info->users++; 619 return (0); 620} 621 622static int 623blkif_close(struct disk *dp) 624{ 625 struct xb_softc *sc = (struct xb_softc *)dp->d_drv1; 626 627 if (sc == NULL) 628 return (ENXIO); 629 sc->xb_flags &= ~XB_OPEN; 630 if (--(sc->xb_info->users) == 0) { 631 /* Check whether we have been instructed to close. We will 632 have ignored this request initially, as the device was 633 still mounted. */ 634 struct xenbus_device * dev = sc->xb_info->xbdev; 635 XenbusState state = xenbus_read_driver_state(dev->otherend); 636 637 if (state == XenbusStateClosing) 638 blkfront_closing(dev); 639 } 640 return (0); 641} 642 643static int 644blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td) 645{ 646 struct xb_softc *sc = (struct xb_softc *)dp->d_drv1; 647 648 if (sc == NULL) 649 return (ENXIO); 650 651 return (ENOTTY); 652} 653 654 655/* 656 * blkif_queue_request 657 * 658 * request block io 659 * 660 * id: for guest use only. 661 * operation: BLKIF_OP_{READ,WRITE,PROBE} 662 * buffer: buffer to read/write into. this should be a 663 * virtual address in the guest os. 
664 */ 665static int blkif_queue_request(struct bio *bp) 666{ 667 caddr_t alignbuf; 668 vm_paddr_t buffer_ma; 669 blkif_request_t *ring_req; 670 unsigned long id; 671 uint64_t fsect, lsect; 672 struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1; 673 struct blkfront_info *info = sc->xb_info; 674 int ref; 675 676 if (unlikely(sc->xb_info->connected != BLKIF_STATE_CONNECTED)) 677 return 1; 678 679 if (gnttab_alloc_grant_references( 680 BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) { 681 gnttab_request_free_callback( 682 &info->callback, 683 blkif_restart_queue_callback, 684 info, 685 BLKIF_MAX_SEGMENTS_PER_REQUEST); 686 return 1; 687 } 688 689 /* Check if the buffer is properly aligned */ 690 if ((vm_offset_t)bp->bio_data & PAGE_MASK) { 691 int align = (bp->bio_bcount < PAGE_SIZE/2) ? XBD_SECTOR_SIZE : 692 PAGE_SIZE; 693 caddr_t newbuf = malloc(bp->bio_bcount + align, M_DEVBUF, 694 M_NOWAIT); 695 696 alignbuf = (char *)roundup2((u_long)newbuf, align); 697 698 /* save a copy of the current buffer */ 699 bp->bio_driver1 = newbuf; 700 bp->bio_driver2 = alignbuf; 701 702 /* Copy the data for a write */ 703 if (bp->bio_cmd == BIO_WRITE) 704 bcopy(bp->bio_data, alignbuf, bp->bio_bcount); 705 } else 706 alignbuf = bp->bio_data; 707 708 /* Fill out a communications ring structure. */ 709 ring_req = RING_GET_REQUEST(&info->ring, 710 info->ring.req_prod_pvt); 711 id = GET_ID_FROM_FREELIST(info); 712 info->shadow[id].request = (unsigned long)bp; 713 714 ring_req->id = id; 715 ring_req->operation = (bp->bio_cmd == BIO_READ) ? BLKIF_OP_READ : 716 BLKIF_OP_WRITE; 717 718 ring_req->sector_number= (blkif_sector_t)bp->bio_pblkno; 719 ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xb_disk; 720 721 ring_req->nr_segments = 0; /* XXX not doing scatter/gather since buffer 722 * chaining is not supported. 
723 */ 724 725 buffer_ma = vtomach(alignbuf); 726 fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT; 727 lsect = fsect + (bp->bio_bcount >> XBD_SECTOR_SHFT) - 1; 728 /* install a grant reference. */ 729 ref = gnttab_claim_grant_reference(&gref_head); 730 KASSERT( ref != -ENOSPC, ("grant_reference failed") ); 731 732 gnttab_grant_foreign_access_ref( 733 ref, 734 info->xbdev->otherend_id, 735 buffer_ma >> PAGE_SHIFT, 736 ring_req->operation & 1 ); /* ??? */ 737 info->shadow[id].frame[ring_req->nr_segments] = 738 buffer_ma >> PAGE_SHIFT; 739 740 ring_req->seg[ring_req->nr_segments] = 741 (struct blkif_request_segment) { 742 .gref = ref, 743 .first_sect = fsect, 744 .last_sect = lsect }; 745 746 ring_req->nr_segments++; 747 KASSERT((buffer_ma & (XBD_SECTOR_SIZE-1)) == 0, 748 ("XEN buffer must be sector aligned")); 749 KASSERT(lsect <= 7, 750 ("XEN disk driver data cannot cross a page boundary")); 751 752 buffer_ma &= ~PAGE_MASK; 753 754 info->ring.req_prod_pvt++; 755 756 /* Keep a private copy so we can reissue requests when recovering. */ 757 info->shadow[id].req = *ring_req; 758 759 gnttab_free_grant_references(gref_head); 760 761 return 0; 762} 763 764 765 766/* 767 * Dequeue buffers and place them in the shared communication ring. 768 * Return when no more requests can be accepted or all buffers have 769 * been queued. 770 * 771 * Signal XEN once the ring has been filled out. 
772 */ 773static void 774xb_startio(struct xb_softc *sc) 775{ 776 struct bio *bp; 777 int queued = 0; 778 struct blkfront_info *info = sc->xb_info; 779 DPRINTK(""); 780 781 mtx_assert(&blkif_io_lock, MA_OWNED); 782 783 while ((bp = bioq_takefirst(&sc->xb_bioq)) != NULL) { 784 785 if (RING_FULL(&info->ring)) 786 goto wait; 787 788 if (blkif_queue_request(bp)) { 789 wait: 790 bioq_insert_head(&sc->xb_bioq, bp); 791 break; 792 } 793 queued++; 794 } 795 796 if (queued != 0) 797 flush_requests(sc->xb_info); 798} 799 800static void 801blkif_int(void *xsc) 802{ 803 struct xb_softc *sc = NULL; 804 struct bio *bp; 805 blkif_response_t *bret; 806 RING_IDX i, rp; 807 struct blkfront_info *info = xsc; 808 DPRINTK(""); 809 810 TRACE_ENTER; 811 812 mtx_lock(&blkif_io_lock); 813 814 if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) { 815 mtx_unlock(&blkif_io_lock); 816 return; 817 } 818 819 again: 820 rp = info->ring.sring->rsp_prod; 821 rmb(); /* Ensure we see queued responses up to 'rp'. */ 822 823 for (i = info->ring.rsp_cons; i != rp; i++) { 824 unsigned long id; 825 826 bret = RING_GET_RESPONSE(&info->ring, i); 827 id = bret->id; 828 bp = (struct bio *)info->shadow[id].request; 829 830 blkif_completion(&info->shadow[id]); 831 832 ADD_ID_TO_FREELIST(info, id); 833 834 switch (bret->operation) { 835 case BLKIF_OP_READ: 836 /* had an unaligned buffer that needs to be copied */ 837 if (bp->bio_driver1) 838 bcopy(bp->bio_driver2, bp->bio_data, bp->bio_bcount); 839 /* FALLTHROUGH */ 840 case BLKIF_OP_WRITE: 841 842 /* free the copy buffer */ 843 if (bp->bio_driver1) { 844 free(bp->bio_driver1, M_DEVBUF); 845 bp->bio_driver1 = NULL; 846 } 847 848 if ( unlikely(bret->status != BLKIF_RSP_OKAY) ) { 849 printf("Bad return from blkdev data request: %x\n", 850 bret->status); 851 bp->bio_flags |= BIO_ERROR; 852 } 853 854 sc = (struct xb_softc *)bp->bio_disk->d_drv1; 855 856 if (bp->bio_flags & BIO_ERROR) 857 bp->bio_error = EIO; 858 else 859 bp->bio_resid = 0; 860 861 biodone(bp); 
862 break; 863 default: 864 panic("received invalid operation"); 865 break; 866 } 867 } 868 869 info->ring.rsp_cons = i; 870 871 if (i != info->ring.req_prod_pvt) { 872 int more_to_do; 873 RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do); 874 if (more_to_do) 875 goto again; 876 } else { 877 info->ring.sring->rsp_event = i + 1; 878 } 879 880 kick_pending_request_queues(info); 881 882 mtx_unlock(&blkif_io_lock); 883} 884 885static void 886blkif_free(struct blkfront_info *info, int suspend) 887{ 888 889/* Prevent new requests being issued until we fix things up. */ 890 mtx_lock(&blkif_io_lock); 891 info->connected = suspend ? 892 BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED; 893 mtx_unlock(&blkif_io_lock); 894 895 /* Free resources associated with old device channel. */ 896 if (info->ring_ref != GRANT_INVALID_REF) { 897 gnttab_end_foreign_access(info->ring_ref, 898 info->ring.sring); 899 info->ring_ref = GRANT_INVALID_REF; 900 info->ring.sring = NULL; 901 } 902 if (info->irq) 903 unbind_from_irqhandler(info->irq, info); 904 info->irq = 0; 905 906} 907 908static void 909blkif_completion(struct blk_shadow *s) 910{ 911 int i; 912 913 for (i = 0; i < s->req.nr_segments; i++) 914 gnttab_end_foreign_access(s->req.seg[i].gref, 0UL); 915} 916 917static void 918blkif_recover(struct blkfront_info *info) 919{ 920 int i, j; 921 blkif_request_t *req; 922 struct blk_shadow *copy; 923 924 /* Stage 1: Make a safe copy of the shadow state. */ 925 copy = (struct blk_shadow *)malloc(sizeof(info->shadow), M_DEVBUF, M_NOWAIT|M_ZERO); 926 PANIC_IF(copy == NULL); 927 memcpy(copy, info->shadow, sizeof(info->shadow)); 928 929 /* Stage 2: Set up free list. */ 930 memset(&info->shadow, 0, sizeof(info->shadow)); 931 for (i = 0; i < BLK_RING_SIZE; i++) 932 info->shadow[i].req.id = i+1; 933 info->shadow_free = info->ring.req_prod_pvt; 934 info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; 935 936 /* Stage 3: Find pending requests and requeue them. 
*/ 937 for (i = 0; i < BLK_RING_SIZE; i++) { 938 /* Not in use? */ 939 if (copy[i].request == 0) 940 continue; 941 942 /* Grab a request slot and copy shadow state into it. */ 943 req = RING_GET_REQUEST( 944 &info->ring, info->ring.req_prod_pvt); 945 *req = copy[i].req; 946 947 /* We get a new request id, and must reset the shadow state. */ 948 req->id = GET_ID_FROM_FREELIST(info); 949 memcpy(&info->shadow[req->id], ©[i], sizeof(copy[i])); 950 951 /* Rewrite any grant references invalidated by suspend/resume. */ 952 for (j = 0; j < req->nr_segments; j++) 953 gnttab_grant_foreign_access_ref( 954 req->seg[j].gref, 955 info->xbdev->otherend_id, 956 pfn_to_mfn(info->shadow[req->id].frame[j]), 957 0 /* assume not readonly */); 958 959 info->shadow[req->id].req = *req; 960 961 info->ring.req_prod_pvt++; 962 } 963 964 free(copy, M_DEVBUF); 965 966 xenbus_switch_state(info->xbdev, XenbusStateConnected); 967 968 /* Now safe for us to use the shared ring */ 969 mtx_lock(&blkif_io_lock); 970 info->connected = BLKIF_STATE_CONNECTED; 971 mtx_unlock(&blkif_io_lock); 972 973 /* Send off requeued requests */ 974 mtx_lock(&blkif_io_lock); 975 flush_requests(info); 976 977 /* Kick any other new requests queued since we resumed */ 978 kick_pending_request_queues(info); 979 mtx_unlock(&blkif_io_lock); 980} 981 982static int 983blkfront_is_ready(struct xenbus_device *dev) 984{ 985 struct blkfront_info *info = dev->dev_driver_data; 986 987 return info->is_ready; 988} 989 990static struct xenbus_device_id blkfront_ids[] = { 991 { "vbd" }, 992 { "" } 993}; 994 995 996static struct xenbus_driver blkfront = { 997 .name = "vbd", 998 .ids = blkfront_ids, 999 .probe = blkfront_probe, 1000 .remove = blkfront_remove, 1001 .resume = blkfront_resume, 1002 .otherend_changed = backend_changed, 1003 .is_ready = blkfront_is_ready, 1004}; 1005 1006 1007 1008static void 1009xenbus_init(void) 1010{ 1011 xenbus_register_frontend(&blkfront); 1012} 1013 1014MTX_SYSINIT(ioreq, &blkif_io_lock, "BIO LOCK", 
MTX_NOWITNESS); /* XXX how does one enroll a lock? */ 1015SYSINIT(xbdev, SI_SUB_PSEUDO, SI_ORDER_SECOND, xenbus_init, NULL); 1016 1017 1018/* 1019 * Local variables: 1020 * mode: C 1021 * c-set-style: "BSD" 1022 * c-basic-offset: 8 1023 * tab-width: 4 1024 * indent-tabs-mode: t 1025 * End: 1026 */ 1027