/*-
 * All rights reserved.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

/*
 * XenoBSD block device driver
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/xen/blkfront/blkfront.c 183375 2008-09-26 05:29:39Z kmacy $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <vm/vm.h>
#include <vm/pmap.h>

#include <sys/bio.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/module.h>

#include <machine/bus.h>
#include <sys/rman.h>
#include <machine/resource.h>
#include <machine/intr_machdep.h>
#include <machine/vmparam.h>

#include <machine/xen/hypervisor.h>
#include <machine/xen/xen-os.h>
#include <machine/xen/xen_intr.h>
#include <machine/xen/xenbus.h>
#include <machine/xen/evtchn.h>
#include <xen/interface/grant_table.h>

#include <geom/geom_disk.h>
#include <machine/xen/xenfunc.h>
#include <xen/gnttab.h>

#include <dev/xen/blkfront/block.h>

#define    ASSERT(S)       KASSERT(S, (#S))
/* prototypes */
struct xb_softc;
static void xb_startio(struct xb_softc *sc);
static void connect(struct blkfront_info *);
static void blkfront_closing(struct xenbus_device *);
static int blkfront_remove(struct xenbus_device *);
static int talk_to_backend(struct xenbus_device *, struct blkfront_info *);
static int setup_blkring(struct xenbus_device *, struct blkfront_info *);
static void blkif_int(void *);
#if 0
static void blkif_restart_queue(void *arg);
#endif
static void blkif_recover(struct blkfront_info *);
static void blkif_completion(struct blk_shadow *);
static void blkif_free(struct blkfront_info *, int);

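/*
 * GRANT_INVALID_REF is used by this driver as a sentinel meaning "no grant
 * currently held"; BLK_RING_SIZE is the number of request slots that fit
 * in the one-page shared ring.
 */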
#define GRANT_INVALID_REF 0
#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)

LIST_HEAD(xb_softc_list_head, xb_softc) xbsl_head;

/* Control whether runtime update of vbds is enabled. */
#define ENABLE_VBD_UPDATE 0

#if ENABLE_VBD_UPDATE
static void vbd_update(void);
#endif


#define BLKIF_STATE_DISCONNECTED 0
#define BLKIF_STATE_CONNECTED    1
#define BLKIF_STATE_SUSPENDED    2

#ifdef notyet
static char *blkif_state_name[] = {
	[BLKIF_STATE_DISCONNECTED] = "disconnected",
	[BLKIF_STATE_CONNECTED]    = "connected",
	[BLKIF_STATE_SUSPENDED]    = "closed",
};

static char * blkif_status_name[] = {
	[BLKIF_INTERFACE_STATUS_CLOSED]       = "closed",
	[BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
	[BLKIF_INTERFACE_STATUS_CONNECTED]    = "connected",
	[BLKIF_INTERFACE_STATUS_CHANGED]      = "changed",
};
#endif
#define WPRINTK(fmt, args...) printf("[XEN] " fmt, ##args)
#if 0
#define DPRINTK(fmt, args...) printf("[XEN] %s:%d" fmt ".\n", __FUNCTION__, __LINE__, ##args)
#else
#define DPRINTK(fmt, args...)
#endif

static grant_ref_t gref_head;
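/*
 * Upper bound on in-flight block requests: every slot in the shared ring
 * may carry the maximum number of segments.
 */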
#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
    (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)

static void kick_pending_request_queues(struct blkfront_info *);
static int blkif_open(struct disk *dp);
static int blkif_close(struct disk *dp);
static int blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td);
static int blkif_queue_request(struct bio *bp);
static void xb_strategy(struct bio *bp);


/* XXX move to xb_vbd.c when VBD update support is added */
#define MAX_VBDS 64

#define XBD_SECTOR_SIZE		512	/* XXX: assume for now */
#define XBD_SECTOR_SHFT		9

static struct mtx blkif_io_lock;

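/*
 * Translate a guest pseudo-physical frame number into the machine frame
 * number understood by the hypervisor and the backend.
 */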
static vm_paddr_t
pfn_to_mfn(vm_paddr_t pfn)
{
	return (phystomach(pfn << PAGE_SHIFT) >> PAGE_SHIFT);
}


int
xlvbd_add(blkif_sector_t capacity, int unit, uint16_t vdisk_info, uint16_t sector_size,
	  struct blkfront_info *info)
{
	struct xb_softc	*sc;
	int		error = 0;
	/* Xen vdevice numbering for IDE-style disks apparently starts at 768;
	 * map it down to a small unit number. */
	int unitno = unit - 767;

	sc = (struct xb_softc *)malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	sc->xb_unit = unitno;
	sc->xb_info = info;
	info->sc = sc;

	sc->xb_disk = disk_alloc();
	sc->xb_disk->d_unit = unitno;
	sc->xb_disk->d_open = blkif_open;
	sc->xb_disk->d_close = blkif_close;
	sc->xb_disk->d_ioctl = blkif_ioctl;
	sc->xb_disk->d_strategy = xb_strategy;
	sc->xb_disk->d_name = "xbd";
	sc->xb_disk->d_drv1 = sc;
	sc->xb_disk->d_sectorsize = sector_size;

	/* XXX */
	sc->xb_disk->d_mediasize = capacity << XBD_SECTOR_SHFT;
#if 0
	sc->xb_disk->d_maxsize = DFLTPHYS;
#else /* XXX: xen can't handle large single i/o requests */
	sc->xb_disk->d_maxsize = 4096;
#endif
#ifdef notyet
	XENPRINTF("attaching device 0x%x unit %d capacity %llu\n",
		  xb_diskinfo[sc->xb_unit].device, sc->xb_unit,
		  sc->xb_disk->d_mediasize);
#endif
	sc->xb_disk->d_flags = 0;
	disk_create(sc->xb_disk, DISK_VERSION_00);
	bioq_init(&sc->xb_bioq);

	return error;
}

void
xlvbd_del(struct blkfront_info *info)
{
	struct xb_softc	*sc;

	sc = info->sc;
	disk_destroy(sc->xb_disk);
}
/************************ end VBD support *****************/

/*
 * Read/write routine for a buffer.  Finds the proper unit, places it on
 * the sort queue and kicks the controller.
 */
static void
xb_strategy(struct bio *bp)
{
	struct xb_softc	*sc = (struct xb_softc *)bp->bio_disk->d_drv1;

	/* bogus disk? */
	if (sc == NULL) {
		bp->bio_error = EINVAL;
		bp->bio_flags |= BIO_ERROR;
		goto bad;
	}

	DPRINTK("");

	/*
	 * Place it in the queue of disk activities for this disk
	 */
	mtx_lock(&blkif_io_lock);
	bioq_disksort(&sc->xb_bioq, bp);

	xb_startio(sc);
	mtx_unlock(&blkif_io_lock);
	return;

 bad:
	/*
	 * Correctly set the bio to indicate a failed transfer.
	 */
	bp->bio_resid = bp->bio_bcount;
	biodone(bp);
	return;
}


/*
 * Setup supplies the backend dir and the virtual device.  We place an
 * event channel and shared frame entries, and watch the backend to see
 * when it is ready.
 */
static int blkfront_probe(struct xenbus_device *dev,
			  const struct xenbus_device_id *id)
{
	int err, vdevice, i;
	struct blkfront_info *info;

	/* FIXME: Use dynamic device id if this is not set. */
	err = xenbus_scanf(XBT_NIL, dev->nodename,
			   "virtual-device", "%i", &vdevice);
	if (err != 1) {
		xenbus_dev_fatal(dev, err, "reading virtual-device");
		printf("couldn't find virtual device\n");
		return (err);
	}

	info = malloc(sizeof(*info), M_DEVBUF, M_NOWAIT|M_ZERO);
	if (info == NULL) {
		xenbus_dev_fatal(dev, ENOMEM, "allocating info structure");
		return ENOMEM;
	}

	/*
	 * XXX debug only
	 */
	for (i = 0; i < sizeof(*info); i++)
		if (((uint8_t *)info)[i] != 0)
			panic("non-null memory");

	info->shadow_free = 0;
	info->xbdev = dev;
	info->vdevice = vdevice;
	info->connected = BLKIF_STATE_DISCONNECTED;

	/* work queue needed ? */
	for (i = 0; i < BLK_RING_SIZE; i++)
		info->shadow[i].req.id = i+1;
	info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;

	/* Front end dir is a number, which is used as the id. */
	info->handle = strtoul(strrchr(dev->nodename,'/')+1, NULL, 0);
	dev->dev_driver_data = info;

	err = talk_to_backend(dev, info);
	if (err) {
		free(info, M_DEVBUF);
		dev->dev_driver_data = NULL;
		return err;
	}

	return 0;
}


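/*
 * Reconnect after a domain save/restore: the old ring and event channel
 * are gone, so tear down the stale state, rebuild the channel, and replay
 * any requests that were in flight.
 */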
static int blkfront_resume(struct xenbus_device *dev)
{
	struct blkfront_info *info = dev->dev_driver_data;
	int err;

	DPRINTK("blkfront_resume: %s\n", dev->nodename);

	blkif_free(info, 1);

	err = talk_to_backend(dev, info);
	if (!err)
		blkif_recover(info);

	return err;
}

/* Common code used when first setting up, and when resuming. */
static int talk_to_backend(struct xenbus_device *dev,
			   struct blkfront_info *info)
{
	const char *message = NULL;
	struct xenbus_transaction xbt;
	int err;

	/* Create shared ring, alloc event channel. */
	err = setup_blkring(dev, info);
	if (err)
		goto out;

 again:
	err = xenbus_transaction_start(&xbt);
	if (err) {
		xenbus_dev_fatal(dev, err, "starting transaction");
		goto destroy_blkring;
	}

	err = xenbus_printf(xbt, dev->nodename,
			    "ring-ref", "%u", info->ring_ref);
	if (err) {
		message = "writing ring-ref";
		goto abort_transaction;
	}
	err = xenbus_printf(xbt, dev->nodename,
		"event-channel", "%u", irq_to_evtchn_port(info->irq));
	if (err) {
		message = "writing event-channel";
		goto abort_transaction;
	}

	err = xenbus_transaction_end(xbt, 0);
	if (err) {
		if (err == -EAGAIN)
			goto again;
		xenbus_dev_fatal(dev, err, "completing transaction");
		goto destroy_blkring;
	}
	xenbus_switch_state(dev, XenbusStateInitialised);

	return 0;

 abort_transaction:
	xenbus_transaction_end(xbt, 1);
	if (message)
		xenbus_dev_fatal(dev, err, "%s", message);
 destroy_blkring:
	blkif_free(info, 0);
 out:
	return err;
}

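/*
 * Allocate the shared ring page, grant the backend access to it, and bind
 * an event channel for completion interrupts.
 */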
static int
setup_blkring(struct xenbus_device *dev, struct blkfront_info *info)
{
	blkif_sring_t *sring;
	int err;

	info->ring_ref = GRANT_INVALID_REF;

	sring = (blkif_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sring == NULL) {
		xenbus_dev_fatal(dev, ENOMEM, "allocating shared ring");
		return ENOMEM;
	}
	SHARED_RING_INIT(sring);
	FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);

	err = xenbus_grant_ring(dev, (vtomach(info->ring.sring) >> PAGE_SHIFT));
	if (err < 0) {
		free(sring, M_DEVBUF);
		info->ring.sring = NULL;
		goto fail;
	}
	info->ring_ref = err;

	err = bind_listening_port_to_irqhandler(dev->otherend_id,
		"xbd", (driver_intr_t *)blkif_int, info,
					INTR_TYPE_BIO | INTR_MPSAFE, NULL);
	if (err <= 0) {
		xenbus_dev_fatal(dev, err,
				 "bind_evtchn_to_irqhandler failed");
		goto fail;
	}
	info->irq = err;

	return 0;
 fail:
	blkif_free(info, 0);
	return err;
}


/**
 * Callback received when the backend's state changes.
 */
static void backend_changed(struct xenbus_device *dev,
			    XenbusState backend_state)
{
	struct blkfront_info *info = dev->dev_driver_data;

	DPRINTK("blkfront:backend_changed.\n");

	switch (backend_state) {
	case XenbusStateUnknown:
	case XenbusStateInitialising:
	case XenbusStateInitWait:
	case XenbusStateInitialised:
	case XenbusStateClosed:
	case XenbusStateReconfigured:
	case XenbusStateReconfiguring:
		break;

	case XenbusStateConnected:
		connect(info);
		break;

	case XenbusStateClosing:
		if (info->users > 0)
			xenbus_dev_error(dev, -EBUSY,
					 "Device in use; refusing to close");
		else
			blkfront_closing(dev);
#ifdef notyet
		bd = bdget(info->dev);
		if (bd == NULL)
			xenbus_dev_fatal(dev, -ENODEV, "bdget failed");

		down(&bd->bd_sem);
		if (info->users > 0)
			xenbus_dev_error(dev, -EBUSY,
					 "Device in use; refusing to close");
		else
			blkfront_closing(dev);
		up(&bd->bd_sem);
		bdput(bd);
#endif
	}
}

/*
 * Invoked when the backend is finally 'ready' (and has produced the
 * details about the physical device - #sectors, size, etc).
 */
static void
connect(struct blkfront_info *info)
{
	unsigned long sectors, sector_size;
	unsigned int binfo;
	int err;

	if ((info->connected == BLKIF_STATE_CONNECTED) ||
	    (info->connected == BLKIF_STATE_SUSPENDED))
		return;

	DPRINTK("blkfront.c:connect:%s.\n", info->xbdev->otherend);

	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
			    "sectors", "%lu", &sectors,
			    "info", "%u", &binfo,
			    "sector-size", "%lu", &sector_size,
			    NULL);
	if (err) {
		xenbus_dev_fatal(info->xbdev, err,
				 "reading backend fields at %s",
				 info->xbdev->otherend);
		return;
	}
	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
			    "feature-barrier", "%lu", &info->feature_barrier,
			    NULL);
	if (err)
		info->feature_barrier = 0;

	xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);

	(void)xenbus_switch_state(info->xbdev, XenbusStateConnected);

	/* Kick pending requests. */
	mtx_lock(&blkif_io_lock);
	info->connected = BLKIF_STATE_CONNECTED;
	kick_pending_request_queues(info);
	mtx_unlock(&blkif_io_lock);
	info->is_ready = 1;

#if 0
	add_disk(info->gd);
#endif
}

/**
 * Handle the change of state of the backend to Closing.  We must delete our
 * device-layer structures now, to ensure that writes are flushed through to
 * the backend.  Once this is done, we can switch to Closed in
 * acknowledgement.
 */
static void blkfront_closing(struct xenbus_device *dev)
{
	struct blkfront_info *info = dev->dev_driver_data;

	DPRINTK("blkfront_closing: %s removed\n", dev->nodename);

	if (info->mi) {
		DPRINTK("Calling xlvbd_del\n");
		xlvbd_del(info);
		info->mi = NULL;
	}

	xenbus_switch_state(dev, XenbusStateClosed);
}


static int blkfront_remove(struct xenbus_device *dev)
{
	struct blkfront_info *info = dev->dev_driver_data;

	DPRINTK("blkfront_remove: %s removed\n", dev->nodename);

	blkif_free(info, 0);

	free(info, M_DEVBUF);

	return 0;
}


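/*
 * The shadow array doubles as a free list: each unused entry stores the
 * index of the next free entry in req.id, and 0x0fffffff marks the end
 * of the chain.
 */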
static inline int
GET_ID_FROM_FREELIST(struct blkfront_info *info)
{
	unsigned long nfree = info->shadow_free;

	KASSERT(nfree <= BLK_RING_SIZE, ("free %lu > RING_SIZE", nfree));
	info->shadow_free = info->shadow[nfree].req.id;
	info->shadow[nfree].req.id = 0x0fffffee; /* debug */
	return nfree;
}

static inline void
ADD_ID_TO_FREELIST(struct blkfront_info *info, unsigned long id)
{
	info->shadow[id].req.id  = info->shadow_free;
	info->shadow[id].request = 0;
	info->shadow_free = id;
}

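/*
 * Publish our request producer index to the shared ring and notify the
 * backend over the event channel if it is waiting for work.
 */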
static inline void
flush_requests(struct blkfront_info *info)
{
	int notify;

	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);

	if (notify)
		notify_remote_via_irq(info->irq);
}

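/* Restart I/O submission if the shared ring has room again. */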
static void
kick_pending_request_queues(struct blkfront_info *info)
{
	/* XXX check if we can't simplify */
#if 0
	if (!RING_FULL(&info->ring)) {
		/* Re-enable calldowns. */
		blk_start_queue(info->rq);
		/* Kick things off immediately. */
		do_blkif_request(info->rq);
	}
#endif
	if (!RING_FULL(&info->ring)) {
#if 0
		sc = LIST_FIRST(&xbsl_head);
		LIST_REMOVE(sc, entry);
		/* Re-enable calldowns. */
		blk_start_queue(di->rq);
#endif
		/* Kick things off immediately. */
		xb_startio(info->sc);
	}
}

#if 0
/* XXX */
static void blkif_restart_queue(void *arg)
{
	struct blkfront_info *info = (struct blkfront_info *)arg;

	mtx_lock(&blkif_io_lock);
	kick_pending_request_queues(info);
	mtx_unlock(&blkif_io_lock);
}
#endif

static void blkif_restart_queue_callback(void *arg)
{
#if 0
	struct blkfront_info *info = (struct blkfront_info *)arg;
	/* XXX BSD equiv ? */

	schedule_work(&info->work);
#endif
}

static int
blkif_open(struct disk *dp)
{
	struct xb_softc	*sc = (struct xb_softc *)dp->d_drv1;

	if (sc == NULL) {
		printf("xb%d: not found\n", dp->d_unit);
		return (ENXIO);
	}

	sc->xb_flags |= XB_OPEN;
	sc->xb_info->users++;
	return (0);
}

static int
blkif_close(struct disk *dp)
{
	struct xb_softc	*sc = (struct xb_softc *)dp->d_drv1;

	if (sc == NULL)
		return (ENXIO);
	sc->xb_flags &= ~XB_OPEN;
	if (--(sc->xb_info->users) == 0) {
		/*
		 * Check whether we have been instructed to close.  We will
		 * have ignored this request initially, as the device was
		 * still mounted.
		 */
		struct xenbus_device * dev = sc->xb_info->xbdev;
		XenbusState state = xenbus_read_driver_state(dev->otherend);

		if (state == XenbusStateClosing)
			blkfront_closing(dev);
	}
	return (0);
}

static int
blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td)
{
	struct xb_softc	*sc = (struct xb_softc *)dp->d_drv1;

	if (sc == NULL)
		return (ENXIO);

	return (ENOTTY);
}


/*
 * blkif_queue_request
 *
 * request block io
 *
 * id: for guest use only.
 * operation: BLKIF_OP_{READ,WRITE,PROBE}
 * buffer: buffer to read/write into. this should be a
 *   virtual address in the guest os.
 */
static int blkif_queue_request(struct bio *bp)
{
	caddr_t alignbuf;
	vm_paddr_t buffer_ma;
	blkif_request_t *ring_req;
	unsigned long id;
	uint64_t fsect, lsect;
	struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1;
	struct blkfront_info *info = sc->xb_info;
	int ref;

	if (unlikely(sc->xb_info->connected != BLKIF_STATE_CONNECTED))
		return 1;

	if (gnttab_alloc_grant_references(
		    BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
		gnttab_request_free_callback(
			&info->callback,
			blkif_restart_queue_callback,
			info,
			BLKIF_MAX_SEGMENTS_PER_REQUEST);
		return 1;
	}

	/* Check if the buffer is properly aligned; if not, bounce the I/O
	 * through an aligned copy. */
	if ((vm_offset_t)bp->bio_data & PAGE_MASK) {
		int align = (bp->bio_bcount < PAGE_SIZE/2) ? XBD_SECTOR_SIZE :
			PAGE_SIZE;
		caddr_t newbuf = malloc(bp->bio_bcount + align, M_DEVBUF,
					M_NOWAIT);

		if (newbuf == NULL) {
			/* No memory for a bounce buffer; retry later. */
			gnttab_free_grant_references(gref_head);
			return 1;
		}
		alignbuf = (char *)roundup2((u_long)newbuf, align);

		/* save a copy of the current buffer */
		bp->bio_driver1 = newbuf;
		bp->bio_driver2 = alignbuf;

		/* Copy the data for a write */
		if (bp->bio_cmd == BIO_WRITE)
			bcopy(bp->bio_data, alignbuf, bp->bio_bcount);
	} else
		alignbuf = bp->bio_data;

	/* Fill out a communications ring structure. */
	ring_req = RING_GET_REQUEST(&info->ring,
				    info->ring.req_prod_pvt);
	id = GET_ID_FROM_FREELIST(info);
	info->shadow[id].request = (unsigned long)bp;

	ring_req->id = id;
	ring_req->operation = (bp->bio_cmd == BIO_READ) ? BLKIF_OP_READ :
		BLKIF_OP_WRITE;

	ring_req->sector_number = (blkif_sector_t)bp->bio_pblkno;
	ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xb_disk;

	ring_req->nr_segments = 0;	/* XXX not doing scatter/gather since buffer
					 * chaining is not supported.
					 */

	buffer_ma = vtomach(alignbuf);
	fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT;
	lsect = fsect + (bp->bio_bcount >> XBD_SECTOR_SHFT) - 1;
	/* install a grant reference. */
	ref = gnttab_claim_grant_reference(&gref_head);
	KASSERT(ref != -ENOSPC, ("grant_reference failed"));

	gnttab_grant_foreign_access_ref(
		ref,
		info->xbdev->otherend_id,
		buffer_ma >> PAGE_SHIFT,
		ring_req->operation & 1); /* read-only for writes: BLKIF_OP_WRITE is 1 */
	info->shadow[id].frame[ring_req->nr_segments] =
		buffer_ma >> PAGE_SHIFT;

	ring_req->seg[ring_req->nr_segments] =
		(struct blkif_request_segment) {
			.gref       = ref,
			.first_sect = fsect,
			.last_sect  = lsect };

	ring_req->nr_segments++;
	KASSERT((buffer_ma & (XBD_SECTOR_SIZE-1)) == 0,
		("XEN buffer must be sector aligned"));
	KASSERT(lsect <= 7,
		("XEN disk driver data cannot cross a page boundary"));

	buffer_ma &= ~PAGE_MASK;

	info->ring.req_prod_pvt++;

	/* Keep a private copy so we can reissue requests when recovering. */
	info->shadow[id].req = *ring_req;

	gnttab_free_grant_references(gref_head);

	return 0;
}


/*
 * Dequeue buffers and place them in the shared communication ring.
 * Return when no more requests can be accepted or all buffers have
 * been queued.
 *
 * Signal XEN once the ring has been filled out.
 */
static void
xb_startio(struct xb_softc *sc)
{
	struct bio		*bp;
	int			queued = 0;
	struct blkfront_info *info = sc->xb_info;
	DPRINTK("");

	mtx_assert(&blkif_io_lock, MA_OWNED);

	while ((bp = bioq_takefirst(&sc->xb_bioq)) != NULL) {
		if (RING_FULL(&info->ring) || blkif_queue_request(bp)) {
			/* No ring slot or queueing failed: requeue and stop. */
			bioq_insert_head(&sc->xb_bioq, bp);
			break;
		}
		queued++;
	}

	if (queued != 0)
		flush_requests(sc->xb_info);
}

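/*
 * Interrupt handler: drain completed responses from the shared ring,
 * finish the corresponding bios, and restart any queued I/O.
 */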
static void
blkif_int(void *xsc)
{
	struct xb_softc *sc = NULL;
	struct bio *bp;
	blkif_response_t *bret;
	RING_IDX i, rp;
	struct blkfront_info *info = xsc;
	DPRINTK("");

	TRACE_ENTER;

	mtx_lock(&blkif_io_lock);

	if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
		mtx_unlock(&blkif_io_lock);
		return;
	}

 again:
	rp = info->ring.sring->rsp_prod;
	rmb(); /* Ensure we see queued responses up to 'rp'. */

	for (i = info->ring.rsp_cons; i != rp; i++) {
		unsigned long id;

		bret = RING_GET_RESPONSE(&info->ring, i);
		id   = bret->id;
		bp   = (struct bio *)info->shadow[id].request;

		blkif_completion(&info->shadow[id]);

		ADD_ID_TO_FREELIST(info, id);

		switch (bret->operation) {
		case BLKIF_OP_READ:
			/* had an unaligned buffer that needs to be copied */
			if (bp->bio_driver1)
				bcopy(bp->bio_driver2, bp->bio_data, bp->bio_bcount);
			/* FALLTHROUGH */
		case BLKIF_OP_WRITE:

			/* free the copy buffer */
			if (bp->bio_driver1) {
				free(bp->bio_driver1, M_DEVBUF);
				bp->bio_driver1 = NULL;
			}

			if (unlikely(bret->status != BLKIF_RSP_OKAY)) {
				printf("Bad return from blkdev data request: %x\n",
				       bret->status);
				bp->bio_flags |= BIO_ERROR;
			}

			sc = (struct xb_softc *)bp->bio_disk->d_drv1;

			if (bp->bio_flags & BIO_ERROR)
				bp->bio_error = EIO;
			else
				bp->bio_resid = 0;

			biodone(bp);
			break;
		default:
			panic("received invalid operation");
			break;
		}
	}

	info->ring.rsp_cons = i;

	if (i != info->ring.req_prod_pvt) {
		int more_to_do;
		RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
		if (more_to_do)
			goto again;
	} else {
		info->ring.sring->rsp_event = i + 1;
	}

	kick_pending_request_queues(info);

	mtx_unlock(&blkif_io_lock);
}

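/*
 * Tear down the device channel.  With 'suspend' set, the interface is
 * marked suspended rather than disconnected, so blkif_recover() can later
 * replay the outstanding requests.
 */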
static void
blkif_free(struct blkfront_info *info, int suspend)
{
	/* Prevent new requests being issued until we fix things up. */
	mtx_lock(&blkif_io_lock);
	info->connected = suspend ?
		BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
	mtx_unlock(&blkif_io_lock);

	/* Free resources associated with old device channel. */
	if (info->ring_ref != GRANT_INVALID_REF) {
		gnttab_end_foreign_access(info->ring_ref,
					  info->ring.sring);
		info->ring_ref = GRANT_INVALID_REF;
		info->ring.sring = NULL;
	}
	if (info->irq)
		unbind_from_irqhandler(info->irq, info);
	info->irq = 0;
}

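/* Release the grant references held by a completed request. */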
static void
blkif_completion(struct blk_shadow *s)
{
	int i;

	for (i = 0; i < s->req.nr_segments; i++)
		gnttab_end_foreign_access(s->req.seg[i].gref, 0UL);
}

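/*
 * Rebuild ring state after a suspend/resume cycle by replaying every
 * request that was outstanding when the domain was suspended.
 */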
static void
blkif_recover(struct blkfront_info *info)
{
	int i, j;
	blkif_request_t *req;
	struct blk_shadow *copy;

	/* Stage 1: Make a safe copy of the shadow state. */
	copy = (struct blk_shadow *)malloc(sizeof(info->shadow), M_DEVBUF, M_NOWAIT|M_ZERO);
	PANIC_IF(copy == NULL);
	memcpy(copy, info->shadow, sizeof(info->shadow));

	/* Stage 2: Set up free list. */
	memset(&info->shadow, 0, sizeof(info->shadow));
	for (i = 0; i < BLK_RING_SIZE; i++)
		info->shadow[i].req.id = i+1;
	info->shadow_free = info->ring.req_prod_pvt;
	info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;

	/* Stage 3: Find pending requests and requeue them. */
	for (i = 0; i < BLK_RING_SIZE; i++) {
		/* Not in use? */
		if (copy[i].request == 0)
			continue;

		/* Grab a request slot and copy shadow state into it. */
		req = RING_GET_REQUEST(
			&info->ring, info->ring.req_prod_pvt);
		*req = copy[i].req;

		/* We get a new request id, and must reset the shadow state. */
		req->id = GET_ID_FROM_FREELIST(info);
		memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i]));

		/* Rewrite any grant references invalidated by suspend/resume. */
		for (j = 0; j < req->nr_segments; j++)
			gnttab_grant_foreign_access_ref(
				req->seg[j].gref,
				info->xbdev->otherend_id,
				pfn_to_mfn(info->shadow[req->id].frame[j]),
				0 /* assume not readonly */);

		info->shadow[req->id].req = *req;

		info->ring.req_prod_pvt++;
	}

	free(copy, M_DEVBUF);

	xenbus_switch_state(info->xbdev, XenbusStateConnected);

	/* Now safe for us to use the shared ring */
	mtx_lock(&blkif_io_lock);
	info->connected = BLKIF_STATE_CONNECTED;
	mtx_unlock(&blkif_io_lock);

	/* Send off requeued requests */
	mtx_lock(&blkif_io_lock);
	flush_requests(info);

	/* Kick any other new requests queued since we resumed */
	kick_pending_request_queues(info);
	mtx_unlock(&blkif_io_lock);
}

static int
blkfront_is_ready(struct xenbus_device *dev)
{
	struct blkfront_info *info = dev->dev_driver_data;

	return info->is_ready;
}

static struct xenbus_device_id blkfront_ids[] = {
	{ "vbd" },
	{ "" }
};


static struct xenbus_driver blkfront = {
	.name             = "vbd",
	.ids              = blkfront_ids,
	.probe            = blkfront_probe,
	.remove           = blkfront_remove,
	.resume           = blkfront_resume,
	.otherend_changed = backend_changed,
	.is_ready         = blkfront_is_ready,
};



static void
xenbus_init(void)
{
	xenbus_register_frontend(&blkfront);
}

MTX_SYSINIT(ioreq, &blkif_io_lock, "BIO LOCK", MTX_NOWITNESS); /* XXX how does one enroll a lock? */
SYSINIT(xbdev, SI_SUB_PSEUDO, SI_ORDER_SECOND, xenbus_init, NULL);


/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 8
 * tab-width: 4
 * indent-tabs-mode: t
 * End:
 */