/*-
 * All rights reserved.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

/*
 * XenoBSD block device driver
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/xen/blkfront/blkfront.c 189699 2009-03-11 15:30:12Z dfr $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <vm/vm.h>
#include <vm/pmap.h>

#include <sys/bio.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/module.h>

#include <machine/bus.h>
#include <sys/rman.h>
#include <machine/resource.h>
#include <machine/intr_machdep.h>
#include <machine/vmparam.h>

#include <machine/xen/xen-os.h>
#include <machine/xen/xenfunc.h>
#include <xen/hypervisor.h>
#include <xen/xen_intr.h>
#include <xen/evtchn.h>
#include <xen/gnttab.h>
#include <xen/interface/grant_table.h>
#include <xen/interface/io/protocols.h>
#include <xen/xenbus/xenbusvar.h>

#include <geom/geom_disk.h>

#include <dev/xen/blkfront/block.h>

#include "xenbus_if.h"

#define    ASSERT(S)       KASSERT(S, (#S))
/* prototypes */
struct xb_softc;
static void xb_startio(struct xb_softc *sc);
static void connect(device_t, struct blkfront_info *);
static void blkfront_closing(device_t);
static int blkfront_detach(device_t);
static int talk_to_backend(device_t, struct blkfront_info *);
static int setup_blkring(device_t, struct blkfront_info *);
static void blkif_int(void *);
#if 0
static void blkif_restart_queue(void *arg);
#endif
static void blkif_recover(struct blkfront_info *);
static void blkif_completion(struct blk_shadow *);
static void blkif_free(struct blkfront_info *, int);

#define GRANT_INVALID_REF 0
#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
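/*
 * __RING_SIZE() computes how many request/response slots fit in a
 * single page, so BLK_RING_SIZE depends on PAGE_SIZE (32 slots for the
 * classic blkif entry layout on 4 KB pages).
 */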

LIST_HEAD(xb_softc_list_head, xb_softc) xbsl_head;

/* Control whether runtime update of vbds is enabled. */
#define ENABLE_VBD_UPDATE 0

#if ENABLE_VBD_UPDATE
static void vbd_update(void);
#endif


#define BLKIF_STATE_DISCONNECTED 0
#define BLKIF_STATE_CONNECTED    1
#define BLKIF_STATE_SUSPENDED    2

#ifdef notyet
static char *blkif_state_name[] = {
	[BLKIF_STATE_DISCONNECTED] = "disconnected",
	[BLKIF_STATE_CONNECTED]    = "connected",
	[BLKIF_STATE_SUSPENDED]    = "closed",
};

static char * blkif_status_name[] = {
	[BLKIF_INTERFACE_STATUS_CLOSED]       = "closed",
	[BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
	[BLKIF_INTERFACE_STATUS_CONNECTED]    = "connected",
	[BLKIF_INTERFACE_STATUS_CHANGED]      = "changed",
};
#endif
#define WPRINTK(fmt, args...) printf("[XEN] " fmt, ##args)
#if 0
#define DPRINTK(fmt, args...) printf("[XEN] %s:%d: " fmt ".\n", __func__, __LINE__, ##args)
#else
#define DPRINTK(fmt, args...)
#endif

static grant_ref_t gref_head;
#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
    (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
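/*
 * Worst case for segments in flight: every ring slot in use, each
 * carrying a full complement of segments (and hence grant references).
 */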

static void kick_pending_request_queues(struct blkfront_info *);
static int blkif_open(struct disk *dp);
static int blkif_close(struct disk *dp);
static int blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td);
static int blkif_queue_request(struct bio *bp);
static void xb_strategy(struct bio *bp);



/* XXX move to xb_vbd.c when VBD update support is added */
#define MAX_VBDS 64

#define XBD_SECTOR_SIZE		512	/* XXX: assume for now */
#define XBD_SECTOR_SHFT		9

static struct mtx blkif_io_lock;

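/*
 * Convert a guest pseudo-physical frame number into the machine frame
 * number that the hypervisor and grant-table entries expect.
 */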
static vm_paddr_t
pfn_to_mfn(vm_paddr_t pfn)
{
	return (phystomach(pfn << PAGE_SHIFT) >> PAGE_SHIFT);
}

/*
 * Translate Linux major/minor to an appropriate name and unit
 * number. For HVM guests, this allows us to use the same drive names
 * with blkfront as the emulated drives, easing transition slightly.
 */
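/*
 * For example, vdevice 0x0810 (Linux major 8, minor 16) matches the
 * {8, 4, 0, "da"} table entry below and maps to unit 0 + (16 >> 4) = 1,
 * i.e. "da1".
 */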
static void
blkfront_vdevice_to_unit(int vdevice, int *unit, const char **name)
{
	static struct vdev_info {
		int major;
		int shift;
		int base;
		const char *name;
	} info[] = {
		{3,	6,	0,	"ad"},	/* ide0 */
		{22,	6,	2,	"ad"},	/* ide1 */
		{33,	6,	4,	"ad"},	/* ide2 */
		{34,	6,	6,	"ad"},	/* ide3 */
		{56,	6,	8,	"ad"},	/* ide4 */
		{57,	6,	10,	"ad"},	/* ide5 */
		{88,	6,	12,	"ad"},	/* ide6 */
		{89,	6,	14,	"ad"},	/* ide7 */
		{90,	6,	16,	"ad"},	/* ide8 */
		{91,	6,	18,	"ad"},	/* ide9 */

		{8,	4,	0,	"da"},	/* scsi disk0 */
		{65,	4,	16,	"da"},	/* scsi disk1 */
		{66,	4,	32,	"da"},	/* scsi disk2 */
		{67,	4,	48,	"da"},	/* scsi disk3 */
		{68,	4,	64,	"da"},	/* scsi disk4 */
		{69,	4,	80,	"da"},	/* scsi disk5 */
		{70,	4,	96,	"da"},	/* scsi disk6 */
		{71,	4,	112,	"da"},	/* scsi disk7 */
		{128,	4,	128,	"da"},	/* scsi disk8 */
		{129,	4,	144,	"da"},	/* scsi disk9 */
		{130,	4,	160,	"da"},	/* scsi disk10 */
		{131,	4,	176,	"da"},	/* scsi disk11 */
		{132,	4,	192,	"da"},	/* scsi disk12 */
		{133,	4,	208,	"da"},	/* scsi disk13 */
		{134,	4,	224,	"da"},	/* scsi disk14 */
		{135,	4,	240,	"da"},	/* scsi disk15 */

		{202,	4,	0,	"xbd"},	/* xbd */

		{0,	0,	0,	NULL},
	};
	int major = vdevice >> 8;
	int minor = vdevice & 0xff;
	int i;

	if (vdevice & (1 << 28)) {
		*unit = (vdevice & ((1 << 28) - 1)) >> 8;
		*name = "xbd";
		return;
	}

	for (i = 0; info[i].major; i++) {
		if (info[i].major == major) {
			*unit = info[i].base + (minor >> info[i].shift);
			*name = info[i].name;
			return;
		}
	}

	*unit = minor >> 4;
	*name = "xbd";
}

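/*
 * Allocate and register a GEOM disk for a newly discovered virtual
 * block device, wiring it up to this driver's open/close/ioctl and
 * strategy entry points.
 */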
int
xlvbd_add(device_t dev, blkif_sector_t capacity,
    int vdevice, uint16_t vdisk_info, uint16_t sector_size,
    struct blkfront_info *info)
{
	struct xb_softc	*sc;
	int	unit, error = 0;
	const char *name;

	blkfront_vdevice_to_unit(vdevice, &unit, &name);

	sc = (struct xb_softc *)malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	sc->xb_unit = unit;
	sc->xb_info = info;
	info->sc = sc;

	if (strcmp(name, "xbd"))
		device_printf(dev, "attaching as %s%d\n", name, unit);

	sc->xb_disk = disk_alloc();
	sc->xb_disk->d_unit = sc->xb_unit;
	sc->xb_disk->d_open = blkif_open;
	sc->xb_disk->d_close = blkif_close;
	sc->xb_disk->d_ioctl = blkif_ioctl;
	sc->xb_disk->d_strategy = xb_strategy;
	sc->xb_disk->d_name = name;
	sc->xb_disk->d_drv1 = sc;
	sc->xb_disk->d_sectorsize = sector_size;

	/* XXX */
	sc->xb_disk->d_mediasize = capacity << XBD_SECTOR_SHFT;
#if 0
	sc->xb_disk->d_maxsize = DFLTPHYS;
#else /* XXX: xen can't handle large single i/o requests */
	sc->xb_disk->d_maxsize = 4096;
#endif
#ifdef notyet
	XENPRINTF("attaching device 0x%x unit %d capacity %llu\n",
		  xb_diskinfo[sc->xb_unit].device, sc->xb_unit,
		  sc->xb_disk->d_mediasize);
#endif
	sc->xb_disk->d_flags = 0;
	disk_create(sc->xb_disk, DISK_VERSION_00);
	bioq_init(&sc->xb_bioq);

	return error;
}

void
xlvbd_del(struct blkfront_info *info)
{
	struct xb_softc	*sc;

	sc = info->sc;
	disk_destroy(sc->xb_disk);
}
/************************ end VBD support *****************/

/*
 * Read/write routine for a buffer.  Find the proper unit, place it on
 * the sort queue, and kick the controller.
 */
static void
xb_strategy(struct bio *bp)
{
	struct xb_softc	*sc = (struct xb_softc *)bp->bio_disk->d_drv1;

	/* bogus disk? */
	if (sc == NULL) {
		bp->bio_error = EINVAL;
		bp->bio_flags |= BIO_ERROR;
		goto bad;
	}

	DPRINTK("");

	/*
	 * Place it in the queue of disk activities for this disk
	 */
	mtx_lock(&blkif_io_lock);
	bioq_disksort(&sc->xb_bioq, bp);

	xb_startio(sc);
	mtx_unlock(&blkif_io_lock);
	return;

 bad:
	/*
	 * Correctly set the bio to indicate a failed transfer.
	 */
	bp->bio_resid = bp->bio_bcount;
	biodone(bp);
	return;
}

static int
blkfront_probe(device_t dev)
{

	if (!strcmp(xenbus_get_type(dev), "vbd")) {
		device_set_desc(dev, "Virtual Block Device");
		device_quiet(dev);
		return (0);
	}

	return (ENXIO);
}

/*
 * Setup supplies the backend directory and the virtual device number.
 * We publish an event channel and shared-ring grant reference, and
 * watch the backend to learn when it is ready.
 */
static int
blkfront_attach(device_t dev)
{
	int error, vdevice, i, unit;
	struct blkfront_info *info;
	const char *name;

	/* FIXME: Use dynamic device id if this is not set. */
	error = xenbus_scanf(XBT_NIL, xenbus_get_node(dev),
	    "virtual-device", NULL, "%i", &vdevice);
	if (error) {
		xenbus_dev_fatal(dev, error, "reading virtual-device");
		printf("couldn't find virtual device");
		return (error);
	}

	blkfront_vdevice_to_unit(vdevice, &unit, &name);
	if (!strcmp(name, "xbd"))
		device_set_unit(dev, unit);

	info = device_get_softc(dev);

	/*
	 * XXX debug only
	 */
	for (i = 0; i < sizeof(*info); i++)
		if (((uint8_t *)info)[i] != 0)
			panic("non-null memory");

	info->shadow_free = 0;
	info->xbdev = dev;
	info->vdevice = vdevice;
	info->connected = BLKIF_STATE_DISCONNECTED;

	/* work queue needed ? */
	for (i = 0; i < BLK_RING_SIZE; i++)
		info->shadow[i].req.id = i+1;
	info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;

	/* Front end dir is a number, which is used as the id. */
	info->handle = strtoul(strrchr(xenbus_get_node(dev),'/')+1, NULL, 0);

	error = talk_to_backend(dev, info);
	if (error)
		return (error);

	return (0);
}

static int
blkfront_suspend(device_t dev)
{
	struct blkfront_info *info = device_get_softc(dev);

	/* Prevent new requests being issued until we fix things up. */
	mtx_lock(&blkif_io_lock);
	info->connected = BLKIF_STATE_SUSPENDED;
	mtx_unlock(&blkif_io_lock);

	return (0);
}

static int
blkfront_resume(device_t dev)
{
	struct blkfront_info *info = device_get_softc(dev);
	int err;

	DPRINTK("blkfront_resume: %s\n", xenbus_get_node(dev));

	blkif_free(info, 1);
	err = talk_to_backend(dev, info);
	if (info->connected == BLKIF_STATE_SUSPENDED && !err)
		blkif_recover(info);

	return (err);
}

/* Common code used when first setting up, and when resuming. */
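/*
 * The frontend publishes its ring-ref, event-channel port and ring
 * protocol under its own xenstore node inside a single transaction,
 * then switches to XenbusStateInitialised so the backend can connect.
 */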
static int
talk_to_backend(device_t dev, struct blkfront_info *info)
{
	const char *message = NULL;
	struct xenbus_transaction xbt;
	int err;

	/* Create shared ring, alloc event channel. */
	err = setup_blkring(dev, info);
	if (err)
		goto out;

 again:
	err = xenbus_transaction_start(&xbt);
	if (err) {
		xenbus_dev_fatal(dev, err, "starting transaction");
		goto destroy_blkring;
	}

	err = xenbus_printf(xbt, xenbus_get_node(dev),
			    "ring-ref","%u", info->ring_ref);
	if (err) {
		message = "writing ring-ref";
		goto abort_transaction;
	}
	err = xenbus_printf(xbt, xenbus_get_node(dev),
		"event-channel", "%u", irq_to_evtchn_port(info->irq));
	if (err) {
		message = "writing event-channel";
		goto abort_transaction;
	}
	err = xenbus_printf(xbt, xenbus_get_node(dev),
		"protocol", "%s", XEN_IO_PROTO_ABI_NATIVE);
	if (err) {
		message = "writing protocol";
		goto abort_transaction;
	}

	err = xenbus_transaction_end(xbt, 0);
	if (err) {
		if (err == EAGAIN)
			goto again;
		xenbus_dev_fatal(dev, err, "completing transaction");
		goto destroy_blkring;
	}
	xenbus_set_state(dev, XenbusStateInitialised);

	return 0;

 abort_transaction:
	xenbus_transaction_end(xbt, 1);
	if (message)
		xenbus_dev_fatal(dev, err, "%s", message);
 destroy_blkring:
	blkif_free(info, 0);
 out:
	return err;
}

static int
setup_blkring(device_t dev, struct blkfront_info *info)
{
	blkif_sring_t *sring;
	int error;

	info->ring_ref = GRANT_INVALID_REF;

	sring = (blkif_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sring == NULL) {
		xenbus_dev_fatal(dev, ENOMEM, "allocating shared ring");
		return ENOMEM;
	}
	SHARED_RING_INIT(sring);
	FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);

	error = xenbus_grant_ring(dev,
	    (vtomach(info->ring.sring) >> PAGE_SHIFT), &info->ring_ref);
	if (error) {
		free(sring, M_DEVBUF);
		info->ring.sring = NULL;
		goto fail;
	}

	error = bind_listening_port_to_irqhandler(xenbus_get_otherend_id(dev),
	    "xbd", (driver_intr_t *)blkif_int, info,
	    INTR_TYPE_BIO | INTR_MPSAFE, &info->irq);
	if (error) {
		xenbus_dev_fatal(dev, error,
		    "bind_evtchn_to_irqhandler failed");
		goto fail;
	}

	return (0);
 fail:
	blkif_free(info, 0);
	return (error);
}


/**
 * Callback received when the backend's state changes.
 */
static void
blkfront_backend_changed(device_t dev, XenbusState backend_state)
{
	struct blkfront_info *info = device_get_softc(dev);

	DPRINTK("backend_state=%d\n", backend_state);

	switch (backend_state) {
	case XenbusStateUnknown:
	case XenbusStateInitialising:
	case XenbusStateInitWait:
	case XenbusStateInitialised:
	case XenbusStateClosed:
	case XenbusStateReconfigured:
	case XenbusStateReconfiguring:
		break;

	case XenbusStateConnected:
		connect(dev, info);
		break;

	case XenbusStateClosing:
		if (info->users > 0)
			xenbus_dev_error(dev, -EBUSY,
					 "Device in use; refusing to close");
		else
			blkfront_closing(dev);
#ifdef notyet
		bd = bdget(info->dev);
		if (bd == NULL)
			xenbus_dev_fatal(dev, -ENODEV, "bdget failed");

		down(&bd->bd_sem);
		if (info->users > 0)
			xenbus_dev_error(dev, -EBUSY,
					 "Device in use; refusing to close");
		else
			blkfront_closing(dev);
		up(&bd->bd_sem);
		bdput(bd);
#endif
	}
}

/*
** Invoked when the backend is finally 'ready' (and has produced the
** details about the physical device - #sectors, size, etc).
*/
static void
connect(device_t dev, struct blkfront_info *info)
{
	unsigned long sectors, sector_size;
	unsigned int binfo;
	int err;

	if ((info->connected == BLKIF_STATE_CONNECTED) ||
	    (info->connected == BLKIF_STATE_SUSPENDED))
		return;

	DPRINTK("blkfront.c:connect:%s.\n", xenbus_get_otherend_path(dev));

	err = xenbus_gather(XBT_NIL, xenbus_get_otherend_path(dev),
			    "sectors", "%lu", &sectors,
			    "info", "%u", &binfo,
			    "sector-size", "%lu", &sector_size,
			    NULL);
	if (err) {
		xenbus_dev_fatal(dev, err,
		    "reading backend fields at %s",
		    xenbus_get_otherend_path(dev));
		return;
	}
	err = xenbus_gather(XBT_NIL, xenbus_get_otherend_path(dev),
			    "feature-barrier", "%lu", &info->feature_barrier,
			    NULL);
	if (err)
		info->feature_barrier = 0;

	device_printf(dev, "%juMB <%s> at %s",
	    (uintmax_t) sectors / (1048576 / sector_size),
	    device_get_desc(dev),
	    xenbus_get_node(dev));
	bus_print_child_footer(device_get_parent(dev), dev);

	xlvbd_add(dev, sectors, info->vdevice, binfo, sector_size, info);

	(void)xenbus_set_state(dev, XenbusStateConnected);

	/* Kick pending requests. */
	mtx_lock(&blkif_io_lock);
	info->connected = BLKIF_STATE_CONNECTED;
	kick_pending_request_queues(info);
	mtx_unlock(&blkif_io_lock);
	info->is_ready = 1;

#if 0
	add_disk(info->gd);
#endif
}

/**
 * Handle the change of state of the backend to Closing.  We must delete our
 * device-layer structures now, to ensure that writes are flushed through to
 * the backend.  Once this is done, we can switch to Closed in
 * acknowledgement.
 */
static void
blkfront_closing(device_t dev)
{
	struct blkfront_info *info = device_get_softc(dev);

	DPRINTK("blkfront_closing: %s removed\n", xenbus_get_node(dev));

	if (info->mi) {
		DPRINTK("Calling xlvbd_del\n");
		xlvbd_del(info);
		info->mi = NULL;
	}

	xenbus_set_state(dev, XenbusStateClosed);
}


static int
blkfront_detach(device_t dev)
{
	struct blkfront_info *info = device_get_softc(dev);

	DPRINTK("blkfront_remove: %s removed\n", xenbus_get_node(dev));

	blkif_free(info, 0);

	return 0;
}


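/*
 * The shadow array doubles as a free list: the req.id field of an
 * unused entry holds the index of the next free slot, so slot
 * allocation and release are O(1) with no extra storage.
 */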
static inline int
GET_ID_FROM_FREELIST(struct blkfront_info *info)
{
	unsigned long nfree = info->shadow_free;

	KASSERT(nfree <= BLK_RING_SIZE, ("free %lu > RING_SIZE", nfree));
	info->shadow_free = info->shadow[nfree].req.id;
	info->shadow[nfree].req.id = 0x0fffffee; /* debug */
	return nfree;
}

static inline void
ADD_ID_TO_FREELIST(struct blkfront_info *info, unsigned long id)
{
	info->shadow[id].req.id  = info->shadow_free;
	info->shadow[id].request = 0;
	info->shadow_free = id;
}

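/*
 * Publish the private request-producer index to the shared ring and
 * send an event-channel notification if the backend is not already
 * busy processing requests.
 */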
static inline void
flush_requests(struct blkfront_info *info)
{
	int notify;

	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);

	if (notify)
		notify_remote_via_irq(info->irq);
}

static void
kick_pending_request_queues(struct blkfront_info *info)
{
	/* XXX check if we can't simplify */
#if 0
	if (!RING_FULL(&info->ring)) {
		/* Re-enable calldowns. */
		blk_start_queue(info->rq);
		/* Kick things off immediately. */
		do_blkif_request(info->rq);
	}
#endif
	if (!RING_FULL(&info->ring)) {
#if 0
		sc = LIST_FIRST(&xbsl_head);
		LIST_REMOVE(sc, entry);
		/* Re-enable calldowns. */
		blk_start_queue(di->rq);
#endif
		/* Kick things off immediately. */
		xb_startio(info->sc);
	}
}

#if 0
/* XXX */
static void blkif_restart_queue(void *arg)
{
	struct blkfront_info *info = (struct blkfront_info *)arg;

	mtx_lock(&blkif_io_lock);
	kick_pending_request_queues(info);
	mtx_unlock(&blkif_io_lock);
}
#endif

static void blkif_restart_queue_callback(void *arg)
{
#if 0
	struct blkfront_info *info = (struct blkfront_info *)arg;
	/* XXX BSD equiv ? */

	schedule_work(&info->work);
#endif
}

static int
blkif_open(struct disk *dp)
{
	struct xb_softc	*sc = (struct xb_softc *)dp->d_drv1;

	if (sc == NULL) {
		printf("xb%d: not found", dp->d_unit);
		return (ENXIO);
	}

	sc->xb_flags |= XB_OPEN;
	sc->xb_info->users++;
	return (0);
}

static int
blkif_close(struct disk *dp)
{
	struct xb_softc	*sc = (struct xb_softc *)dp->d_drv1;

	if (sc == NULL)
		return (ENXIO);
	sc->xb_flags &= ~XB_OPEN;
	if (--(sc->xb_info->users) == 0) {
		/* Check whether we have been instructed to close.  We will
		   have ignored this request initially, as the device was
		   still mounted. */
		device_t dev = sc->xb_info->xbdev;
		XenbusState state =
			xenbus_read_driver_state(xenbus_get_otherend_path(dev));

		if (state == XenbusStateClosing)
			blkfront_closing(dev);
	}
	return (0);
}

static int
blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td)
{
	struct xb_softc	*sc = (struct xb_softc *)dp->d_drv1;

	if (sc == NULL)
		return (ENXIO);

	return (ENOTTY);
}


/*
 * blkif_queue_request
 *
 * request block io
 *
 * id: for guest use only.
 * operation: BLKIF_OP_{READ,WRITE,PROBE}
 * buffer: buffer to read/write into. this should be a
 *   virtual address in the guest os.
 */
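/*
 * Note: each request is issued with a single segment (no scatter/gather,
 * since buffer chaining is not supported), which is why d_maxsize is
 * capped at one page in xlvbd_add().
 */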
static int blkif_queue_request(struct bio *bp)
{
	caddr_t alignbuf;
	vm_paddr_t buffer_ma;
	blkif_request_t     *ring_req;
	unsigned long id;
	uint64_t fsect, lsect;
	struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1;
	struct blkfront_info *info = sc->xb_info;
	int ref;

	if (unlikely(sc->xb_info->connected != BLKIF_STATE_CONNECTED))
		return 1;

	if (gnttab_alloc_grant_references(
		    BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
		gnttab_request_free_callback(
			&info->callback,
			blkif_restart_queue_callback,
			info,
			BLKIF_MAX_SEGMENTS_PER_REQUEST);
		return 1;
	}

	/* Check if the buffer is properly aligned */
	if ((vm_offset_t)bp->bio_data & PAGE_MASK) {
		int align = (bp->bio_bcount < PAGE_SIZE/2) ? XBD_SECTOR_SIZE :
			PAGE_SIZE;
		caddr_t newbuf = malloc(bp->bio_bcount + align, M_DEVBUF,
					M_NOWAIT);

		if (newbuf == NULL) {	/* retry later on alloc failure */
			gnttab_free_grant_references(gref_head);
			return 1;
		}
		alignbuf = (char *)roundup2((u_long)newbuf, align);

		/* save a copy of the current buffer */
		bp->bio_driver1 = newbuf;
		bp->bio_driver2 = alignbuf;

		/* Copy the data for a write */
		if (bp->bio_cmd == BIO_WRITE)
			bcopy(bp->bio_data, alignbuf, bp->bio_bcount);
	} else
		alignbuf = bp->bio_data;

	/* Fill out a communications ring structure. */
	ring_req 	         = RING_GET_REQUEST(&info->ring,
						    info->ring.req_prod_pvt);
	id		         = GET_ID_FROM_FREELIST(info);
	info->shadow[id].request = (unsigned long)bp;

	ring_req->id 	         = id;
	ring_req->operation 	 = (bp->bio_cmd == BIO_READ) ? BLKIF_OP_READ :
		BLKIF_OP_WRITE;

	ring_req->sector_number= (blkif_sector_t)bp->bio_pblkno;
	ring_req->handle 	  = (blkif_vdev_t)(uintptr_t)sc->xb_disk;

	ring_req->nr_segments  = 0;	/* XXX not doing scatter/gather since buffer
					 * chaining is not supported.
					 */

	buffer_ma = vtomach(alignbuf);
	fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT;
	lsect = fsect + (bp->bio_bcount >> XBD_SECTOR_SHFT) - 1;
	/* install a grant reference. */
	ref = gnttab_claim_grant_reference(&gref_head);
	KASSERT( ref != -ENOSPC, ("grant_reference failed") );

	gnttab_grant_foreign_access_ref(
		ref,
		xenbus_get_otherend_id(info->xbdev),
		buffer_ma >> PAGE_SHIFT,
		ring_req->operation & 1 ); /* read-only grant for writes
					    * (BLKIF_OP_WRITE == 1) */
	info->shadow[id].frame[ring_req->nr_segments] =
		buffer_ma >> PAGE_SHIFT;

	ring_req->seg[ring_req->nr_segments] =
		(struct blkif_request_segment) {
			.gref       = ref,
			.first_sect = fsect,
			.last_sect  = lsect };

	ring_req->nr_segments++;
	KASSERT((buffer_ma & (XBD_SECTOR_SIZE-1)) == 0,
		("XEN buffer must be sector aligned"));
	KASSERT(lsect <= 7,
		("XEN disk driver data cannot cross a page boundary"));

	buffer_ma &= ~PAGE_MASK;

	info->ring.req_prod_pvt++;

	/* Keep a private copy so we can reissue requests when recovering. */
	info->shadow[id].req = *ring_req;

	gnttab_free_grant_references(gref_head);

	return 0;
}



/*
 * Dequeue buffers and place them in the shared communication ring.
 * Return when no more requests can be accepted or all buffers have
 * been queued.
 *
 * Signal XEN once the ring has been filled out.
 */
static void
xb_startio(struct xb_softc *sc)
{
	struct bio		*bp;
	int			queued = 0;
	struct blkfront_info *info = sc->xb_info;
	DPRINTK("");

	mtx_assert(&blkif_io_lock, MA_OWNED);

	while ((bp = bioq_takefirst(&sc->xb_bioq)) != NULL) {

		if (RING_FULL(&info->ring))
			goto wait;

		if (blkif_queue_request(bp)) {
		wait:
			bioq_insert_head(&sc->xb_bioq, bp);
			break;
		}
		queued++;
	}

	if (queued != 0)
		flush_requests(sc->xb_info);
}

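/*
 * Interrupt handler: drain completed responses from the shared ring,
 * finish the corresponding bios, recycle their shadow entries, and
 * restart any I/O that was stalled waiting for ring space.
 */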
static void
blkif_int(void *xsc)
{
	struct xb_softc *sc = NULL;
	struct bio *bp;
	blkif_response_t *bret;
	RING_IDX i, rp;
	struct blkfront_info *info = xsc;
	DPRINTK("");

	TRACE_ENTER;

	mtx_lock(&blkif_io_lock);

	if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
		mtx_unlock(&blkif_io_lock);
		return;
	}

 again:
	rp = info->ring.sring->rsp_prod;
	rmb(); /* Ensure we see queued responses up to 'rp'. */

	for (i = info->ring.rsp_cons; i != rp; i++) {
		unsigned long id;

		bret = RING_GET_RESPONSE(&info->ring, i);
		id   = bret->id;
		bp   = (struct bio *)info->shadow[id].request;

		blkif_completion(&info->shadow[id]);

		ADD_ID_TO_FREELIST(info, id);

		switch (bret->operation) {
		case BLKIF_OP_READ:
			/* had an unaligned buffer that needs to be copied */
			if (bp->bio_driver1)
				bcopy(bp->bio_driver2, bp->bio_data, bp->bio_bcount);
			/* FALLTHROUGH */
		case BLKIF_OP_WRITE:

			/* free the copy buffer */
			if (bp->bio_driver1) {
				free(bp->bio_driver1, M_DEVBUF);
				bp->bio_driver1 = NULL;
			}

			if (unlikely(bret->status != BLKIF_RSP_OKAY)) {
				printf("Bad return from blkdev data request: %x\n",
				    bret->status);
				bp->bio_flags |= BIO_ERROR;
			}

			sc = (struct xb_softc *)bp->bio_disk->d_drv1;

			if (bp->bio_flags & BIO_ERROR)
				bp->bio_error = EIO;
			else
				bp->bio_resid = 0;

			biodone(bp);
			break;
		default:
			panic("received invalid operation");
			break;
		}
	}

	info->ring.rsp_cons = i;

	if (i != info->ring.req_prod_pvt) {
		int more_to_do;
		RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
		if (more_to_do)
			goto again;
	} else {
		info->ring.sring->rsp_event = i + 1;
	}

	kick_pending_request_queues(info);

	mtx_unlock(&blkif_io_lock);
}

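/*
 * Tear down the device channel: revoke the ring grant, release the
 * shared ring page and unbind the event-channel interrupt handler.
 */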
static void
blkif_free(struct blkfront_info *info, int suspend)
{
	/* Prevent new requests being issued until we fix things up. */
	mtx_lock(&blkif_io_lock);
	info->connected = suspend ?
		BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
	mtx_unlock(&blkif_io_lock);

	/* Free resources associated with old device channel. */
	if (info->ring_ref != GRANT_INVALID_REF) {
		gnttab_end_foreign_access(info->ring_ref,
					  info->ring.sring);
		info->ring_ref = GRANT_INVALID_REF;
		info->ring.sring = NULL;
	}
	if (info->irq)
		unbind_from_irqhandler(info->irq);
	info->irq = 0;
}

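/*
 * Release the per-segment grant references of a completed request so
 * the underlying pages can be reused by the guest.
 */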
static void
blkif_completion(struct blk_shadow *s)
{
	int i;

	for (i = 0; i < s->req.nr_segments; i++)
		gnttab_end_foreign_access(s->req.seg[i].gref, 0UL);
}

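/*
 * After suspend/resume (e.g. live migration) the shared ring is brand
 * new, so every request that was in flight must be replayed: copy the
 * shadow state aside, rebuild the free list, requeue the outstanding
 * requests under fresh ids and re-establish their grant mappings.
 */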
static void
blkif_recover(struct blkfront_info *info)
{
	int i, j;
	blkif_request_t *req;
	struct blk_shadow *copy;

	if (!info->sc)
		return;

	/* Stage 1: Make a safe copy of the shadow state. */
	copy = (struct blk_shadow *)malloc(sizeof(info->shadow), M_DEVBUF, M_NOWAIT|M_ZERO);
	if (copy == NULL)
		return;	/* XXX cannot recover without memory */
	memcpy(copy, info->shadow, sizeof(info->shadow));

	/* Stage 2: Set up free list. */
	memset(&info->shadow, 0, sizeof(info->shadow));
	for (i = 0; i < BLK_RING_SIZE; i++)
		info->shadow[i].req.id = i+1;
	info->shadow_free = info->ring.req_prod_pvt;
	info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;

	/* Stage 3: Find pending requests and requeue them. */
	for (i = 0; i < BLK_RING_SIZE; i++) {
		/* Not in use? */
		if (copy[i].request == 0)
			continue;

		/* Grab a request slot and copy shadow state into it. */
		req = RING_GET_REQUEST(
			&info->ring, info->ring.req_prod_pvt);
		*req = copy[i].req;

		/* We get a new request id, and must reset the shadow state. */
		req->id = GET_ID_FROM_FREELIST(info);
		memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i]));

		/* Rewrite any grant references invalidated by suspend/resume. */
		for (j = 0; j < req->nr_segments; j++)
			gnttab_grant_foreign_access_ref(
				req->seg[j].gref,
				xenbus_get_otherend_id(info->xbdev),
				pfn_to_mfn(info->shadow[req->id].frame[j]),
				0 /* assume not readonly */);

		info->shadow[req->id].req = *req;

		info->ring.req_prod_pvt++;
	}

	free(copy, M_DEVBUF);

	xenbus_set_state(info->xbdev, XenbusStateConnected);

	/* Now safe for us to use the shared ring */
	mtx_lock(&blkif_io_lock);
	info->connected = BLKIF_STATE_CONNECTED;
	mtx_unlock(&blkif_io_lock);

	/* Send off requeued requests */
	mtx_lock(&blkif_io_lock);
	flush_requests(info);

	/* Kick any other new requests queued since we resumed */
	kick_pending_request_queues(info);
	mtx_unlock(&blkif_io_lock);
}

/* ** Driver registration ** */
static device_method_t blkfront_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,         blkfront_probe),
	DEVMETHOD(device_attach,        blkfront_attach),
	DEVMETHOD(device_detach,        blkfront_detach),
	DEVMETHOD(device_shutdown,      bus_generic_shutdown),
	DEVMETHOD(device_suspend,       blkfront_suspend),
	DEVMETHOD(device_resume,        blkfront_resume),

	/* Xenbus interface */
	DEVMETHOD(xenbus_backend_changed, blkfront_backend_changed),

	{ 0, 0 }
};

static driver_t blkfront_driver = {
	"xbd",
	blkfront_methods,
	sizeof(struct blkfront_info),
};
devclass_t blkfront_devclass;

DRIVER_MODULE(xbd, xenbus, blkfront_driver, blkfront_devclass, 0, 0);

MTX_SYSINIT(ioreq, &blkif_io_lock, "BIO LOCK", MTX_NOWITNESS); /* XXX how does one enroll a lock? */
