/*-
 * All rights reserved.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

/*
 * XenoBSD block device driver
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/xen/blkfront/blkfront.c 186557 2008-12-29 06:31:03Z kmacy $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <vm/vm.h>
#include <vm/pmap.h>

#include <sys/bio.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/module.h>

#include <machine/bus.h>
#include <sys/rman.h>
#include <machine/resource.h>
#include <machine/intr_machdep.h>
#include <machine/vmparam.h>

#include <xen/hypervisor.h>
#include <machine/xen/xen-os.h>
#include <xen/xen_intr.h>
#include <xen/evtchn.h>
#include <xen/interface/grant_table.h>
#include <xen/interface/io/protocols.h>
#include <xen/xenbus/xenbusvar.h>

#include <geom/geom_disk.h>
#include <machine/xen/xenfunc.h>
#include <xen/gnttab.h>

#include <dev/xen/blkfront/block.h>

#include "xenbus_if.h"

#define    ASSERT(S)       KASSERT(S, (#S))
/* prototypes */
struct xb_softc;
static void xb_startio(struct xb_softc *sc);
static void connect(device_t, struct blkfront_info *);
static void blkfront_closing(device_t);
static int blkfront_detach(device_t);
static int talk_to_backend(device_t, struct blkfront_info *);
static int setup_blkring(device_t, struct blkfront_info *);
static void blkif_int(void *);
#if 0
static void blkif_restart_queue(void *arg);
#endif
static void blkif_recover(struct blkfront_info *);
static void blkif_completion(struct blk_shadow *);
static void blkif_free(struct blkfront_info *, int);

#define GRANT_INVALID_REF 0
#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
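/*
 * __RING_SIZE() yields the number of request/response unions that fit in
 * one page after the ring's bookkeeping fields, rounded down to a power
 * of two; with 4K pages this typically works out to 32 entries (the exact
 * value depends on the blkif ABI in use, so treat 32 as illustrative).
 */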

LIST_HEAD(xb_softc_list_head, xb_softc) xbsl_head;

/* Control whether runtime update of vbds is enabled. */
#define ENABLE_VBD_UPDATE 0

#if ENABLE_VBD_UPDATE
static void vbd_update(void);
#endif


#define BLKIF_STATE_DISCONNECTED 0
#define BLKIF_STATE_CONNECTED    1
#define BLKIF_STATE_SUSPENDED    2

#ifdef notyet
static char *blkif_state_name[] = {
	[BLKIF_STATE_DISCONNECTED] = "disconnected",
	[BLKIF_STATE_CONNECTED]    = "connected",
	[BLKIF_STATE_SUSPENDED]    = "closed",
};

static char * blkif_status_name[] = {
	[BLKIF_INTERFACE_STATUS_CLOSED]       = "closed",
	[BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
	[BLKIF_INTERFACE_STATUS_CONNECTED]    = "connected",
	[BLKIF_INTERFACE_STATUS_CHANGED]      = "changed",
};
#endif
#define WPRINTK(fmt, args...) printf("[XEN] " fmt, ##args)
#if 0
#define DPRINTK(fmt, args...) printf("[XEN] %s:%d" fmt ".\n", __FUNCTION__, __LINE__,##args)
#else
#define DPRINTK(fmt, args...)
#endif

static grant_ref_t gref_head;
#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
    (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
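/*
 * Illustrative arithmetic: with the usual BLKIF_MAX_SEGMENTS_PER_REQUEST
 * of 11 and a 32-entry ring, at most 11 * 32 = 352 segments (grant
 * references) can be outstanding at once.
 */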

static void kick_pending_request_queues(struct blkfront_info *);
static int blkif_open(struct disk *dp);
static int blkif_close(struct disk *dp);
static int blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td);
static int blkif_queue_request(struct bio *bp);
static void xb_strategy(struct bio *bp);



/* XXX move to xb_vbd.c when VBD update support is added */
#define MAX_VBDS 64

#define XBD_SECTOR_SIZE		512	/* XXX: assume for now */
#define XBD_SECTOR_SHFT		9

static struct mtx blkif_io_lock;

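/*
 * Under Xen, the guest's "physical" frame numbers (PFNs) are an illusion;
 * grant table and DMA operations need real machine frame numbers (MFNs).
 * phystomach() performs the lookup in the physical-to-machine table; the
 * shifts convert between byte addresses and frame numbers.
 */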
static vm_paddr_t
pfn_to_mfn(vm_paddr_t pfn)
{
	return (phystomach(pfn << PAGE_SHIFT) >> PAGE_SHIFT);
}


/*
 * Translate Linux major/minor to an appropriate name and unit
 * number. For HVM guests, this allows us to use the same drive names
 * with blkfront as the emulated drives, easing transition slightly.
 */
static void
blkfront_vdevice_to_unit(int vdevice, int *unit, const char **name)
{
	static struct vdev_info {
		int major;
		int shift;
		int base;
		const char *name;
	} info[] = {
		{3,	6,	0,	"ad"},	/* ide0 */
		{22,	6,	2,	"ad"},	/* ide1 */
		{33,	6,	4,	"ad"},	/* ide2 */
		{34,	6,	6,	"ad"},	/* ide3 */
		{56,	6,	8,	"ad"},	/* ide4 */
		{57,	6,	10,	"ad"},	/* ide5 */
		{88,	6,	12,	"ad"},	/* ide6 */
		{89,	6,	14,	"ad"},	/* ide7 */
		{90,	6,	16,	"ad"},	/* ide8 */
		{91,	6,	18,	"ad"},	/* ide9 */

		{8,	4,	0,	"da"},	/* scsi disk0 */
		{65,	4,	16,	"da"},	/* scsi disk1 */
		{66,	4,	32,	"da"},	/* scsi disk2 */
		{67,	4,	48,	"da"},	/* scsi disk3 */
		{68,	4,	64,	"da"},	/* scsi disk4 */
		{69,	4,	80,	"da"},	/* scsi disk5 */
		{70,	4,	96,	"da"},	/* scsi disk6 */
		{71,	4,	112,	"da"},	/* scsi disk7 */
		{128,	4,	128,	"da"},	/* scsi disk8 */
		{129,	4,	144,	"da"},	/* scsi disk9 */
		{130,	4,	160,	"da"},	/* scsi disk10 */
		{131,	4,	176,	"da"},	/* scsi disk11 */
		{132,	4,	192,	"da"},	/* scsi disk12 */
		{133,	4,	208,	"da"},	/* scsi disk13 */
		{134,	4,	224,	"da"},	/* scsi disk14 */
		{135,	4,	240,	"da"},	/* scsi disk15 */

		{202,	4,	0,	"xbd"},	/* xbd */

		{0,	0,	0,	NULL},
	};
	int major = vdevice >> 8;
	int minor = vdevice & 0xff;
	int i;

	if (vdevice & (1 << 28)) {
		/* Extended ID scheme: the unit number is encoded directly. */
		*unit = (vdevice & ((1 << 28) - 1)) >> 8;
		*name = "xbd";
		return;
	}

	for (i = 0; info[i].major; i++) {
		if (info[i].major == major) {
			*unit = info[i].base + (minor >> info[i].shift);
			*name = info[i].name;
			return;
		}
	}

	*unit = minor >> 4;
	*name = "xbd";
}
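/*
 * Worked example (illustrative): vdevice 0x810 decodes to Linux major 8,
 * minor 16; the {8, 4, 0, "da"} entry then gives unit 0 + (16 >> 4) = 1,
 * so the disk attaches as da1.  With bit 28 set, e.g.
 * vdevice = (1 << 28) | (5 << 8), the unit is taken directly and the
 * disk attaches as xbd5.
 */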

int
xlvbd_add(device_t dev, blkif_sector_t capacity,
    int vdevice, uint16_t vdisk_info, uint16_t sector_size,
    struct blkfront_info *info)
{
	struct xb_softc	*sc;
	int	unit, error = 0;
	const char *name;

	blkfront_vdevice_to_unit(vdevice, &unit, &name);

	sc = (struct xb_softc *)malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	sc->xb_unit = unit;
	sc->xb_info = info;
	info->sc = sc;

	if (strcmp(name, "xbd"))
		device_printf(dev, "attaching as %s%d\n", name, unit);

	sc->xb_disk = disk_alloc();
	sc->xb_disk->d_unit = sc->xb_unit;
	sc->xb_disk->d_open = blkif_open;
	sc->xb_disk->d_close = blkif_close;
	sc->xb_disk->d_ioctl = blkif_ioctl;
	sc->xb_disk->d_strategy = xb_strategy;
	sc->xb_disk->d_name = name;
	sc->xb_disk->d_drv1 = sc;
	sc->xb_disk->d_sectorsize = sector_size;

	/* XXX */
	sc->xb_disk->d_mediasize = capacity << XBD_SECTOR_SHFT;
#if 0
	sc->xb_disk->d_maxsize = DFLTPHYS;
#else /* XXX: xen can't handle large single i/o requests */
	sc->xb_disk->d_maxsize = 4096;
#endif
#ifdef notyet
	XENPRINTF("attaching device 0x%x unit %d capacity %llu\n",
		  xb_diskinfo[sc->xb_unit].device, sc->xb_unit,
		  sc->xb_disk->d_mediasize);
#endif
	sc->xb_disk->d_flags = 0;
	disk_create(sc->xb_disk, DISK_VERSION_00);
	bioq_init(&sc->xb_bioq);

	return error;
}

void
xlvbd_del(struct blkfront_info *info)
{
	struct xb_softc	*sc;

	sc = info->sc;
	disk_destroy(sc->xb_disk);
}
/************************ end VBD support *****************/

/*
 * Read/write routine for a buffer: find the proper unit, place the
 * buffer on the sortq, and kick the controller.
 */
static void
xb_strategy(struct bio *bp)
{
	struct xb_softc	*sc = (struct xb_softc *)bp->bio_disk->d_drv1;

	/* bogus disk? */
	if (sc == NULL) {
		bp->bio_error = EINVAL;
		bp->bio_flags |= BIO_ERROR;
		goto bad;
	}

	DPRINTK("");

	/*
	 * Place it in the queue of disk activities for this disk
	 */
	mtx_lock(&blkif_io_lock);
	bioq_disksort(&sc->xb_bioq, bp);

	xb_startio(sc);
	mtx_unlock(&blkif_io_lock);
	return;

 bad:
	/*
	 * Correctly set the bio to indicate a failed transfer.
	 */
	bp->bio_resid = bp->bio_bcount;
	biodone(bp);
	return;
}

static int
blkfront_probe(device_t dev)
{

	if (!strcmp(xenbus_get_type(dev), "vbd")) {
		device_set_desc(dev, "Virtual Block Device");
		device_quiet(dev);
		return (0);
	}

	return (ENXIO);
}

/*
 * Setup supplies the backend directory and virtual device number.  We
 * allocate an event channel and shared ring entries, then watch the
 * backend to see when it is ready.
 */
static int
blkfront_attach(device_t dev)
{
	int err, vdevice, i, unit;
	struct blkfront_info *info;
	const char *name;

	/* FIXME: Use dynamic device id if this is not set. */
	err = xenbus_scanf(XBT_NIL, xenbus_get_node(dev),
	    "virtual-device", NULL, "%i", &vdevice);
	if (err) {
		xenbus_dev_fatal(dev, err, "reading virtual-device");
		printf("couldn't find virtual device");
		return (err);
	}

	blkfront_vdevice_to_unit(vdevice, &unit, &name);
	if (!strcmp(name, "xbd"))
		device_set_unit(dev, unit);

	info = device_get_softc(dev);

	/*
	 * XXX debug only
	 */
	for (i = 0; i < sizeof(*info); i++)
		if (((uint8_t *)info)[i] != 0)
			panic("non-null memory");

	info->shadow_free = 0;
	info->xbdev = dev;
	info->vdevice = vdevice;
	info->connected = BLKIF_STATE_DISCONNECTED;

	/* work queue needed ? */
	for (i = 0; i < BLK_RING_SIZE; i++)
		info->shadow[i].req.id = i+1;
	info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
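	/*
	 * The shadow free list is threaded through the otherwise unused
	 * req.id fields: shadow_free names the first free slot, each free
	 * slot's req.id names the next one, and 0x0fffffff terminates the
	 * chain (see GET_ID_FROM_FREELIST()/ADD_ID_TO_FREELIST() below).
	 */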

	/* Front end dir is a number, which is used as the id. */
	info->handle = strtoul(strrchr(xenbus_get_node(dev),'/')+1, NULL, 0);

	err = talk_to_backend(dev, info);
	if (err)
		return (err);

	return (0);
}

static int
blkfront_resume(device_t dev)
{
	struct blkfront_info *info = device_get_softc(dev);
	int err;

	DPRINTK("blkfront_resume: %s\n", xenbus_get_node(dev));

	blkif_free(info, 1);

	err = talk_to_backend(dev, info);

	if (info->connected == BLKIF_STATE_SUSPENDED && !err)
		blkif_recover(info);

	return err;
}

/* Common code used when first setting up, and when resuming. */
static int
talk_to_backend(device_t dev, struct blkfront_info *info)
{
	const char *message = NULL;
	struct xenbus_transaction xbt;
	int err;

	/* Create shared ring, alloc event channel. */
	err = setup_blkring(dev, info);
	if (err)
		goto out;

 again:
	err = xenbus_transaction_start(&xbt);
	if (err) {
		xenbus_dev_fatal(dev, err, "starting transaction");
		goto destroy_blkring;
	}

	err = xenbus_printf(xbt, xenbus_get_node(dev),
			    "ring-ref","%u", info->ring_ref);
	if (err) {
		message = "writing ring-ref";
		goto abort_transaction;
	}
	err = xenbus_printf(xbt, xenbus_get_node(dev),
		"event-channel", "%u", irq_to_evtchn_port(info->irq));
	if (err) {
		message = "writing event-channel";
		goto abort_transaction;
	}
	err = xenbus_printf(xbt, xenbus_get_node(dev),
		"protocol", "%s", XEN_IO_PROTO_ABI_NATIVE);
	if (err) {
		message = "writing protocol";
		goto abort_transaction;
	}
	err = xenbus_transaction_end(xbt, 0);
	if (err) {
		if (err == EAGAIN)
			goto again;
		xenbus_dev_fatal(dev, err, "completing transaction");
		goto destroy_blkring;
	}
	xenbus_set_state(dev, XenbusStateInitialised);

	return 0;

 abort_transaction:
	xenbus_transaction_end(xbt, 1);
	if (message)
		xenbus_dev_fatal(dev, err, "%s", message);
 destroy_blkring:
	blkif_free(info, 0);
 out:
	return err;
}
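/*
 * On success, the frontend's xenstore directory could look like this
 * (node name and values are hypothetical, for illustration only):
 *
 *	device/vbd/768/ring-ref      = "8"
 *	device/vbd/768/event-channel = "11"
 *	device/vbd/768/protocol      = "x86_32-abi"
 *
 * The backend watches these keys and binds its end of the ring and
 * event channel once they appear.
 */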

static int
setup_blkring(device_t dev, struct blkfront_info *info)
{
	blkif_sring_t *sring;
	int error;

	info->ring_ref = GRANT_INVALID_REF;

	sring = (blkif_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sring == NULL) {
		xenbus_dev_fatal(dev, ENOMEM, "allocating shared ring");
		return ENOMEM;
	}
	SHARED_RING_INIT(sring);
	FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);

	error = xenbus_grant_ring(dev, (vtomach(info->ring.sring) >> PAGE_SHIFT),
		&info->ring_ref);
	if (error) {
		free(sring, M_DEVBUF);
		info->ring.sring = NULL;
		goto fail;
	}

	error = bind_listening_port_to_irqhandler(xenbus_get_otherend_id(dev),
		"xbd", (driver_intr_t *)blkif_int, info,
					INTR_TYPE_BIO | INTR_MPSAFE, &info->irq);
	if (error) {
		xenbus_dev_fatal(dev, error,
				 "bind_evtchn_to_irqhandler failed");
		goto fail;
	}

	return (0);
 fail:
	blkif_free(info, 0);
	return (error);
}


/**
 * Callback received when the backend's state changes.
 */
static void
blkfront_backend_changed(device_t dev, XenbusState backend_state)
{
	struct blkfront_info *info = device_get_softc(dev);

	DPRINTK("blkfront:backend_changed.\n");

	switch (backend_state) {
	case XenbusStateUnknown:
	case XenbusStateInitialising:
	case XenbusStateInitWait:
	case XenbusStateInitialised:
	case XenbusStateClosed:
	case XenbusStateReconfigured:
	case XenbusStateReconfiguring:
		break;

	case XenbusStateConnected:
		connect(dev, info);
		break;

	case XenbusStateClosing:
		if (info->users > 0)
			xenbus_dev_error(dev, -EBUSY,
					 "Device in use; refusing to close");
		else
			blkfront_closing(dev);
#ifdef notyet
		bd = bdget(info->dev);
		if (bd == NULL)
			xenbus_dev_fatal(dev, -ENODEV, "bdget failed");

		down(&bd->bd_sem);
		if (info->users > 0)
			xenbus_dev_error(dev, -EBUSY,
					 "Device in use; refusing to close");
		else
			blkfront_closing(dev);
		up(&bd->bd_sem);
		bdput(bd);
#endif
	}
}

/*
** Invoked when the backend is finally 'ready' (and has produced the
** details about the physical device - #sectors, size, etc).
*/
static void
connect(device_t dev, struct blkfront_info *info)
{
	unsigned long sectors, sector_size;
	unsigned int binfo;
	int err;

	if ((info->connected == BLKIF_STATE_CONNECTED) ||
	    (info->connected == BLKIF_STATE_SUSPENDED))
		return;

	DPRINTK("blkfront.c:connect:%s.\n", xenbus_get_otherend_path(dev));

	err = xenbus_gather(XBT_NIL, xenbus_get_otherend_path(dev),
			    "sectors", "%lu", &sectors,
			    "info", "%u", &binfo,
			    "sector-size", "%lu", &sector_size,
			    NULL);
	if (err) {
		xenbus_dev_fatal(dev, err,
		    "reading backend fields at %s",
		    xenbus_get_otherend_path(dev));
		return;
	}
	err = xenbus_gather(XBT_NIL, xenbus_get_otherend_path(dev),
			    "feature-barrier", "%lu", &info->feature_barrier,
			    NULL);
	if (err)
		info->feature_barrier = 0;

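	/*
	 * "sectors" counts sector_size-byte units, so dividing by
	 * (1048576 / sector_size) yields whole megabytes; e.g. 2097152
	 * sectors of 512 bytes give 2097152 / 2048 = 1024MB.
	 */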
	device_printf(dev, "%juMB <%s> at %s",
	    (uintmax_t) sectors / (1048576 / sector_size),
	    device_get_desc(dev),
	    xenbus_get_node(dev));
	bus_print_child_footer(device_get_parent(dev), dev);

	xlvbd_add(dev, sectors, info->vdevice, binfo, sector_size, info);

	(void)xenbus_set_state(dev, XenbusStateConnected);

	/* Kick pending requests. */
	mtx_lock(&blkif_io_lock);
	info->connected = BLKIF_STATE_CONNECTED;
	kick_pending_request_queues(info);
	mtx_unlock(&blkif_io_lock);
	info->is_ready = 1;

#if 0
	add_disk(info->gd);
#endif
}

/**
 * Handle the change of state of the backend to Closing.  We must delete our
 * device-layer structures now, to ensure that writes are flushed through to
 * the backend.  Once this is done, we can switch to Closed in
 * acknowledgement.
 */
static void
blkfront_closing(device_t dev)
{
	struct blkfront_info *info = device_get_softc(dev);

	DPRINTK("blkfront_closing: %s removed\n", xenbus_get_node(dev));

	if (info->mi) {
		DPRINTK("Calling xlvbd_del\n");
		xlvbd_del(info);
		info->mi = NULL;
	}

	xenbus_set_state(dev, XenbusStateClosed);
}


static int
blkfront_detach(device_t dev)
{
	struct blkfront_info *info = device_get_softc(dev);

	DPRINTK("blkfront_remove: %s removed\n", xenbus_get_node(dev));

	blkif_free(info, 0);

	return 0;
}


static inline int
GET_ID_FROM_FREELIST(struct blkfront_info *info)
{
	unsigned long nfree = info->shadow_free;

	KASSERT(nfree <= BLK_RING_SIZE, ("free %lu > RING_SIZE", nfree));
	info->shadow_free = info->shadow[nfree].req.id;
	info->shadow[nfree].req.id = 0x0fffffee; /* debug */
	return nfree;
}

static inline void
ADD_ID_TO_FREELIST(struct blkfront_info *info, unsigned long id)
{
	info->shadow[id].req.id  = info->shadow_free;
	info->shadow[id].request = 0;
	info->shadow_free = id;
}

static inline void
flush_requests(struct blkfront_info *info)
{
	int notify;

	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);

	if (notify)
		notify_remote_via_irq(info->irq);
}
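/*
 * RING_PUSH_REQUESTS_AND_CHECK_NOTIFY() publishes req_prod to the shared
 * ring and sets 'notify' only if the backend has asked (via its req_event
 * field) to be woken for one of the newly pushed requests, so the event
 * channel is kicked only when the backend may actually be idle.
 */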

static void
kick_pending_request_queues(struct blkfront_info *info)
{
	/* XXX check if we can't simplify */
#if 0
	if (!RING_FULL(&info->ring)) {
		/* Re-enable calldowns. */
		blk_start_queue(info->rq);
		/* Kick things off immediately. */
		do_blkif_request(info->rq);
	}
#endif
	if (!RING_FULL(&info->ring)) {
#if 0
		sc = LIST_FIRST(&xbsl_head);
		LIST_REMOVE(sc, entry);
		/* Re-enable calldowns. */
		blk_start_queue(di->rq);
#endif
		/* Kick things off immediately. */
		xb_startio(info->sc);
	}
}

#if 0
/* XXX */
static void blkif_restart_queue(void *arg)
{
	struct blkfront_info *info = (struct blkfront_info *)arg;

	mtx_lock(&blkif_io_lock);
	kick_pending_request_queues(info);
	mtx_unlock(&blkif_io_lock);
}
#endif

static void blkif_restart_queue_callback(void *arg)
{
#if 0
	struct blkfront_info *info = (struct blkfront_info *)arg;
	/* XXX BSD equiv ? */

	schedule_work(&info->work);
#endif
}

static int
blkif_open(struct disk *dp)
{
	struct xb_softc	*sc = (struct xb_softc *)dp->d_drv1;

	if (sc == NULL) {
		printk("blkif_open: disk not found\n");
		return (ENXIO);
	}

	sc->xb_flags |= XB_OPEN;
	sc->xb_info->users++;
	return (0);
}

static int
blkif_close(struct disk *dp)
{
	struct xb_softc	*sc = (struct xb_softc *)dp->d_drv1;

	if (sc == NULL)
		return (ENXIO);
	sc->xb_flags &= ~XB_OPEN;
	if (--(sc->xb_info->users) == 0) {
		/* Check whether we have been instructed to close.  We will
		   have ignored this request initially, as the device was
		   still mounted. */
		device_t dev = sc->xb_info->xbdev;
		XenbusState state =
			xenbus_read_driver_state(xenbus_get_otherend_path(dev));

		if (state == XenbusStateClosing)
			blkfront_closing(dev);
	}
	return (0);
}

static int
blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td)
{
	struct xb_softc	*sc = (struct xb_softc *)dp->d_drv1;

	if (sc == NULL)
		return (ENXIO);

	return (ENOTTY);
}


/*
 * blkif_queue_request
 *
 * request block io
 *
 * id: for guest use only.
 * operation: BLKIF_OP_{READ,WRITE,PROBE}
 * buffer: buffer to read/write into. this should be a
 *   virtual address in the guest os.
 */
static int blkif_queue_request(struct bio *bp)
{
	caddr_t alignbuf;
	vm_paddr_t buffer_ma;
	blkif_request_t     *ring_req;
	unsigned long id;
	uint64_t fsect, lsect;
	struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1;
	struct blkfront_info *info = sc->xb_info;
	int ref;

	if (unlikely(sc->xb_info->connected != BLKIF_STATE_CONNECTED))
		return 1;

	if (gnttab_alloc_grant_references(
		    BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
		gnttab_request_free_callback(
			&info->callback,
			blkif_restart_queue_callback,
			info,
			BLKIF_MAX_SEGMENTS_PER_REQUEST);
		return 1;
	}

	/* Check if the buffer is properly aligned */
	if ((vm_offset_t)bp->bio_data & PAGE_MASK) {
		int align = (bp->bio_bcount < PAGE_SIZE/2) ? XBD_SECTOR_SIZE :
			PAGE_SIZE;
		caddr_t newbuf = malloc(bp->bio_bcount + align, M_DEVBUF,
					M_NOWAIT);

		if (newbuf == NULL) {
			/* No memory for a bounce buffer; retry later. */
			gnttab_free_grant_references(gref_head);
			return 1;
		}
		alignbuf = (char *)roundup2((u_long)newbuf, align);

		/* save a copy of the current buffer */
		bp->bio_driver1 = newbuf;
		bp->bio_driver2 = alignbuf;

		/* Copy the data for a write */
		if (bp->bio_cmd == BIO_WRITE)
			bcopy(bp->bio_data, alignbuf, bp->bio_bcount);
	} else
		alignbuf = bp->bio_data;

	/* Fill out a communications ring structure. */
	ring_req 	         = RING_GET_REQUEST(&info->ring,
						    info->ring.req_prod_pvt);
	id		         = GET_ID_FROM_FREELIST(info);
	info->shadow[id].request = (unsigned long)bp;

	ring_req->id 	         = id;
	ring_req->operation 	 = (bp->bio_cmd == BIO_READ) ? BLKIF_OP_READ :
		BLKIF_OP_WRITE;

	ring_req->sector_number= (blkif_sector_t)bp->bio_pblkno;
	ring_req->handle 	  = (blkif_vdev_t)(uintptr_t)sc->xb_disk;

	ring_req->nr_segments  = 0;	/* XXX not doing scatter/gather since buffer
					 * chaining is not supported.
					 */

	buffer_ma = vtomach(alignbuf);
	fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT;
	lsect = fsect + (bp->bio_bcount >> XBD_SECTOR_SHFT) - 1;
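	/*
	 * Illustrative numbers: for a page-aligned 4096-byte transfer,
	 * fsect = 0 and lsect = 0 + (4096 >> 9) - 1 = 7, i.e. eight
	 * 512-byte sectors filling exactly one page; the KASSERTs below
	 * enforce that a segment never crosses a page boundary.
	 */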
	/* Install a grant reference. */
	ref = gnttab_claim_grant_reference(&gref_head);
	KASSERT(ref != -ENOSPC, ("grant_reference failed"));

	gnttab_grant_foreign_access_ref(
		ref,
		xenbus_get_otherend_id(info->xbdev),
		buffer_ma >> PAGE_SHIFT,
		ring_req->operation & 1); /* read-only for writes:
					   * BLKIF_OP_WRITE is 1, and the
					   * backend only reads the buffer
					   * of a write request. */
	info->shadow[id].frame[ring_req->nr_segments] =
		buffer_ma >> PAGE_SHIFT;

	ring_req->seg[ring_req->nr_segments] =
		(struct blkif_request_segment) {
			.gref       = ref,
			.first_sect = fsect,
			.last_sect  = lsect };

	ring_req->nr_segments++;
	KASSERT((buffer_ma & (XBD_SECTOR_SIZE-1)) == 0,
		("XEN buffer must be sector aligned"));
	KASSERT(lsect <= 7,
		("XEN disk driver data cannot cross a page boundary"));

	buffer_ma &= ~PAGE_MASK;

	info->ring.req_prod_pvt++;

	/* Keep a private copy so we can reissue requests when recovering. */
	info->shadow[id].req = *ring_req;

	gnttab_free_grant_references(gref_head);

	return 0;
}



/*
 * Dequeue buffers and place them in the shared communication ring.
 * Return when no more requests can be accepted or all buffers have
 * been queued.
 *
 * Signal XEN once the ring has been filled out.
 */
static void
xb_startio(struct xb_softc *sc)
{
	struct bio		*bp;
	int			queued = 0;
	struct blkfront_info *info = sc->xb_info;
	DPRINTK("");

	mtx_assert(&blkif_io_lock, MA_OWNED);

	while ((bp = bioq_takefirst(&sc->xb_bioq)) != NULL) {

		if (RING_FULL(&info->ring))
			goto wait;

		if (blkif_queue_request(bp)) {
		wait:
			bioq_insert_head(&sc->xb_bioq, bp);
			break;
		}
		queued++;
	}

	if (queued != 0)
		flush_requests(sc->xb_info);
}

static void
blkif_int(void *xsc)
{
	struct xb_softc *sc = NULL;
	struct bio *bp;
	blkif_response_t *bret;
	RING_IDX i, rp;
	struct blkfront_info *info = xsc;
	DPRINTK("");

	TRACE_ENTER;

	mtx_lock(&blkif_io_lock);

	if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
		mtx_unlock(&blkif_io_lock);
		return;
	}

 again:
	rp = info->ring.sring->rsp_prod;
	rmb(); /* Ensure we see queued responses up to 'rp'. */

	for (i = info->ring.rsp_cons; i != rp; i++) {
		unsigned long id;

		bret = RING_GET_RESPONSE(&info->ring, i);
		id   = bret->id;
		bp   = (struct bio *)info->shadow[id].request;

		blkif_completion(&info->shadow[id]);

		ADD_ID_TO_FREELIST(info, id);

		switch (bret->operation) {
		case BLKIF_OP_READ:
			/* had an unaligned buffer that needs to be copied */
			if (bp->bio_driver1)
				bcopy(bp->bio_driver2, bp->bio_data, bp->bio_bcount);
			/* FALLTHROUGH */
		case BLKIF_OP_WRITE:

			/* free the copy buffer */
			if (bp->bio_driver1) {
				free(bp->bio_driver1, M_DEVBUF);
				bp->bio_driver1 = NULL;
			}

			if (unlikely(bret->status != BLKIF_RSP_OKAY)) {
				printf("Bad return from blkdev data request: %x\n",
				    bret->status);
				bp->bio_flags |= BIO_ERROR;
			}

			sc = (struct xb_softc *)bp->bio_disk->d_drv1;

			if (bp->bio_flags & BIO_ERROR)
				bp->bio_error = EIO;
			else
				bp->bio_resid = 0;

			biodone(bp);
			break;
		default:
			panic("received invalid operation");
			break;
		}
	}

	info->ring.rsp_cons = i;

	if (i != info->ring.req_prod_pvt) {
		int more_to_do;
		RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
		if (more_to_do)
			goto again;
	} else {
		info->ring.sring->rsp_event = i + 1;
	}

	kick_pending_request_queues(info);

	mtx_unlock(&blkif_io_lock);
}

static void
blkif_free(struct blkfront_info *info, int suspend)
{

	/* Prevent new requests being issued until we fix things up. */
	mtx_lock(&blkif_io_lock);
	info->connected = suspend ?
		BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
	mtx_unlock(&blkif_io_lock);

	/* Free resources associated with old device channel. */
	if (info->ring_ref != GRANT_INVALID_REF) {
		gnttab_end_foreign_access(info->ring_ref,
					  info->ring.sring);
		info->ring_ref = GRANT_INVALID_REF;
		info->ring.sring = NULL;
	}
	if (info->irq)
		unbind_from_irqhandler(info->irq);
	info->irq = 0;

}

static void
blkif_completion(struct blk_shadow *s)
{
	int i;

	for (i = 0; i < s->req.nr_segments; i++)
		gnttab_end_foreign_access(s->req.seg[i].gref, 0UL);
}

static void
blkif_recover(struct blkfront_info *info)
{
	int i, j;
	blkif_request_t *req;
	struct blk_shadow *copy;

	/* Stage 1: Make a safe copy of the shadow state. */
	copy = (struct blk_shadow *)malloc(sizeof(info->shadow), M_DEVBUF, M_NOWAIT|M_ZERO);
	PANIC_IF(copy == NULL);
	memcpy(copy, info->shadow, sizeof(info->shadow));

	/* Stage 2: Set up free list. */
	memset(&info->shadow, 0, sizeof(info->shadow));
	for (i = 0; i < BLK_RING_SIZE; i++)
		info->shadow[i].req.id = i+1;
	info->shadow_free = info->ring.req_prod_pvt;
	info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;

	/* Stage 3: Find pending requests and requeue them. */
	for (i = 0; i < BLK_RING_SIZE; i++) {
		/* Not in use? */
		if (copy[i].request == 0)
			continue;

		/* Grab a request slot and copy shadow state into it. */
		req = RING_GET_REQUEST(
			&info->ring, info->ring.req_prod_pvt);
		*req = copy[i].req;

		/* We get a new request id, and must reset the shadow state. */
		req->id = GET_ID_FROM_FREELIST(info);
		memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i]));

		/* Rewrite any grant references invalidated by suspend/resume. */
		for (j = 0; j < req->nr_segments; j++)
			gnttab_grant_foreign_access_ref(
				req->seg[j].gref,
				xenbus_get_otherend_id(info->xbdev),
				pfn_to_mfn(info->shadow[req->id].frame[j]),
				0 /* assume not readonly */);

		info->shadow[req->id].req = *req;

		info->ring.req_prod_pvt++;
	}

	free(copy, M_DEVBUF);

	xenbus_set_state(info->xbdev, XenbusStateConnected);

	/* Now safe for us to use the shared ring */
	mtx_lock(&blkif_io_lock);
	info->connected = BLKIF_STATE_CONNECTED;
	mtx_unlock(&blkif_io_lock);

	/* Send off requeued requests */
	mtx_lock(&blkif_io_lock);
	flush_requests(info);

	/* Kick any other new requests queued since we resumed */
	kick_pending_request_queues(info);
	mtx_unlock(&blkif_io_lock);
}
1079
1080/* ** Driver registration ** */
1081static device_method_t blkfront_methods[] = {
1082	/* Device interface */
1083	DEVMETHOD(device_probe,         blkfront_probe),
1084	DEVMETHOD(device_attach,        blkfront_attach),
1085	DEVMETHOD(device_detach,        blkfront_detach),
1086	DEVMETHOD(device_shutdown,      bus_generic_shutdown),
1087	DEVMETHOD(device_suspend,       bus_generic_suspend),
1088	DEVMETHOD(device_resume,        blkfront_resume),
1089
1090	/* Xenbus interface */
1091	DEVMETHOD(xenbus_backend_changed, blkfront_backend_changed),
1092
1093	{ 0, 0 }
1094};
1095
1096static driver_t blkfront_driver = {
1097	"xbd",
1098	blkfront_methods,
1099	sizeof(struct blkfront_info),
1100};
1101devclass_t blkfront_devclass;
1102
1103DRIVER_MODULE(xbd, xenbus, blkfront_driver, blkfront_devclass, 0, 0);
1104
1105MTX_SYSINIT(ioreq, &blkif_io_lock, "BIO LOCK", MTX_NOWITNESS); /* XXX how does one enroll a lock? */
1106
1107