blkfront.c revision 196661
/*-
 * All rights reserved.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

/*
 * XenBSD block device driver
 *
 * Copyright (c) 2009 Frank Suchomel, Citrix
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/xen/blkfront/blkfront.c 196661 2009-08-30 20:45:24Z kmacy $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <vm/vm.h>
#include <vm/pmap.h>

#include <sys/bio.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/module.h>

#include <machine/bus.h>
#include <sys/rman.h>
#include <machine/resource.h>
#include <machine/intr_machdep.h>
#include <machine/vmparam.h>

#include <machine/xen/xen-os.h>
#include <machine/xen/xenfunc.h>
#include <xen/hypervisor.h>
#include <xen/xen_intr.h>
#include <xen/evtchn.h>
#include <xen/gnttab.h>
#include <xen/interface/grant_table.h>
#include <xen/interface/io/protocols.h>
#include <xen/xenbus/xenbusvar.h>

#include <geom/geom_disk.h>

#include <dev/xen/blkfront/block.h>

#include "xenbus_if.h"

#define    ASSERT(S)       KASSERT(S, (#S))
/* prototypes */
struct xb_softc;
static void xb_startio(struct xb_softc *sc);
static void connect(device_t, struct blkfront_info *);
static void blkfront_closing(device_t);
static int blkfront_detach(device_t);
static int talk_to_backend(device_t, struct blkfront_info *);
static int setup_blkring(device_t, struct blkfront_info *);
static void blkif_int(void *);
#if 0
static void blkif_restart_queue(void *arg);
#endif
static void blkif_recover(struct blkfront_info *);
static void blkif_completion(struct blk_shadow *);
static void blkif_free(struct blkfront_info *, int);

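/*
 * Grant reference 0 is used by this driver as the "invalid" sentinel.
 * BLK_RING_SIZE is the number of request slots that fit in a single
 * page-sized shared ring, as computed by the standard Xen ring macros.
 */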
#define GRANT_INVALID_REF 0
#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)

LIST_HEAD(xb_softc_list_head, xb_softc) xbsl_head;

/* Control whether runtime update of vbds is enabled. */
#define ENABLE_VBD_UPDATE 0

#if ENABLE_VBD_UPDATE
static void vbd_update(void);
#endif


#define BLKIF_STATE_DISCONNECTED 0
#define BLKIF_STATE_CONNECTED    1
#define BLKIF_STATE_SUSPENDED    2

#ifdef notyet
static char *blkif_state_name[] = {
	[BLKIF_STATE_DISCONNECTED] = "disconnected",
	[BLKIF_STATE_CONNECTED]    = "connected",
	[BLKIF_STATE_SUSPENDED]    = "closed",
};

static char * blkif_status_name[] = {
	[BLKIF_INTERFACE_STATUS_CLOSED]       = "closed",
	[BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
	[BLKIF_INTERFACE_STATUS_CONNECTED]    = "connected",
	[BLKIF_INTERFACE_STATUS_CHANGED]      = "changed",
};
#endif
#define WPRINTK(fmt, args...) printf("[XEN] " fmt, ##args)
#if 0
#define DPRINTK(fmt, args...) printf("[XEN] %s:%d: " fmt ".\n", __func__, __LINE__, ##args)
#else
#define DPRINTK(fmt, args...)
#endif

static grant_ref_t gref_head;
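/*
 * Worst-case number of grant references in flight at once: every ring
 * slot occupied by a request carrying the maximum number of segments.
 */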
#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
    (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)

static void kick_pending_request_queues(struct blkfront_info *);
static int blkif_open(struct disk *dp);
static int blkif_close(struct disk *dp);
static int blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td);
static int blkif_queue_request(struct bio *bp);
static void xb_strategy(struct bio *bp);

// In order to quiesce the device during kernel dumps, outstanding requests to
// DOM0 for disk reads/writes need to be accounted for.
static	int	blkif_queued_requests;
static	int	xb_dump(void *, void *, vm_offset_t, off_t, size_t);


/* XXX move to xb_vbd.c when VBD update support is added */
#define MAX_VBDS 64

#define XBD_SECTOR_SIZE		512	/* XXX: assume for now */
#define XBD_SECTOR_SHFT		9

static struct mtx blkif_io_lock;

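/*
 * Convert a guest pseudo-physical frame number to the machine frame
 * number expected by the grant-table and ring-sharing interfaces.
 */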
static vm_paddr_t
pfn_to_mfn(vm_paddr_t pfn)
{
	return (phystomach(pfn << PAGE_SHIFT) >> PAGE_SHIFT);
}

/*
 * Translate Linux major/minor to an appropriate name and unit
 * number. For HVM guests, this allows us to use the same drive names
 * with blkfront as the emulated drives, easing transition slightly.
 */
static void
blkfront_vdevice_to_unit(int vdevice, int *unit, const char **name)
{
	static struct vdev_info {
		int major;
		int shift;
		int base;
		const char *name;
	} info[] = {
		{3,	6,	0,	"ad"},	/* ide0 */
		{22,	6,	2,	"ad"},	/* ide1 */
		{33,	6,	4,	"ad"},	/* ide2 */
		{34,	6,	6,	"ad"},	/* ide3 */
		{56,	6,	8,	"ad"},	/* ide4 */
		{57,	6,	10,	"ad"},	/* ide5 */
		{88,	6,	12,	"ad"},	/* ide6 */
		{89,	6,	14,	"ad"},	/* ide7 */
		{90,	6,	16,	"ad"},	/* ide8 */
		{91,	6,	18,	"ad"},	/* ide9 */

		{8,	4,	0,	"da"},	/* scsi disk0 */
		{65,	4,	16,	"da"},	/* scsi disk1 */
		{66,	4,	32,	"da"},	/* scsi disk2 */
		{67,	4,	48,	"da"},	/* scsi disk3 */
		{68,	4,	64,	"da"},	/* scsi disk4 */
		{69,	4,	80,	"da"},	/* scsi disk5 */
		{70,	4,	96,	"da"},	/* scsi disk6 */
		{71,	4,	112,	"da"},	/* scsi disk7 */
		{128,	4,	128,	"da"},	/* scsi disk8 */
		{129,	4,	144,	"da"},	/* scsi disk9 */
		{130,	4,	160,	"da"},	/* scsi disk10 */
		{131,	4,	176,	"da"},	/* scsi disk11 */
		{132,	4,	192,	"da"},	/* scsi disk12 */
		{133,	4,	208,	"da"},	/* scsi disk13 */
		{134,	4,	224,	"da"},	/* scsi disk14 */
		{135,	4,	240,	"da"},	/* scsi disk15 */

		{202,	4,	0,	"xbd"},	/* xbd */

		{0,	0,	0,	NULL},
	};
	int major = vdevice >> 8;
	int minor = vdevice & 0xff;
	int i;

	if (vdevice & (1 << 28)) {
		/* Extended id: return now so the legacy fallback below
		 * does not overwrite the unit and name. */
		*unit = (vdevice & ((1 << 28) - 1)) >> 8;
		*name = "xbd";
		return;
	}

	for (i = 0; info[i].major; i++) {
		if (info[i].major == major) {
			*unit = info[i].base + (minor >> info[i].shift);
			*name = info[i].name;
			return;
		}
	}

	*unit = minor >> 4;
	*name = "xbd";
}

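/*
 * Allocate a softc and GEOM disk for a newly discovered virtual block
 * device and register it with the disk layer.
 */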
int
xlvbd_add(device_t dev, blkif_sector_t capacity,
    int vdevice, uint16_t vdisk_info, uint16_t sector_size,
    struct blkfront_info *info)
{
	struct xb_softc	*sc;
	int	unit, error = 0;
	const char *name;

	blkfront_vdevice_to_unit(vdevice, &unit, &name);

	sc = (struct xb_softc *)malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	sc->xb_unit = unit;
	sc->xb_info = info;
	info->sc = sc;

	if (strcmp(name, "xbd"))
		device_printf(dev, "attaching as %s%d\n", name, unit);

	sc->xb_disk = disk_alloc();
	sc->xb_disk->d_unit = sc->xb_unit;
	sc->xb_disk->d_open = blkif_open;
	sc->xb_disk->d_close = blkif_close;
	sc->xb_disk->d_ioctl = blkif_ioctl;
	sc->xb_disk->d_strategy = xb_strategy;
	sc->xb_disk->d_dump = xb_dump;
	sc->xb_disk->d_name = name;
	sc->xb_disk->d_drv1 = sc;
	sc->xb_disk->d_sectorsize = sector_size;

	/* XXX */
	sc->xb_disk->d_mediasize = capacity << XBD_SECTOR_SHFT;
#if 0
	sc->xb_disk->d_maxsize = DFLTPHYS;
#else /* XXX: xen can't handle large single i/o requests */
	sc->xb_disk->d_maxsize = 4096;
#endif
#ifdef notyet
	XENPRINTF("attaching device 0x%x unit %d capacity %llu\n",
		  xb_diskinfo[sc->xb_unit].device, sc->xb_unit,
		  sc->xb_disk->d_mediasize);
#endif
	sc->xb_disk->d_flags = 0;
	disk_create(sc->xb_disk, DISK_VERSION_00);
	bioq_init(&sc->xb_bioq);

	return error;
}

void
xlvbd_del(struct blkfront_info *info)
{
	struct xb_softc	*sc;

	sc = info->sc;
	disk_destroy(sc->xb_disk);
}
/************************ end VBD support *****************/

/*
 * Read/write routine for a buffer.  Finds the proper unit, places it on
 * the sort queue, and kicks the controller.
 */
static void
xb_strategy(struct bio *bp)
{
	struct xb_softc	*sc = (struct xb_softc *)bp->bio_disk->d_drv1;

	/* bogus disk? */
	if (sc == NULL) {
		bp->bio_error = EINVAL;
		bp->bio_flags |= BIO_ERROR;
		goto bad;
	}

	DPRINTK("");

	/*
	 * Place it in the queue of disk activities for this disk
	 */
	mtx_lock(&blkif_io_lock);

	bioq_disksort(&sc->xb_bioq, bp);
	xb_startio(sc);

	mtx_unlock(&blkif_io_lock);
	return;

 bad:
	/*
	 * Correctly set the bio to indicate a failed transfer.
	 */
	bp->bio_resid = bp->bio_bcount;
	biodone(bp);
	return;
}

static void xb_quiesce(struct blkfront_info *info);
// Quiesce the disk writes for a dump file before allowing the next buffer.
static void
xb_quiesce(struct blkfront_info *info)
{
	int		mtd;

	// While there are outstanding requests
	while (blkif_queued_requests) {
		RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, mtd);
		if (mtd) {
			// Received request completions, update queue.
			blkif_int(info);
		}
		if (blkif_queued_requests) {
			// Still pending requests, wait for the disk i/o to complete
			HYPERVISOR_block();
		}
	}
}

// Some bio structures for dumping core
#define DUMP_BIO_NO 16				// 16 * 4KB = 64KB dump block
static	struct bio		xb_dump_bp[DUMP_BIO_NO];

// Kernel dump function for a paravirtualized disk device
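// The dump framework hands us successive buffers; each call must complete
// its writes before returning, since the caller reuses the buffer space.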
static int
xb_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset,
        size_t length)
{
	int			sbp;
	int			mbp;
	size_t			chunk;
	struct disk		*dp = arg;
	struct xb_softc		*sc = (struct xb_softc *) dp->d_drv1;
	int			rc = 0;

	xb_quiesce(sc->xb_info);		// All quiet on the western front.
	if (length > 0) {
		// If this lock is held, then this module is failing, and a successful
		// kernel dump is highly unlikely anyway.
		mtx_lock(&blkif_io_lock);
		// Split the 64KB block into 16 4KB blocks
		for (sbp=0; length>0 && sbp<DUMP_BIO_NO; sbp++) {
			chunk = length > PAGE_SIZE ? PAGE_SIZE : length;
			xb_dump_bp[sbp].bio_disk   = dp;
			xb_dump_bp[sbp].bio_pblkno = offset / dp->d_sectorsize;
			xb_dump_bp[sbp].bio_bcount = chunk;
			xb_dump_bp[sbp].bio_resid  = chunk;
			xb_dump_bp[sbp].bio_data   = virtual;
			xb_dump_bp[sbp].bio_cmd    = BIO_WRITE;
			xb_dump_bp[sbp].bio_done   = NULL;

			bioq_disksort(&sc->xb_bioq, &xb_dump_bp[sbp]);

			length -= chunk;
			offset += chunk;
			virtual = (char *) virtual + chunk;
		}
		// Tell DOM0 to do the I/O
		xb_startio(sc);
		mtx_unlock(&blkif_io_lock);

		// Must wait for the completion: the dump routine reuses the same
		//                               16 x 4KB buffer space.
		xb_quiesce(sc->xb_info);	// All quiet on the eastern front.
		// If there were any errors, bail out...
		for (mbp=0; mbp<sbp; mbp++) {
			if ((rc = xb_dump_bp[mbp].bio_error)) break;
		}
	}
	return (rc);
}

static int
blkfront_probe(device_t dev)
{

	if (!strcmp(xenbus_get_type(dev), "vbd")) {
		device_set_desc(dev, "Virtual Block Device");
		device_quiet(dev);
		return (0);
	}

	return (ENXIO);
}

/*
 * Setup supplies the backend dir and virtual device.  We place an event
 * channel and shared frame entries, and watch the backend until it is
 * ready.
 */
static int
blkfront_attach(device_t dev)
{
	int error, vdevice, i, unit;
	struct blkfront_info *info;
	const char *name;

	/* FIXME: Use dynamic device id if this is not set. */
	error = xenbus_scanf(XBT_NIL, xenbus_get_node(dev),
	    "virtual-device", NULL, "%i", &vdevice);
	if (error) {
		xenbus_dev_fatal(dev, error, "reading virtual-device");
		printf("couldn't find virtual device\n");
		return (error);
	}

	blkfront_vdevice_to_unit(vdevice, &unit, &name);
	if (!strcmp(name, "xbd"))
		device_set_unit(dev, unit);

	info = device_get_softc(dev);

	/*
	 * XXX debug only
	 */
	for (i = 0; i < sizeof(*info); i++)
		if (((uint8_t *)info)[i] != 0)
			panic("non-null memory");

	info->shadow_free = 0;
	info->xbdev = dev;
	info->vdevice = vdevice;
	info->connected = BLKIF_STATE_DISCONNECTED;

	/* work queue needed ? */
	for (i = 0; i < BLK_RING_SIZE; i++)
		info->shadow[i].req.id = i+1;
	info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;

	/* Front end dir is a number, which is used as the id. */
	info->handle = strtoul(strrchr(xenbus_get_node(dev),'/')+1, NULL, 0);

	error = talk_to_backend(dev, info);
	if (error)
		return (error);

	return (0);
}

static int
blkfront_suspend(device_t dev)
{
	struct blkfront_info *info = device_get_softc(dev);

	/* Prevent new requests being issued until we fix things up. */
	mtx_lock(&blkif_io_lock);
	info->connected = BLKIF_STATE_SUSPENDED;
	mtx_unlock(&blkif_io_lock);

	return (0);
}

static int
blkfront_resume(device_t dev)
{
	struct blkfront_info *info = device_get_softc(dev);
	int err;

	DPRINTK("blkfront_resume: %s\n", xenbus_get_node(dev));

	blkif_free(info, 1);
	err = talk_to_backend(dev, info);
	if (info->connected == BLKIF_STATE_SUSPENDED && !err)
		blkif_recover(info);

	return (err);
}

/* Common code used when first setting up, and when resuming. */
static int
talk_to_backend(device_t dev, struct blkfront_info *info)
{
	const char *message = NULL;
	struct xenbus_transaction xbt;
	int err;

	/* Create shared ring, alloc event channel. */
	err = setup_blkring(dev, info);
	if (err)
		goto out;

 again:
	err = xenbus_transaction_start(&xbt);
	if (err) {
		xenbus_dev_fatal(dev, err, "starting transaction");
		goto destroy_blkring;
	}

	err = xenbus_printf(xbt, xenbus_get_node(dev),
			    "ring-ref","%u", info->ring_ref);
	if (err) {
		message = "writing ring-ref";
		goto abort_transaction;
	}
	err = xenbus_printf(xbt, xenbus_get_node(dev),
		"event-channel", "%u", irq_to_evtchn_port(info->irq));
	if (err) {
		message = "writing event-channel";
		goto abort_transaction;
	}
	err = xenbus_printf(xbt, xenbus_get_node(dev),
		"protocol", "%s", XEN_IO_PROTO_ABI_NATIVE);
	if (err) {
		message = "writing protocol";
		goto abort_transaction;
	}

	err = xenbus_transaction_end(xbt, 0);
	if (err) {
		if (err == EAGAIN)
			goto again;
		xenbus_dev_fatal(dev, err, "completing transaction");
		goto destroy_blkring;
	}
	xenbus_set_state(dev, XenbusStateInitialised);

	return 0;

 abort_transaction:
	xenbus_transaction_end(xbt, 1);
	if (message)
		xenbus_dev_fatal(dev, err, "%s", message);
 destroy_blkring:
	blkif_free(info, 0);
 out:
	return err;
}

static int
setup_blkring(device_t dev, struct blkfront_info *info)
{
	blkif_sring_t *sring;
	int error;

	info->ring_ref = GRANT_INVALID_REF;

	sring = (blkif_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sring == NULL) {
		xenbus_dev_fatal(dev, ENOMEM, "allocating shared ring");
		return ENOMEM;
	}
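
	/*
	 * SHARED_RING_INIT resets the producer/consumer indices in the
	 * page shared with the backend; FRONT_RING_INIT attaches our
	 * private front ring to that page.
	 */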
	SHARED_RING_INIT(sring);
	FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);

	error = xenbus_grant_ring(dev,
	    (vtomach(info->ring.sring) >> PAGE_SHIFT), &info->ring_ref);
	if (error) {
		free(sring, M_DEVBUF);
		info->ring.sring = NULL;
		goto fail;
	}

	error = bind_listening_port_to_irqhandler(xenbus_get_otherend_id(dev),
	    "xbd", (driver_intr_t *)blkif_int, info,
	    INTR_TYPE_BIO | INTR_MPSAFE, &info->irq);
	if (error) {
		xenbus_dev_fatal(dev, error,
		    "bind_evtchn_to_irqhandler failed");
		goto fail;
	}

	return (0);
 fail:
	blkif_free(info, 0);
	return (error);
}


/**
 * Callback received when the backend's state changes.
 */
static int
blkfront_backend_changed(device_t dev, XenbusState backend_state)
{
	struct blkfront_info *info = device_get_softc(dev);

	DPRINTK("backend_state=%d\n", backend_state);

	switch (backend_state) {
	case XenbusStateUnknown:
	case XenbusStateInitialising:
	case XenbusStateInitWait:
	case XenbusStateInitialised:
	case XenbusStateClosed:
	case XenbusStateReconfigured:
	case XenbusStateReconfiguring:
		break;

	case XenbusStateConnected:
		connect(dev, info);
		break;

	case XenbusStateClosing:
		if (info->users > 0)
			xenbus_dev_error(dev, EBUSY,
					 "Device in use; refusing to close");
		else
			blkfront_closing(dev);
#ifdef notyet
		bd = bdget(info->dev);
		if (bd == NULL)
			xenbus_dev_fatal(dev, -ENODEV, "bdget failed");

		down(&bd->bd_sem);
		if (info->users > 0)
			xenbus_dev_error(dev, -EBUSY,
					 "Device in use; refusing to close");
		else
			blkfront_closing(dev);
		up(&bd->bd_sem);
		bdput(bd);
#endif
	}

	return (0);
}

/*
 * Invoked when the backend is finally 'ready' (and has provided the
 * details about the physical device - #sectors, size, etc).
 */
static void
connect(device_t dev, struct blkfront_info *info)
{
	unsigned long sectors, sector_size;
	unsigned int binfo;
	int err;

	if ((info->connected == BLKIF_STATE_CONNECTED) ||
	    (info->connected == BLKIF_STATE_SUSPENDED))
		return;

	DPRINTK("blkfront.c:connect:%s.\n", xenbus_get_otherend_path(dev));

	err = xenbus_gather(XBT_NIL, xenbus_get_otherend_path(dev),
			    "sectors", "%lu", &sectors,
			    "info", "%u", &binfo,
			    "sector-size", "%lu", &sector_size,
			    NULL);
	if (err) {
		xenbus_dev_fatal(dev, err,
		    "reading backend fields at %s",
		    xenbus_get_otherend_path(dev));
		return;
	}
	err = xenbus_gather(XBT_NIL, xenbus_get_otherend_path(dev),
			    "feature-barrier", "%lu", &info->feature_barrier,
			    NULL);
	if (err)
		info->feature_barrier = 0;

	device_printf(dev, "%juMB <%s> at %s",
	    (uintmax_t) sectors / (1048576 / sector_size),
	    device_get_desc(dev),
	    xenbus_get_node(dev));
	bus_print_child_footer(device_get_parent(dev), dev);

	xlvbd_add(dev, sectors, info->vdevice, binfo, sector_size, info);

	(void)xenbus_set_state(dev, XenbusStateConnected);

	/* Kick pending requests. */
	mtx_lock(&blkif_io_lock);
	info->connected = BLKIF_STATE_CONNECTED;
	kick_pending_request_queues(info);
	mtx_unlock(&blkif_io_lock);
	info->is_ready = 1;

#if 0
	add_disk(info->gd);
#endif
}

/**
 * Handle the change of state of the backend to Closing.  We must delete our
 * device-layer structures now, to ensure that writes are flushed through to
 * the backend.  Once this is done, we can switch to Closed in
 * acknowledgement.
 */
static void
blkfront_closing(device_t dev)
{
	struct blkfront_info *info = device_get_softc(dev);

	DPRINTK("blkfront_closing: %s removed\n", xenbus_get_node(dev));

	if (info->mi) {
		DPRINTK("Calling xlvbd_del\n");
		xlvbd_del(info);
		info->mi = NULL;
	}

	xenbus_set_state(dev, XenbusStateClosed);
}


static int
blkfront_detach(device_t dev)
{
	struct blkfront_info *info = device_get_softc(dev);

	DPRINTK("blkfront_remove: %s removed\n", xenbus_get_node(dev));

	blkif_free(info, 0);

	return 0;
}


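/*
 * The shadow array doubles as a free list: an unused entry's req.id
 * field holds the index of the next free entry, so id allocation and
 * release are constant-time operations.
 */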
static inline int
GET_ID_FROM_FREELIST(struct blkfront_info *info)
{
	unsigned long nfree = info->shadow_free;

	KASSERT(nfree <= BLK_RING_SIZE, ("free %lu > RING_SIZE", nfree));
	info->shadow_free = info->shadow[nfree].req.id;
	info->shadow[nfree].req.id = 0x0fffffee; /* debug */
	atomic_add_int(&blkif_queued_requests, 1);
	return nfree;
}

static inline void
ADD_ID_TO_FREELIST(struct blkfront_info *info, unsigned long id)
{
	info->shadow[id].req.id  = info->shadow_free;
	info->shadow[id].request = 0;
	info->shadow_free = id;
	atomic_subtract_int(&blkif_queued_requests, 1);
}

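/*
 * Publish our privately queued requests to the shared ring and kick the
 * backend's event channel if it asked to be notified.
 */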
static inline void
flush_requests(struct blkfront_info *info)
{
	int notify;

	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);

	if (notify)
		notify_remote_via_irq(info->irq);
}

static void
kick_pending_request_queues(struct blkfront_info *info)
{
	/* XXX check if we can't simplify */
#if 0
	if (!RING_FULL(&info->ring)) {
		/* Re-enable calldowns. */
		blk_start_queue(info->rq);
		/* Kick things off immediately. */
		do_blkif_request(info->rq);
	}
#endif
	if (!RING_FULL(&info->ring)) {
#if 0
		sc = LIST_FIRST(&xbsl_head);
		LIST_REMOVE(sc, entry);
		/* Re-enable calldowns. */
		blk_start_queue(di->rq);
#endif
		/* Kick things off immediately. */
		xb_startio(info->sc);
	}
}

#if 0
/* XXX */
static void blkif_restart_queue(void *arg)
{
	struct blkfront_info *info = (struct blkfront_info *)arg;

	mtx_lock(&blkif_io_lock);
	kick_pending_request_queues(info);
	mtx_unlock(&blkif_io_lock);
}
#endif

static void blkif_restart_queue_callback(void *arg)
{
#if 0
	struct blkfront_info *info = (struct blkfront_info *)arg;
	/* XXX BSD equiv ? */

	schedule_work(&info->work);
#endif
}

static int
blkif_open(struct disk *dp)
{
	struct xb_softc	*sc = (struct xb_softc *)dp->d_drv1;

	if (sc == NULL) {
		/* Don't dereference the NULL softc in the error message. */
		printf("%s%d: not found\n", dp->d_name, dp->d_unit);
		return (ENXIO);
	}

	sc->xb_flags |= XB_OPEN;
	sc->xb_info->users++;
	return (0);
}

static int
blkif_close(struct disk *dp)
{
	struct xb_softc	*sc = (struct xb_softc *)dp->d_drv1;

	if (sc == NULL)
		return (ENXIO);
	sc->xb_flags &= ~XB_OPEN;
	if (--(sc->xb_info->users) == 0) {
		/* Check whether we have been instructed to close.  We will
		   have ignored this request initially, as the device was
		   still mounted. */
		device_t dev = sc->xb_info->xbdev;
		XenbusState state =
			xenbus_read_driver_state(xenbus_get_otherend_path(dev));

		if (state == XenbusStateClosing)
			blkfront_closing(dev);
	}
	return (0);
}

static int
blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td)
{
	struct xb_softc	*sc = (struct xb_softc *)dp->d_drv1;

	if (sc == NULL)
		return (ENXIO);

	return (ENOTTY);
}


/*
 * blkif_queue_request
 *
 * request block io
 *
 * id: for guest use only.
 * operation: BLKIF_OP_{READ,WRITE,PROBE}
 * buffer: buffer to read/write into. this should be a
 *   virtual address in the guest os.
 */
static int blkif_queue_request(struct bio *bp)
{
	caddr_t alignbuf;
	vm_paddr_t buffer_ma;
	blkif_request_t     *ring_req;
	unsigned long id;
	uint64_t fsect, lsect;
	struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1;
	struct blkfront_info *info = sc->xb_info;
	int ref;

	if (unlikely(sc->xb_info->connected != BLKIF_STATE_CONNECTED))
		return 1;

	if (gnttab_alloc_grant_references(
		    BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
		gnttab_request_free_callback(
			&info->callback,
			blkif_restart_queue_callback,
			info,
			BLKIF_MAX_SEGMENTS_PER_REQUEST);
		return 1;
	}

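	/*
	 * A request segment may not cross a machine page boundary and must
	 * be sector aligned (see the KASSERTs below), so unaligned bio data
	 * is bounced through a suitably aligned temporary buffer.
	 */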
	/* Check if the buffer is properly aligned */
	if ((vm_offset_t)bp->bio_data & PAGE_MASK) {
		int align = (bp->bio_bcount < PAGE_SIZE/2) ? XBD_SECTOR_SIZE :
			PAGE_SIZE;
		caddr_t newbuf = malloc(bp->bio_bcount + align, M_DEVBUF,
					M_NOWAIT);

		alignbuf = (char *)roundup2((u_long)newbuf, align);

		/* save a copy of the current buffer */
		bp->bio_driver1 = newbuf;
		bp->bio_driver2 = alignbuf;

		/* Copy the data for a write */
		if (bp->bio_cmd == BIO_WRITE)
			bcopy(bp->bio_data, alignbuf, bp->bio_bcount);
	} else
		alignbuf = bp->bio_data;

	/* Fill out a communications ring structure. */
	ring_req		 = RING_GET_REQUEST(&info->ring,
						    info->ring.req_prod_pvt);
	id			 = GET_ID_FROM_FREELIST(info);
	info->shadow[id].request = (unsigned long)bp;

	ring_req->id		 = id;
	ring_req->operation	 = (bp->bio_cmd == BIO_READ) ? BLKIF_OP_READ :
		BLKIF_OP_WRITE;

	ring_req->sector_number= (blkif_sector_t)bp->bio_pblkno;
	ring_req->handle	  = (blkif_vdev_t)(uintptr_t)sc->xb_disk;

	ring_req->nr_segments  = 0;	/* XXX not doing scatter/gather since buffer
					 * chaining is not supported.
					 */

	buffer_ma = vtomach(alignbuf);
	fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT;
	lsect = fsect + (bp->bio_bcount >> XBD_SECTOR_SHFT) - 1;
	/* install a grant reference. */
	ref = gnttab_claim_grant_reference(&gref_head);
	KASSERT( ref != -ENOSPC, ("grant_reference failed") );

	gnttab_grant_foreign_access_ref(
		ref,
		xenbus_get_otherend_id(info->xbdev),
		buffer_ma >> PAGE_SHIFT,
		ring_req->operation & 1 ); /* read-only for a write request:
					    * the backend only reads our
					    * buffer then (BLKIF_OP_WRITE == 1) */
	info->shadow[id].frame[ring_req->nr_segments] =
		buffer_ma >> PAGE_SHIFT;

	ring_req->seg[ring_req->nr_segments] =
		(struct blkif_request_segment) {
			.gref       = ref,
			.first_sect = fsect,
			.last_sect  = lsect };

	ring_req->nr_segments++;
	KASSERT((buffer_ma & (XBD_SECTOR_SIZE-1)) == 0,
		("XEN buffer must be sector aligned"));
	KASSERT(lsect <= 7,
		("XEN disk driver data cannot cross a page boundary"));

	buffer_ma &= ~PAGE_MASK;

	info->ring.req_prod_pvt++;

	/* Keep a private copy so we can reissue requests when recovering. */
	info->shadow[id].req = *ring_req;

	gnttab_free_grant_references(gref_head);

	return 0;
}


/*
 * Dequeue buffers and place them in the shared communication ring.
 * Return when no more requests can be accepted or all buffers have
 * been queued.
 *
 * Signal XEN once the ring has been filled out.
 */
static void
xb_startio(struct xb_softc *sc)
{
	struct bio		*bp;
	int			queued = 0;
	struct blkfront_info *info = sc->xb_info;
	DPRINTK("");

	mtx_assert(&blkif_io_lock, MA_OWNED);

	while ((bp = bioq_takefirst(&sc->xb_bioq)) != NULL) {

		if (RING_FULL(&info->ring))
			goto wait;

		if (blkif_queue_request(bp)) {
		wait:
			bioq_insert_head(&sc->xb_bioq, bp);
			break;
		}
		queued++;
	}

	if (queued != 0)
		flush_requests(sc->xb_info);
}

static void
blkif_int(void *xsc)
{
	struct xb_softc *sc = NULL;
	struct bio *bp;
	blkif_response_t *bret;
	RING_IDX i, rp;
	struct blkfront_info *info = xsc;
	DPRINTK("");

	TRACE_ENTER;

	mtx_lock(&blkif_io_lock);

	if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
		mtx_unlock(&blkif_io_lock);
		return;
	}

 again:
	rp = info->ring.sring->rsp_prod;
	rmb(); /* Ensure we see queued responses up to 'rp'. */

	for (i = info->ring.rsp_cons; i != rp; i++) {
		unsigned long id;

		bret = RING_GET_RESPONSE(&info->ring, i);
		id   = bret->id;
		bp   = (struct bio *)info->shadow[id].request;

		blkif_completion(&info->shadow[id]);

		ADD_ID_TO_FREELIST(info, id);

		switch (bret->operation) {
		case BLKIF_OP_READ:
			/* had an unaligned buffer that needs to be copied */
			if (bp->bio_driver1)
				bcopy(bp->bio_driver2, bp->bio_data, bp->bio_bcount);
			/* FALLTHROUGH */
		case BLKIF_OP_WRITE:

			/* free the copy buffer */
			if (bp->bio_driver1) {
				free(bp->bio_driver1, M_DEVBUF);
				bp->bio_driver1 = NULL;
			}

			if (unlikely(bret->status != BLKIF_RSP_OKAY)) {
				printf("Bad return from blkdev data request: %x\n",
				    bret->status);
				bp->bio_flags |= BIO_ERROR;
			}

			sc = (struct xb_softc *)bp->bio_disk->d_drv1;

			if (bp->bio_flags & BIO_ERROR)
				bp->bio_error = EIO;
			else
				bp->bio_resid = 0;

			biodone(bp);
			break;
		default:
			panic("received invalid operation");
			break;
		}
	}

	info->ring.rsp_cons = i;

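	/*
	 * If requests are still outstanding, re-check for responses that may
	 * have raced with this pass; otherwise advance rsp_event so that the
	 * backend raises an event for the next response it produces.
	 */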
	if (i != info->ring.req_prod_pvt) {
		int more_to_do;
		RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
		if (more_to_do)
			goto again;
	} else {
		info->ring.sring->rsp_event = i + 1;
	}

	kick_pending_request_queues(info);

	mtx_unlock(&blkif_io_lock);
}

static void
blkif_free(struct blkfront_info *info, int suspend)
{

	/* Prevent new requests being issued until we fix things up. */
	mtx_lock(&blkif_io_lock);
	info->connected = suspend ?
		BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
	mtx_unlock(&blkif_io_lock);

	/* Free resources associated with old device channel. */
	if (info->ring_ref != GRANT_INVALID_REF) {
		gnttab_end_foreign_access(info->ring_ref,
					  info->ring.sring);
		info->ring_ref = GRANT_INVALID_REF;
		info->ring.sring = NULL;
	}
	if (info->irq)
		unbind_from_irqhandler(info->irq);
	info->irq = 0;
}

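/*
 * Drop the grant references attached to a completed request so that the
 * granted frames can be reused.
 */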
static void
blkif_completion(struct blk_shadow *s)
{
	int i;

	for (i = 0; i < s->req.nr_segments; i++)
		gnttab_end_foreign_access(s->req.seg[i].gref, 0UL);
}

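/*
 * After suspend/resume the shared ring and its grants are gone: rebuild
 * the free list from the saved shadow state and requeue every request
 * that was in flight when the domain was suspended.
 */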
static void
blkif_recover(struct blkfront_info *info)
{
	int i, j;
	blkif_request_t *req;
	struct blk_shadow *copy;

	if (!info->sc)
		return;

	/* Stage 1: Make a safe copy of the shadow state. */
	copy = (struct blk_shadow *)malloc(sizeof(info->shadow), M_DEVBUF, M_NOWAIT|M_ZERO);
	memcpy(copy, info->shadow, sizeof(info->shadow));

	/* Stage 2: Set up free list. */
	memset(&info->shadow, 0, sizeof(info->shadow));
	for (i = 0; i < BLK_RING_SIZE; i++)
		info->shadow[i].req.id = i+1;
	info->shadow_free = info->ring.req_prod_pvt;
	info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;

	/* Stage 3: Find pending requests and requeue them. */
	for (i = 0; i < BLK_RING_SIZE; i++) {
		/* Not in use? */
		if (copy[i].request == 0)
			continue;

		/* Grab a request slot and copy shadow state into it. */
		req = RING_GET_REQUEST(
			&info->ring, info->ring.req_prod_pvt);
		*req = copy[i].req;

		/* We get a new request id, and must reset the shadow state. */
		req->id = GET_ID_FROM_FREELIST(info);
		memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i]));

		/* Rewrite any grant references invalidated by suspend/resume. */
		for (j = 0; j < req->nr_segments; j++)
			gnttab_grant_foreign_access_ref(
				req->seg[j].gref,
				xenbus_get_otherend_id(info->xbdev),
				pfn_to_mfn(info->shadow[req->id].frame[j]),
				0 /* assume not readonly */);

		info->shadow[req->id].req = *req;

		info->ring.req_prod_pvt++;
	}

	free(copy, M_DEVBUF);

	xenbus_set_state(info->xbdev, XenbusStateConnected);

	/* Now safe for us to use the shared ring */
	mtx_lock(&blkif_io_lock);
	info->connected = BLKIF_STATE_CONNECTED;
	mtx_unlock(&blkif_io_lock);

	/* Send off requeued requests */
	mtx_lock(&blkif_io_lock);
	flush_requests(info);

	/* Kick any other new requests queued since we resumed */
	kick_pending_request_queues(info);
	mtx_unlock(&blkif_io_lock);
}

/* ** Driver registration ** */
static device_method_t blkfront_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,         blkfront_probe),
	DEVMETHOD(device_attach,        blkfront_attach),
	DEVMETHOD(device_detach,        blkfront_detach),
	DEVMETHOD(device_shutdown,      bus_generic_shutdown),
	DEVMETHOD(device_suspend,       blkfront_suspend),
	DEVMETHOD(device_resume,        blkfront_resume),

	/* Xenbus interface */
	DEVMETHOD(xenbus_backend_changed, blkfront_backend_changed),

	{ 0, 0 }
};

static driver_t blkfront_driver = {
	"xbd",
	blkfront_methods,
	sizeof(struct blkfront_info),
};
devclass_t blkfront_devclass;

DRIVER_MODULE(xbd, xenbus, blkfront_driver, blkfront_devclass, 0, 0);

MTX_SYSINIT(ioreq, &blkif_io_lock, "BIO LOCK", MTX_NOWITNESS); /* XXX how does one enroll a lock? */
1207