/*-
 * All rights reserved.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

/*
 * XenoBSD block device driver
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/xen/blkfront/blkfront.c 181914 2008-08-20 09:22:37Z kmacy $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <vm/vm.h>
#include <vm/pmap.h>

#include <sys/bio.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/module.h>

#include <machine/bus.h>
#include <sys/rman.h>
#include <machine/resource.h>
#include <machine/intr_machdep.h>
#include <machine/vmparam.h>

#include <machine/xen/hypervisor.h>
#include <machine/xen/xen-os.h>
#include <machine/xen/xen_intr.h>
#include <machine/xen/xenbus.h>
#include <machine/xen/evtchn.h>
#include <xen/interface/grant_table.h>

#include <geom/geom_disk.h>
#include <machine/xen/xenfunc.h>
#include <xen/gnttab.h>

#include <dev/xen/blkfront/block.h>

#define	ASSERT(S)	KASSERT(S, (#S))

/* prototypes */
struct xb_softc;
static void xb_startio(struct xb_softc *sc);
static void connect(struct blkfront_info *);
static void blkfront_closing(struct xenbus_device *);
static int blkfront_remove(struct xenbus_device *);
static int talk_to_backend(struct xenbus_device *, struct blkfront_info *);
static int setup_blkring(struct xenbus_device *, struct blkfront_info *);
static void blkif_int(void *);
#if 0
static void blkif_restart_queue(void *arg);
#endif
static void blkif_recover(struct blkfront_info *);
static void blkif_completion(struct blk_shadow *);
static void blkif_free(struct blkfront_info *, int);

#define GRANT_INVALID_REF 0
#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
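
/*
 * BLK_RING_SIZE is the number of request/response slots that fit in the
 * shared page: __RING_SIZE() rounds the space left after the ring header
 * down to a power of two (32 slots for a 4K page with the classic blkif
 * request layout).
 */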

LIST_HEAD(xb_softc_list_head, xb_softc) xbsl_head;

/* Control whether runtime update of vbds is enabled. */
#define ENABLE_VBD_UPDATE 0

#if ENABLE_VBD_UPDATE
static void vbd_update(void);
#endif

#define BLKIF_STATE_DISCONNECTED 0
#define BLKIF_STATE_CONNECTED    1
#define BLKIF_STATE_SUSPENDED    2

#ifdef notyet
static char *blkif_state_name[] = {
	[BLKIF_STATE_DISCONNECTED] = "disconnected",
	[BLKIF_STATE_CONNECTED]    = "connected",
	[BLKIF_STATE_SUSPENDED]    = "closed",
};

static char *blkif_status_name[] = {
	[BLKIF_INTERFACE_STATUS_CLOSED]       = "closed",
	[BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
	[BLKIF_INTERFACE_STATUS_CONNECTED]    = "connected",
	[BLKIF_INTERFACE_STATUS_CHANGED]      = "changed",
};
#endif

#define WPRINTK(fmt, args...) printf("[XEN] " fmt, ##args)
#if 0
#define DPRINTK(fmt, args...) printf("[XEN] %s:%d" fmt ".\n", __FUNCTION__, __LINE__, ##args)
#else
#define DPRINTK(fmt, args...)
#endif

static grant_ref_t gref_head;
#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
    (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)

static void kick_pending_request_queues(struct blkfront_info *);
static int blkif_open(struct disk *dp);
static int blkif_close(struct disk *dp);
static int blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td);
static int blkif_queue_request(struct bio *bp);
static void xb_strategy(struct bio *bp);

/* XXX move to xb_vbd.c when VBD update support is added */
#define MAX_VBDS 64

#define XBD_SECTOR_SIZE		512	/* XXX: assume for now */
#define XBD_SECTOR_SHFT		9

static struct mtx blkif_io_lock;

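/*
 * Translate a guest pseudo-physical frame number into the machine frame
 * number that Xen expects in grant-table entries and ring requests.  On
 * a paravirtualized guest the two address spaces differ, so every buffer
 * address handed to the backend must pass through phystomach().
 */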
static vm_paddr_t
pfn_to_mfn(vm_paddr_t pfn)
{
	return (phystomach(pfn << PAGE_SHIFT) >> PAGE_SHIFT);
}

int
xlvbd_add(blkif_sector_t capacity, int unit, uint16_t vdisk_info, uint16_t sector_size,
	  struct blkfront_info *info)
{
	struct xb_softc	*sc;
	int error = 0;

	sc = (struct xb_softc *)malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	sc->xb_unit = unit;
	sc->xb_info = info;
	info->sc = sc;

	sc->xb_disk = disk_alloc();
	sc->xb_disk->d_unit = unit;
	sc->xb_disk->d_open = blkif_open;
	sc->xb_disk->d_close = blkif_close;
	sc->xb_disk->d_ioctl = blkif_ioctl;
	sc->xb_disk->d_strategy = xb_strategy;
	sc->xb_disk->d_name = "xbd";
	sc->xb_disk->d_drv1 = sc;
	sc->xb_disk->d_sectorsize = sector_size;

	/* XXX */
	sc->xb_disk->d_mediasize = capacity << XBD_SECTOR_SHFT;
#if 0
	sc->xb_disk->d_maxsize = DFLTPHYS;
#else /* XXX: xen can't handle large single i/o requests */
	sc->xb_disk->d_maxsize = 4096;
#endif
#ifdef notyet
	XENPRINTF("attaching device 0x%x unit %d capacity %llu\n",
		  xb_diskinfo[sc->xb_unit].device, sc->xb_unit,
		  sc->xb_disk->d_mediasize);
#endif
	sc->xb_disk->d_flags = 0;
	disk_create(sc->xb_disk, DISK_VERSION_00);
	bioq_init(&sc->xb_bioq);

	return (error);
}

void
xlvbd_del(struct blkfront_info *info)
{
	struct xb_softc	*sc;

	sc = info->sc;
	disk_destroy(sc->xb_disk);
}
/************************ end VBD support *****************/

/*
 * Read/write routine for a buffer.  Finds the proper unit, places it on
 * the sortq and kicks the controller.
 */
static void
xb_strategy(struct bio *bp)
{
	struct xb_softc	*sc = (struct xb_softc *)bp->bio_disk->d_drv1;

	/* bogus disk? */
	if (sc == NULL) {
		bp->bio_error = EINVAL;
		bp->bio_flags |= BIO_ERROR;
		goto bad;
	}

	DPRINTK("");

	/*
	 * Place it in the queue of disk activities for this disk
	 */
	mtx_lock(&blkif_io_lock);
	bioq_disksort(&sc->xb_bioq, bp);

	xb_startio(sc);
	mtx_unlock(&blkif_io_lock);
	return;

 bad:
	/*
	 * Correctly set the bio to indicate a failed transfer.
	 */
	bp->bio_resid = bp->bio_bcount;
	biodone(bp);
	return;
}

/*
 * Setup supplies the backend directory and the virtual device number.
 * We allocate an event channel and shared ring, then watch the backend
 * to see when it is ready.
 */
static int blkfront_probe(struct xenbus_device *dev,
			  const struct xenbus_device_id *id)
{
	int err, vdevice, i;
	struct blkfront_info *info;

	/* FIXME: Use dynamic device id if this is not set. */
	err = xenbus_scanf(XBT_NIL, dev->nodename,
			   "virtual-device", "%i", &vdevice);
	if (err != 1) {
		xenbus_dev_fatal(dev, err, "reading virtual-device");
		printf("couldn't find virtual device\n");
		return (err);
	}

	info = malloc(sizeof(*info), M_DEVBUF, M_NOWAIT|M_ZERO);
	if (info == NULL) {
		xenbus_dev_fatal(dev, ENOMEM, "allocating info structure");
		return (ENOMEM);
	}

	/*
	 * XXX debug only: verify that M_ZERO really zeroed the allocation.
	 */
	for (i = 0; i < sizeof(*info); i++)
		if (((uint8_t *)info)[i] != 0)
			panic("non-null memory");

	info->shadow_free = 0;
	info->xbdev = dev;
	info->vdevice = vdevice;
	info->connected = BLKIF_STATE_DISCONNECTED;

	/* work queue needed? */
	for (i = 0; i < BLK_RING_SIZE; i++)
		info->shadow[i].req.id = i+1;
	info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;

	/* Front end dir is a number, which is used as the id. */
	info->handle = strtoul(strrchr(dev->nodename, '/') + 1, NULL, 0);
	dev->dev_driver_data = info;

	err = talk_to_backend(dev, info);
	if (err) {
		free(info, M_DEVBUF);
		dev->dev_driver_data = NULL;
		return (err);
	}

	return (0);
}

static int blkfront_resume(struct xenbus_device *dev)
{
	struct blkfront_info *info = dev->dev_driver_data;
	int err;

	DPRINTK("blkfront_resume: %s\n", dev->nodename);

	blkif_free(info, 1);

	err = talk_to_backend(dev, info);
	if (!err)
		blkif_recover(info);

	return (err);
}

/* Common code used when first setting up, and when resuming. */
static int talk_to_backend(struct xenbus_device *dev,
			   struct blkfront_info *info)
{
	const char *message = NULL;
	struct xenbus_transaction xbt;
	int err;

	/* Create shared ring, alloc event channel. */
	err = setup_blkring(dev, info);
	if (err)
		goto out;

 again:
	err = xenbus_transaction_start(&xbt);
	if (err) {
		xenbus_dev_fatal(dev, err, "starting transaction");
		goto destroy_blkring;
	}

	err = xenbus_printf(xbt, dev->nodename,
			    "ring-ref", "%u", info->ring_ref);
	if (err) {
		message = "writing ring-ref";
		goto abort_transaction;
	}
	err = xenbus_printf(xbt, dev->nodename,
		"event-channel", "%u", irq_to_evtchn_port(info->irq));
	if (err) {
		message = "writing event-channel";
		goto abort_transaction;
	}

	/* A conflicting concurrent xenstore update aborts the transaction. */
	err = xenbus_transaction_end(xbt, 0);
	if (err) {
		if (err == -EAGAIN)
			goto again;
		xenbus_dev_fatal(dev, err, "completing transaction");
		goto destroy_blkring;
	}
	xenbus_switch_state(dev, XenbusStateInitialised);

	return (0);

 abort_transaction:
	xenbus_transaction_end(xbt, 1);
	if (message)
		xenbus_dev_fatal(dev, err, "%s", message);
 destroy_blkring:
	blkif_free(info, 0);
 out:
	return (err);
}

static int
setup_blkring(struct xenbus_device *dev, struct blkfront_info *info)
{
	blkif_sring_t *sring;
	int err;

	info->ring_ref = GRANT_INVALID_REF;

	sring = (blkif_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sring == NULL) {
		xenbus_dev_fatal(dev, ENOMEM, "allocating shared ring");
		return (ENOMEM);
	}
	SHARED_RING_INIT(sring);
	FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);

	/* On success, the grant reference for the shared page is returned. */
	err = xenbus_grant_ring(dev, (vtomach(info->ring.sring) >> PAGE_SHIFT));
	if (err < 0) {
		free(sring, M_DEVBUF);
		info->ring.sring = NULL;
		goto fail;
	}
	info->ring_ref = err;

	/* On success, the newly bound irq is returned. */
	err = bind_listening_port_to_irqhandler(dev->otherend_id,
		"xbd", (driver_intr_t *)blkif_int, info,
		INTR_TYPE_BIO | INTR_MPSAFE, NULL);
	if (err <= 0) {
		xenbus_dev_fatal(dev, err,
				 "bind_evtchn_to_irqhandler failed");
		goto fail;
	}
	info->irq = err;

	return (0);

 fail:
	blkif_free(info, 0);
	return (err);
}

/**
 * Callback received when the backend's state changes.
 */
static void backend_changed(struct xenbus_device *dev,
			    XenbusState backend_state)
{
	struct blkfront_info *info = dev->dev_driver_data;

	DPRINTK("blkfront:backend_changed.\n");

	switch (backend_state) {
	case XenbusStateUnknown:
	case XenbusStateInitialising:
	case XenbusStateInitWait:
	case XenbusStateInitialised:
	case XenbusStateClosed:
		break;

	case XenbusStateConnected:
		connect(info);
		break;

	case XenbusStateClosing:
		if (info->users > 0)
			xenbus_dev_error(dev, -EBUSY,
					 "Device in use; refusing to close");
		else
			blkfront_closing(dev);
#ifdef notyet
		bd = bdget(info->dev);
		if (bd == NULL)
			xenbus_dev_fatal(dev, -ENODEV, "bdget failed");

		down(&bd->bd_sem);
		if (info->users > 0)
			xenbus_dev_error(dev, -EBUSY,
					 "Device in use; refusing to close");
		else
			blkfront_closing(dev);
		up(&bd->bd_sem);
		bdput(bd);
#endif
	}
}
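
/*
 * Typical xenbus handshake, for reference: both ends start in
 * Initialising; talk_to_backend() publishes ring-ref/event-channel and
 * moves us to Initialised; when the backend reaches Connected we call
 * connect() below; on shutdown the backend enters Closing and we answer
 * with Closed once our device-layer state is torn down.
 */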

/*
 * Invoked when the backend is finally 'ready' (and has published the
 * details of the physical device - #sectors, size, etc).
 */
static void
connect(struct blkfront_info *info)
{
	unsigned long sectors, sector_size;
	unsigned int binfo;
	int err;

	if ((info->connected == BLKIF_STATE_CONNECTED) ||
	    (info->connected == BLKIF_STATE_SUSPENDED))
		return;

	DPRINTK("blkfront.c:connect:%s.\n", info->xbdev->otherend);

	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
			    "sectors", "%lu", &sectors,
			    "info", "%u", &binfo,
			    "sector-size", "%lu", &sector_size,
			    NULL);
	if (err) {
		xenbus_dev_fatal(info->xbdev, err,
				 "reading backend fields at %s",
				 info->xbdev->otherend);
		return;
	}
	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
			    "feature-barrier", "%lu", &info->feature_barrier,
			    NULL);
	if (err)
		info->feature_barrier = 0;

	xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);

	(void)xenbus_switch_state(info->xbdev, XenbusStateConnected);

	/* Kick pending requests. */
	mtx_lock(&blkif_io_lock);
	info->connected = BLKIF_STATE_CONNECTED;
	kick_pending_request_queues(info);
	mtx_unlock(&blkif_io_lock);
	info->is_ready = 1;

#if 0
	add_disk(info->gd);
#endif
}

/**
 * Handle the change of state of the backend to Closing.  We must delete our
 * device-layer structures now, to ensure that writes are flushed through to
 * the backend.  Once this is done, we can switch to Closed in
 * acknowledgement.
 */
static void blkfront_closing(struct xenbus_device *dev)
{
	struct blkfront_info *info = dev->dev_driver_data;

	DPRINTK("blkfront_closing: %s removed\n", dev->nodename);

	if (info->mi) {
		DPRINTK("Calling xlvbd_del\n");
		xlvbd_del(info);
		info->mi = NULL;
	}

	xenbus_switch_state(dev, XenbusStateClosed);
}

static int blkfront_remove(struct xenbus_device *dev)
{
	struct blkfront_info *info = dev->dev_driver_data;

	DPRINTK("blkfront_remove: %s removed\n", dev->nodename);

	blkif_free(info, 0);

	free(info, M_DEVBUF);

	return (0);
}

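/*
 * The shadow array doubles as a free list: an unused entry's req.id
 * holds the index of the next free entry, 0x0fffffff terminates the
 * list, and 0x0fffffee marks a slot that is currently in use (as a
 * debugging aid only).
 */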
static inline int
GET_ID_FROM_FREELIST(struct blkfront_info *info)
{
	unsigned long nfree = info->shadow_free;

	KASSERT(nfree <= BLK_RING_SIZE, ("free %lu > RING_SIZE", nfree));
	info->shadow_free = info->shadow[nfree].req.id;
	info->shadow[nfree].req.id = 0x0fffffee; /* debug */
	return (nfree);
}

static inline void
ADD_ID_TO_FREELIST(struct blkfront_info *info, unsigned long id)
{
	info->shadow[id].req.id  = info->shadow_free;
	info->shadow[id].request = 0;
	info->shadow_free = id;
}

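/*
 * Push newly produced requests to the shared ring and notify the
 * backend over the event channel, but only if it is actually waiting
 * for work: RING_PUSH_REQUESTS_AND_CHECK_NOTIFY compares the backend's
 * req_event marker against the newly published producer index.
 */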
static inline void
flush_requests(struct blkfront_info *info)
{
	int notify;

	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);

	if (notify)
		notify_remote_via_irq(info->irq);
}

static void
kick_pending_request_queues(struct blkfront_info *info)
{
	/* XXX check if we can't simplify */
#if 0
	if (!RING_FULL(&info->ring)) {
		/* Re-enable calldowns. */
		blk_start_queue(info->rq);
		/* Kick things off immediately. */
		do_blkif_request(info->rq);
	}
#endif
	if (!RING_FULL(&info->ring)) {
#if 0
		sc = LIST_FIRST(&xbsl_head);
		LIST_REMOVE(sc, entry);
		/* Re-enable calldowns. */
		blk_start_queue(di->rq);
#endif
		/* Kick things off immediately. */
		xb_startio(info->sc);
	}
}

#if 0
/* XXX */
static void blkif_restart_queue(void *arg)
{
	struct blkfront_info *info = (struct blkfront_info *)arg;

	mtx_lock(&blkif_io_lock);
	kick_pending_request_queues(info);
	mtx_unlock(&blkif_io_lock);
}
#endif

static void blkif_restart_queue_callback(void *arg)
{
#if 0
	struct blkfront_info *info = (struct blkfront_info *)arg;
	/* XXX BSD equiv ? */

	schedule_work(&info->work);
#endif
}

static int
blkif_open(struct disk *dp)
{
	struct xb_softc	*sc = (struct xb_softc *)dp->d_drv1;

	if (sc == NULL) {
		printk("xbd%d: not found\n", dp->d_unit);
		return (ENXIO);
	}

	sc->xb_flags |= XB_OPEN;
	sc->xb_info->users++;
	return (0);
}

static int
blkif_close(struct disk *dp)
{
	struct xb_softc	*sc = (struct xb_softc *)dp->d_drv1;

	if (sc == NULL)
		return (ENXIO);
	sc->xb_flags &= ~XB_OPEN;
	if (--(sc->xb_info->users) == 0) {
		/*
		 * Check whether we have been instructed to close.  We will
		 * have ignored this request initially, as the device was
		 * still mounted.
		 */
		struct xenbus_device *dev = sc->xb_info->xbdev;
		XenbusState state = xenbus_read_driver_state(dev->otherend);

		if (state == XenbusStateClosing)
			blkfront_closing(dev);
	}
	return (0);
}

static int
blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td)
{
	struct xb_softc	*sc = (struct xb_softc *)dp->d_drv1;

	if (sc == NULL)
		return (ENXIO);

	return (ENOTTY);
}

/*
 * blkif_queue_request
 *
 * request block io
 *
 * id: for guest use only.
 * operation: BLKIF_OP_{READ,WRITE,PROBE}
 * buffer: buffer to read/write into. this should be a
 *   virtual address in the guest os.
 */
static int blkif_queue_request(struct bio *bp)
{
	caddr_t alignbuf;
	vm_paddr_t buffer_ma;
	blkif_request_t *ring_req;
	unsigned long id;
	uint64_t fsect, lsect;
	struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1;
	struct blkfront_info *info = sc->xb_info;
	int ref;

	if (unlikely(sc->xb_info->connected != BLKIF_STATE_CONNECTED))
		return (1);

	if (gnttab_alloc_grant_references(
		    BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
		gnttab_request_free_callback(
			&info->callback,
			blkif_restart_queue_callback,
			info,
			BLKIF_MAX_SEGMENTS_PER_REQUEST);
		return (1);
	}

	/* Check if the buffer is properly aligned */
	if ((vm_offset_t)bp->bio_data & PAGE_MASK) {
		int align = (bp->bio_bcount < PAGE_SIZE/2) ? XBD_SECTOR_SIZE :
			PAGE_SIZE;
		caddr_t newbuf = malloc(bp->bio_bcount + align, M_DEVBUF,
					M_NOWAIT);

		if (newbuf == NULL) {
			/* No memory for a bounce buffer; retry later. */
			gnttab_free_grant_references(gref_head);
			return (1);
		}
		alignbuf = (char *)roundup2((u_long)newbuf, align);

		/* save a copy of the current buffer */
		bp->bio_driver1 = newbuf;
		bp->bio_driver2 = alignbuf;

		/* Copy the data for a write */
		if (bp->bio_cmd == BIO_WRITE)
			bcopy(bp->bio_data, alignbuf, bp->bio_bcount);
	} else
		alignbuf = bp->bio_data;

	/* Fill out a communications ring structure. */
	ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
	id = GET_ID_FROM_FREELIST(info);
	info->shadow[id].request = (unsigned long)bp;

	ring_req->id = id;
	ring_req->operation = (bp->bio_cmd == BIO_READ) ? BLKIF_OP_READ :
		BLKIF_OP_WRITE;

	ring_req->sector_number = (blkif_sector_t)bp->bio_pblkno;
	ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xb_disk;

	ring_req->nr_segments = 0;	/* XXX not doing scatter/gather since buffer
					 * chaining is not supported.
					 */

	buffer_ma = vtomach(alignbuf);
	fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT;
	lsect = fsect + (bp->bio_bcount >> XBD_SECTOR_SHFT) - 1;
	/* install a grant reference. */
	ref = gnttab_claim_grant_reference(&gref_head);
	KASSERT(ref != -ENOSPC, ("grant_reference failed"));

	/*
	 * BLKIF_OP_WRITE == 1, so (operation & 1) grants the backend
	 * read-only access for writes and read-write access for reads.
	 */
	gnttab_grant_foreign_access_ref(
		ref,
		info->xbdev->otherend_id,
		buffer_ma >> PAGE_SHIFT,
		ring_req->operation & 1);
	info->shadow[id].frame[ring_req->nr_segments] =
		buffer_ma >> PAGE_SHIFT;

	ring_req->seg[ring_req->nr_segments] =
		(struct blkif_request_segment) {
			.gref       = ref,
			.first_sect = fsect,
			.last_sect  = lsect };

	ring_req->nr_segments++;
	KASSERT((buffer_ma & (XBD_SECTOR_SIZE-1)) == 0,
		("XEN buffer must be sector aligned"));
	KASSERT(lsect <= 7,
		("XEN disk driver data cannot cross a page boundary"));

	buffer_ma &= ~PAGE_MASK;

	info->ring.req_prod_pvt++;

	/* Keep a private copy so we can reissue requests when recovering. */
	info->shadow[id].req = *ring_req;

	gnttab_free_grant_references(gref_head);

	return (0);
}
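
/*
 * Note: with d_maxsize capped at 4096 in xlvbd_add(), a bio always fits
 * in the single segment built above; the lsect <= 7 assertion encodes
 * that limit (eight 512-byte sectors per 4K page).
 */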

/*
 * Dequeue buffers and place them in the shared communication ring.
 * Return when no more requests can be accepted or all buffers have
 * been queued.
 *
 * Signal XEN once the ring has been filled out.
 */
static void
xb_startio(struct xb_softc *sc)
{
	struct bio *bp;
	int queued = 0;
	struct blkfront_info *info = sc->xb_info;
	DPRINTK("");

	mtx_assert(&blkif_io_lock, MA_OWNED);

	while ((bp = bioq_takefirst(&sc->xb_bioq)) != NULL) {

		if (RING_FULL(&info->ring))
			goto wait;

		if (blkif_queue_request(bp)) {
		wait:
			bioq_insert_head(&sc->xb_bioq, bp);
			break;
		}
		queued++;
	}

	if (queued != 0)
		flush_requests(sc->xb_info);
}

static void
blkif_int(void *xsc)
{
	struct xb_softc *sc = NULL;
	struct bio *bp;
	blkif_response_t *bret;
	RING_IDX i, rp;
	struct blkfront_info *info = xsc;
	DPRINTK("");

	TRACE_ENTER;

	mtx_lock(&blkif_io_lock);

	if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
		mtx_unlock(&blkif_io_lock);
		return;
	}

 again:
	rp = info->ring.sring->rsp_prod;
	rmb(); /* Ensure we see queued responses up to 'rp'. */

	for (i = info->ring.rsp_cons; i != rp; i++) {
		unsigned long id;

		bret = RING_GET_RESPONSE(&info->ring, i);
		id   = bret->id;
		bp   = (struct bio *)info->shadow[id].request;

		blkif_completion(&info->shadow[id]);

		ADD_ID_TO_FREELIST(info, id);

		switch (bret->operation) {
		case BLKIF_OP_READ:
			/* had an unaligned buffer that needs to be copied */
			if (bp->bio_driver1)
				bcopy(bp->bio_driver2, bp->bio_data, bp->bio_bcount);
			/* FALLTHROUGH */
		case BLKIF_OP_WRITE:

			/* free the copy buffer */
			if (bp->bio_driver1) {
				free(bp->bio_driver1, M_DEVBUF);
				bp->bio_driver1 = NULL;
			}

			if (unlikely(bret->status != BLKIF_RSP_OKAY)) {
				printf("Bad return from blkdev data request: %x\n",
				       bret->status);
				bp->bio_flags |= BIO_ERROR;
			}

			sc = (struct xb_softc *)bp->bio_disk->d_drv1;

			if (bp->bio_flags & BIO_ERROR)
				bp->bio_error = EIO;
			else
				bp->bio_resid = 0;

			biodone(bp);
			break;
		default:
			panic("received invalid operation");
			break;
		}
	}

	info->ring.rsp_cons = i;

	if (i != info->ring.req_prod_pvt) {
		int more_to_do;
		RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
		if (more_to_do)
			goto again;
	} else {
		info->ring.sring->rsp_event = i + 1;
	}

	kick_pending_request_queues(info);

	mtx_unlock(&blkif_io_lock);
}
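
/*
 * The 'again' loop above closes a race: RING_FINAL_CHECK_FOR_RESPONSES
 * re-arms rsp_event and then re-checks rsp_prod, so a response posted
 * after 'rp' was sampled but before the event was re-armed is still
 * picked up here instead of being delayed until the next interrupt.
 */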

static void
blkif_free(struct blkfront_info *info, int suspend)
{
	/* Prevent new requests being issued until we fix things up. */
	mtx_lock(&blkif_io_lock);
	info->connected = suspend ?
		BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
	mtx_unlock(&blkif_io_lock);

	/* Free resources associated with old device channel. */
	if (info->ring_ref != GRANT_INVALID_REF) {
		gnttab_end_foreign_access(info->ring_ref, 0,
					  info->ring.sring);
		info->ring_ref = GRANT_INVALID_REF;
		info->ring.sring = NULL;
	}
	if (info->irq)
		unbind_from_irqhandler(info->irq, info);
	info->irq = 0;
}

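/*
 * End foreign access on each segment of a completed request.  Once
 * gnttab_end_foreign_access() returns, the backend can no longer read
 * or write the buffer, so its pages are safe to reuse or free.
 */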
static void
blkif_completion(struct blk_shadow *s)
{
	int i;

	for (i = 0; i < s->req.nr_segments; i++)
		gnttab_end_foreign_access(s->req.seg[i].gref, 0, 0UL);
}

static void
blkif_recover(struct blkfront_info *info)
{
	int i, j;
	blkif_request_t *req;
	struct blk_shadow *copy;

	/* Stage 1: Make a safe copy of the shadow state. */
	copy = (struct blk_shadow *)malloc(sizeof(info->shadow), M_DEVBUF,
					   M_NOWAIT|M_ZERO);
	PANIC_IF(copy == NULL);
	memcpy(copy, info->shadow, sizeof(info->shadow));

	/* Stage 2: Set up free list. */
	memset(&info->shadow, 0, sizeof(info->shadow));
	for (i = 0; i < BLK_RING_SIZE; i++)
		info->shadow[i].req.id = i+1;
	info->shadow_free = info->ring.req_prod_pvt;
	info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;

	/* Stage 3: Find pending requests and requeue them. */
	for (i = 0; i < BLK_RING_SIZE; i++) {
		/* Not in use? */
		if (copy[i].request == 0)
			continue;

		/* Grab a request slot and copy shadow state into it. */
		req = RING_GET_REQUEST(
			&info->ring, info->ring.req_prod_pvt);
		*req = copy[i].req;

		/* We get a new request id, and must reset the shadow state. */
		req->id = GET_ID_FROM_FREELIST(info);
		memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i]));

		/* Rewrite any grant references invalidated by suspend/resume. */
		for (j = 0; j < req->nr_segments; j++)
			gnttab_grant_foreign_access_ref(
				req->seg[j].gref,
				info->xbdev->otherend_id,
				pfn_to_mfn(info->shadow[req->id].frame[j]),
				0 /* assume not readonly */);

		info->shadow[req->id].req = *req;

		info->ring.req_prod_pvt++;
	}

	free(copy, M_DEVBUF);

	xenbus_switch_state(info->xbdev, XenbusStateConnected);

	/* Now safe for us to use the shared ring */
	mtx_lock(&blkif_io_lock);
	info->connected = BLKIF_STATE_CONNECTED;

	/* Send off requeued requests */
	flush_requests(info);

	/* Kick any other new requests queued since we resumed */
	kick_pending_request_queues(info);
	mtx_unlock(&blkif_io_lock);
}

static int
blkfront_is_ready(struct xenbus_device *dev)
{
	struct blkfront_info *info = dev->dev_driver_data;

	return (info->is_ready);
}

static struct xenbus_device_id blkfront_ids[] = {
	{ "vbd" },
	{ "" }
};

static struct xenbus_driver blkfront = {
	.name             = "vbd",
	.ids              = blkfront_ids,
	.probe            = blkfront_probe,
	.remove           = blkfront_remove,
	.resume           = blkfront_resume,
	.otherend_changed = backend_changed,
	.is_ready         = blkfront_is_ready,
};

static void
xenbus_init(void)
{
	xenbus_register_frontend(&blkfront);
}

MTX_SYSINIT(ioreq, &blkif_io_lock, "BIO LOCK", MTX_NOWITNESS); /* XXX how does one enroll a lock? */
SYSINIT(xbdev, SI_SUB_PSEUDO, SI_ORDER_SECOND, xenbus_init, NULL);


/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 8
 * tab-width: 4
 * indent-tabs-mode: t
 * End:
 */