/* xdf.h revision 8863:94039d51dda4 */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */


2867754Smsmith#ifndef _SYS_XDF_H
2967754Smsmith#define	_SYS_XDF_H
3067754Smsmith
3167754Smsmith#include <sys/ddi.h>
3267754Smsmith#include <sys/sunddi.h>
3367754Smsmith#include <sys/cmlb.h>
3467754Smsmith#include <sys/dkio.h>
3567754Smsmith
3667754Smsmith#include <sys/gnttab.h>
3767754Smsmith#include <xen/sys/xendev.h>
3867754Smsmith
3967754Smsmith#ifdef __cplusplus
4067754Smsmithextern "C" {
4167754Smsmith#endif
4267754Smsmith
4367754Smsmith
/*
 * VBDs have standard 512 byte blocks
 * A single blkif_request can transfer up to 11 pages of data, 1 page/segment
 *
 * XB_BSIZE	block size in bytes (taken from DEV_BSIZE)
 * XB_BMASK	mask of the within-block byte offset
 * XB_BSHIFT	log2 of the block size; must agree with XB_BSIZE
 *		(assumes DEV_BSIZE == 1 << XB_BSHIFT, i.e. 512)
 * XB_DTOB(bn)	convert a block count/number to a byte count/offset.  The
 *		shift is done in the type of 'bn', so callers should pass a
 *		type wide enough to hold the byte value.
 */
#define	XB_BSIZE	DEV_BSIZE
#define	XB_BMASK	(XB_BSIZE - 1)
#define	XB_BSHIFT	9
#define	XB_DTOB(bn)	((bn) << XB_BSHIFT)

/*
 * XB_MAX_SEGLEN	largest segment, in bytes (8 blocks)
 * XB_SEGOFFSET		mask of the byte offset within a maximum-size segment
 * XB_MAX_XFER		largest transfer one blkif_request can describe
 * XB_MAXPHYS		XB_MAX_XFER multiplied by the number of ring entries
 */
#define	XB_MAX_SEGLEN	(8 * XB_BSIZE)
#define	XB_SEGOFFSET	(XB_MAX_SEGLEN - 1)
#define	XB_MAX_XFER	(XB_MAX_SEGLEN * BLKIF_MAX_SEGMENTS_PER_REQUEST)
#define	XB_MAXPHYS	(XB_MAX_XFER * BLKIF_RING_SIZE)


/*
 * Slice for absolute disk transaction.
 *
 * Hack Alert.  XB_SLICE_NONE is a magic value that can be written into the
 * b_private field of buf structures passed to xdf_strategy().  When present
 * it indicates that the I/O is using an absolute offset (i.e., the I/O is
 * not bound to any one partition).  This magic value is currently used by
 * the pv_cmdk driver.  This hack is shamelessly stolen from the sun4v vdc
 * driver, another virtual disk device driver.  (Although in the case of
 * vdc the hack is less egregious since it is self contained within the
 * vdc driver, whereas here it is used as an interface between the pv_cmdk
 * driver and the xdf driver.)
 */
#define	XB_SLICE_NONE		0xFF

/*
 * blkif status
 *
 * State of one virtual block device instance.  The numeric values are
 * assigned explicitly and increase in declaration order.
 */
typedef enum xdf_state {
	/*
	 * initial state
	 */
	XD_UNKNOWN = 0,
	/*
	 * ring and evtchn alloced, xenbus state changed to
	 * XenbusStateInitialised, wait for backend to connect
	 */
	XD_INIT = 1,
	/*
	 * backend and frontend xenbus state has changed to
	 * XenbusStateConnected.  IO is now allowed, but we are not yet
	 * fully initialized.
	 */
	XD_CONNECTED = 2,
	/*
	 * We're fully initialized and allowing regular IO.
	 */
	XD_READY = 3,
	/*
	 * vbd interface close request received from backend, no more I/O
	 * requests are allowed to be put into the ring buffer, while the
	 * interrupt handler is allowed to run to finish any outstanding I/O
	 * request.  The disconnect process is kicked off by changing xenbus
	 * state to XenbusStateClosed.
	 */
	XD_CLOSING = 4,
	/*
	 * disconnection process finished, both backend and frontend's
	 * xenbus state has been changed to XenbusStateClosed, can be detached
	 */
	XD_CLOSED = 5,
	/*
	 * We're either being suspended or resuming from a suspend.  If we're
	 * in the process of suspending, we block all new IO, but allow
	 * existing IO to drain.
	 */
	XD_SUSPEND = 6
} xdf_state_t;

/*
 * 16 partitions + fdisk
 *
 * Minor number layout: the low XDF_PSHIFT bits hold the partition and the
 * remaining high bits hold the instance number.
 *
 * XDF_PMASK		mask selecting the partition bits of a minor
 * XDF_PEXT		number of partition slots per instance
 * XDF_MINOR(i, m)	build a minor from instance 'i' and partition 'm';
 *			'm' is not masked, so it must already be < XDF_PEXT
 * XDF_INST(m)		instance number encoded in minor 'm'
 * XDF_PART(m)		partition number encoded in minor 'm'
 */
#define	XDF_PSHIFT	6
#define	XDF_PMASK	((1 << XDF_PSHIFT) - 1)
#define	XDF_PEXT	(1 << XDF_PSHIFT)
#define	XDF_MINOR(i, m) (((i) << XDF_PSHIFT) | (m))
#define	XDF_INST(m)	((m) >> XDF_PSHIFT)
#define	XDF_PART(m)	((m) & XDF_PMASK)

12767754Smsmith/*
12867754Smsmith * one blkif_request_t will have one corresponding ge_slot_t
12967754Smsmith * where we save those grant table refs used in this blkif_request_t
13067754Smsmith *
13167754Smsmith * the id of this ge_slot_t will also be put into 'id' field in
13267754Smsmith * each blkif_request_t when sent out to the ring buffer.
13367754Smsmith */
13467754Smsmithtypedef struct ge_slot {
13567754Smsmith	list_node_t	gs_vreq_link;
13667754Smsmith	struct v_req	*gs_vreq;
13767754Smsmith	domid_t		gs_oeid;
13867754Smsmith	int		gs_isread;
13967754Smsmith	grant_ref_t	gs_ghead;
14067754Smsmith	int		gs_ngrefs;
14167754Smsmith	grant_ref_t	gs_ge[BLKIF_MAX_SEGMENTS_PER_REQUEST];
14267754Smsmith} ge_slot_t;
14367754Smsmith
14467754Smsmith/*
14567754Smsmith * vbd I/O request
14667754Smsmith *
14767754Smsmith * An instance of this structure is bound to each buf passed to
14867754Smsmith * the driver's strategy by setting the pointer into bp->av_back.
14967754Smsmith * The id of this vreq will also be put into 'id' field in each
15067754Smsmith * blkif_request_t when sent out to the ring buffer for one DMA
15167754Smsmith * window of this buf.
15267754Smsmith *
15367754Smsmith * Vreq mainly contains DMA information for this buf. In one vreq/buf,
15467754Smsmith * there could be more than one DMA window, each of which will be
15567754Smsmith * mapped to one blkif_request_t/ge_slot_t. Ge_slot_t contains all grant
15667754Smsmith * table entry information for this buf. The ge_slot_t for current DMA
15767754Smsmith * window is pointed to by v_gs in vreq.
15867754Smsmith *
15967754Smsmith * So, grant table entries will only be alloc'ed when the DMA window is
16067754Smsmith * about to be transferred via blkif_request_t to the ring buffer. And
16167754Smsmith * they will be freed right after the blkif_response_t is seen. By this
16267754Smsmith * means, we can make use of grant table entries more efficiently.
16367754Smsmith */
16467754Smsmithtypedef struct v_req {
16567754Smsmith	list_node_t	v_link;
16667754Smsmith	list_t		v_gs;
16767754Smsmith	int		v_status;
16867754Smsmith	buf_t		*v_buf;
16967754Smsmith	uint_t		v_ndmacs;
17067754Smsmith	uint_t		v_dmaw;
17167754Smsmith	uint_t		v_ndmaws;
17267754Smsmith	uint_t		v_nslots;
17367754Smsmith	uint64_t	v_blkno;
17467754Smsmith	ddi_dma_handle_t v_memdmahdl;
17567754Smsmith	ddi_acc_handle_t v_align;
17667754Smsmith	ddi_dma_handle_t v_dmahdl;
17767754Smsmith	ddi_dma_cookie_t v_dmac;
17867754Smsmith	caddr_t		v_abuf;
17967754Smsmith	uint8_t		v_flush_diskcache;
18067754Smsmith	boolean_t	v_runq;
18167754Smsmith} v_req_t;
18267754Smsmith
/*
 * Status set and checked in vreq->v_status by vreq_setup()
 *
 * These flags will help us to continue the vreq setup work from last failure
 * point, instead of starting from scratch after each failure.
 *
 * Note that the values are consecutive integers, not independent bit
 * masks, so they cannot be OR-ed together.
 */
#define	VREQ_INIT		0x0
#define	VREQ_INIT_DONE		0x1
#define	VREQ_DMAHDL_ALLOCED	0x2
#define	VREQ_MEMDMAHDL_ALLOCED	0x3
#define	VREQ_DMAMEM_ALLOCED	0x4
#define	VREQ_DMABUF_BOUND	0x5
#define	VREQ_GS_ALLOCED		0x6
#define	VREQ_DMAWIN_DONE	0x7

19867754Smsmith/*
19967754Smsmith * virtual block device per-instance softstate
20067754Smsmith */
20167754Smsmithtypedef struct xdf {
20267754Smsmith	dev_info_t	*xdf_dip;
20367754Smsmith	char		*xdf_addr;
20467754Smsmith	ddi_iblock_cookie_t xdf_ibc; /* mutex iblock cookie */
20567754Smsmith	domid_t		xdf_peer; /* otherend's dom ID */
20667754Smsmith	xendev_ring_t	*xdf_xb_ring; /* I/O ring buffer */
20767754Smsmith	ddi_acc_handle_t xdf_xb_ring_hdl; /* access handler for ring buffer */
20867754Smsmith	list_t		xdf_vreq_act; /* active vreq list */
20967754Smsmith	buf_t		*xdf_f_act; /* active buf list head */
21067754Smsmith	buf_t		*xdf_l_act; /* active buf list tail */
21167754Smsmith	buf_t		*xdf_i_act; /* active buf list index */
21267754Smsmith	xdf_state_t	xdf_state; /* status of this virtual disk */
21367754Smsmith	boolean_t	xdf_suspending;
21467754Smsmith	ulong_t		xdf_vd_open[OTYPCNT];
21567754Smsmith	ulong_t		xdf_vd_lyropen[XDF_PEXT];
21667754Smsmith	ulong_t		xdf_connect_req;
21767754Smsmith	ulong_t		xdf_vd_exclopen;
21867754Smsmith	kmutex_t	xdf_iostat_lk; /* muxes lock for the iostat ptr */
21967754Smsmith	kmutex_t	xdf_dev_lk; /* mutex lock for I/O path */
22067754Smsmith	kmutex_t	xdf_cb_lk; /* mutex lock for event handling path */
22167754Smsmith	kcondvar_t	xdf_dev_cv; /* cv used in I/O path */
22267754Smsmith	uint_t		xdf_dinfo; /* disk info from backend xenstore */
22367754Smsmith	diskaddr_t	xdf_xdev_nblocks; /* total size in block */
22467754Smsmith	cmlb_geom_t	xdf_pgeom;
22567754Smsmith	boolean_t	xdf_pgeom_set;
22667754Smsmith	boolean_t	xdf_pgeom_fixed;
22767754Smsmith	kstat_t		*xdf_xdev_iostat;
22867754Smsmith	cmlb_handle_t	xdf_vd_lbl;
22967754Smsmith	ddi_softintr_t	xdf_softintr_id;
23067754Smsmith	timeout_id_t	xdf_timeout_id;
23167754Smsmith	struct gnttab_free_callback xdf_gnt_callback;
23267754Smsmith	boolean_t	xdf_feature_barrier;
23367754Smsmith	boolean_t	xdf_flush_supported;
23467754Smsmith	boolean_t	xdf_media_req_supported;
23567754Smsmith	boolean_t	xdf_wce;
23667754Smsmith	boolean_t	xdf_cmbl_reattach;
23767754Smsmith	char		*xdf_flush_mem;
23867754Smsmith	char		*xdf_cache_flush_block;
23967754Smsmith	int		xdf_evtchn;
24067754Smsmith	enum dkio_state	xdf_mstate;
24167754Smsmith	kcondvar_t	xdf_mstate_cv;
24267754Smsmith	kcondvar_t	xdf_hp_status_cv;
24367754Smsmith	struct buf	*xdf_ready_bp;
24467754Smsmith	ddi_taskq_t	*xdf_ready_tq;
24567754Smsmith	kthread_t	*xdf_ready_tq_thread;
24667754Smsmith	struct buf	*xdf_ready_tq_bp;
24767754Smsmith#ifdef	DEBUG
24867754Smsmith	int		xdf_dmacallback_num;
24967754Smsmith	kthread_t	*xdf_oe_change_thread;
25067754Smsmith#endif
25167754Smsmith} xdf_t;
25267754Smsmith
/*
 * VBD I/O requests must be aligned on a 512-byte boundary and specify
 * a transfer size which is a multiple of 512-bytes
 *
 * ALIGNED_XFER(bp) is non-zero when both the buffer address and the byte
 * count of buf 'bp' are block aligned.  'bp' is evaluated twice.
 */
#define	ALIGNED_XFER(bp) \
	((((uintptr_t)((bp)->b_un.b_addr) & XB_BMASK) == 0) && \
	(((bp)->b_bcount & XB_BMASK) == 0))

/*
 * U_INVAL(u) is non-zero when the uio offset or the length of the first
 * iovec is not block aligned.  Only the iovec that uio_iov points at is
 * examined.  'u' is evaluated twice.
 */
#define	U_INVAL(u)	(((u)->uio_loffset & (offset_t)(XB_BMASK)) || \
	((u)->uio_iov->iov_len & (offset_t)(XB_BMASK)))

/*
 * wrap pa_to_ma() for xdf to run in dom0
 *
 * When DOMAIN_IS_INITDOMAIN(xen_info) is true the address is returned
 * unchanged; otherwise it is passed through pa_to_ma().  The macro reads
 * the name 'xen_info' from the scope of the caller.
 */
#define	PATOMA(addr)	\
	(DOMAIN_IS_INITDOMAIN(xen_info) ? (addr) : pa_to_ma(addr))

/*
 * Predicates on the disk info word of an xdf_t (XD_IS_*) and on the flags
 * of a buf (IS_READ, IS_ERROR).  Each tests a single flag and passes the
 * result through VOID2BOOLEAN().
 */
#define	XD_IS_RO(vbd)	VOID2BOOLEAN((vbd)->xdf_dinfo & VDISK_READONLY)
#define	XD_IS_CD(vbd)	VOID2BOOLEAN((vbd)->xdf_dinfo & VDISK_CDROM)
#define	XD_IS_RM(vbd)	VOID2BOOLEAN((vbd)->xdf_dinfo & VDISK_REMOVABLE)
#define	IS_READ(bp)	VOID2BOOLEAN((bp)->b_flags & B_READ)
#define	IS_ERROR(bp)	VOID2BOOLEAN((bp)->b_flags & B_ERROR)

/*
 * Account one completed buf in the instance's I/O kstat: bump the read or
 * write operation counter (chosen by B_READ in b_flags) and add the number
 * of bytes actually transferred (b_bcount - b_resid).
 *
 * The macro expands to a bare brace block that declares the locals 'kip'
 * and 'n_done', so arguments must not use those names, and it should not be
 * used as the unbraced body of an if/else.  'bp' is evaluated several
 * times.  The kstat pointer is dereferenced without a NULL check.
 */
#define	XDF_UPDATE_IO_STAT(vdp, bp)					\
	{								\
		kstat_io_t *kip = KSTAT_IO_PTR((vdp)->xdf_xdev_iostat);	\
		size_t n_done = (bp)->b_bcount - (bp)->b_resid;		\
		if ((bp)->b_flags & B_READ) {				\
			kip->reads++;					\
			kip->nread += n_done;				\
		} else {						\
			kip->writes++;					\
			kip->nwritten += n_done;			\
		}							\
	}

/*
 * Debug-only helpers.
 *
 * DPRINTF(flag, args)	in DEBUG builds, call prom_printf when 'flag' is set
 *			in xdf_debug (declared elsewhere).  'args' is pasted
 *			directly after prom_printf, so it must be a complete
 *			parenthesized argument list, e.g. ("fmt %d\n", x).
 * SETDMACBON(vbd)	increment xdf_dmacallback_num
 * SETDMACBOFF(vbd)	decrement xdf_dmacallback_num
 * ISDMACBON(vbd)	true while xdf_dmacallback_num is positive
 *
 * In non-DEBUG builds all four expand to nothing.  ISDMACBON() therefore
 * may only appear where an empty expansion is still valid (presumably only
 * inside ASSERT()-style checks that are compiled out as well).
 */
#ifdef DEBUG
#define	DPRINTF(flag, args)	{if (xdf_debug & (flag)) prom_printf args; }
#define	SETDMACBON(vbd)		{(vbd)->xdf_dmacallback_num++; }
#define	SETDMACBOFF(vbd)	{(vbd)->xdf_dmacallback_num--; }
#define	ISDMACBON(vbd)		((vbd)->xdf_dmacallback_num > 0)
#else
#define	DPRINTF(flag, args)
#define	SETDMACBON(vbd)
#define	SETDMACBOFF(vbd)
#define	ISDMACBON(vbd)
#endif /* DEBUG */

/*
 * Bits for the 'flag' argument of DPRINTF(); each selects one class of
 * debug messages.  0x4 is not assigned.
 */
#define	DDI_DBG		0x1
#define	DMA_DBG		0x2
#define	INTR_DBG	0x8
#define	IO_DBG		0x10
#define	IOCTL_DBG	0x20
#define	SUSRES_DBG	0x40
#define	LBL_DBG		0x80

/*
 * Entry points that are only declared when this header is built with
 * XPV_HVM_DRIVER defined.  Their definitions are not in this file.
 */
#if defined(XPV_HVM_DRIVER)
extern int xdf_lb_getinfo(dev_info_t *, int, void *, void *);
extern int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t,
    void *);
extern void xdfmin(struct buf *bp);
extern dev_info_t *xdf_hvm_hold(const char *);
extern boolean_t xdf_hvm_connect(dev_info_t *);
extern int xdf_hvm_setpgeom(dev_info_t *, cmlb_geom_t *);
extern int xdf_kstat_create(dev_info_t *, char *, int);
extern void xdf_kstat_delete(dev_info_t *);
extern boolean_t xdf_is_cd(dev_info_t *);
extern boolean_t xdf_is_rm(dev_info_t *);
extern boolean_t xdf_media_req_supported(dev_info_t *);
#endif /* XPV_HVM_DRIVER */

32167754Smsmith#ifdef __cplusplus
32267754Smsmith}
32367754Smsmith#endif
32467754Smsmith
32567754Smsmith#endif	/* _SYS_XDF_H */
32667754Smsmith