pci_virtio_block.c revision 241744
/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/linker_set.h>
#include <sys/stat.h>
#include <sys/uio.h>
#include <sys/ioctl.h>

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#include <assert.h>
#include <pthread.h>

#include "fbsdrun.h"
#include "pci_emul.h"
#include "virtio.h"

#define VTBLK_RINGSZ	64

#define VTBLK_CFGSZ	28

#define VTBLK_R_CFG		VTCFG_R_CFG0
#define VTBLK_R_CFG_END		(VTBLK_R_CFG + VTBLK_CFGSZ - 1)
#define VTBLK_R_MAX		VTBLK_R_CFG_END

#define VTBLK_REGSZ		(VTBLK_R_MAX + 1)

#define VTBLK_MAXSEGS	32

#define VTBLK_S_OK	0
#define VTBLK_S_IOERR	1

/*
 * Host capabilities
 */
#define VTBLK_S_HOSTCAPS      \
  ( 0x00000004 |	/* host maximum request segments */ \
    0x10000000 )	/* supports indirect descriptors */

struct vring_hqueue {
	/* Internal state */
	uint16_t	hq_size;
	uint16_t	hq_cur_aidx;		/* trails behind 'avail_idx' */

	/* Host-context pointers to the queue */
	struct virtio_desc *hq_dtable;
	uint16_t	*hq_avail_flags;
	uint16_t	*hq_avail_idx;		/* monotonically increasing */
	uint16_t	*hq_avail_ring;

	uint16_t	*hq_used_flags;
	uint16_t	*hq_used_idx;		/* monotonically increasing */
	struct virtio_used *hq_used_ring;
};

/*
 * Config space
 */
struct vtblk_config {
	uint64_t	vbc_capacity;
	uint32_t	vbc_size_max;
	uint32_t	vbc_seg_max;
	uint16_t	vbc_geom_c;
	uint8_t		vbc_geom_h;
	uint8_t		vbc_geom_s;
	uint32_t	vbc_blk_size;
	uint32_t	vbc_sectors_max;
} __packed;
CTASSERT(sizeof(struct vtblk_config) == VTBLK_CFGSZ);

/*
 * Fixed-size block header
 */
struct virtio_blk_hdr {
#define VBH_OP_READ	0
#define VBH_OP_WRITE	1
	uint32_t	vbh_type;
	uint32_t	vbh_ioprio;
	uint64_t	vbh_sector;
} __packed;

/*
 * Debug printf
 */
static int pci_vtblk_debug;
#define DPRINTF(params) if (pci_vtblk_debug) printf params
#define WPRINTF(params) printf params

/*
 * Per-device softc
 */
struct pci_vtblk_softc {
	struct pci_devinst *vbsc_pi;
	int		vbsc_fd;
	int		vbsc_status;
	int		vbsc_isr;
	int		vbsc_lastq;
	uint32_t	vbsc_features;
	uint64_t	vbsc_pfn;
	struct vring_hqueue vbsc_q;
	struct vtblk_config vbsc_cfg;
};

/*
 * Return the number of available descriptors in the vring taking care
 * of the 16-bit index wraparound.
 */
static int
hq_num_avail(struct vring_hqueue *hq)
{
	int ndesc;

	if (*hq->hq_avail_idx >= hq->hq_cur_aidx)
		ndesc = *hq->hq_avail_idx - hq->hq_cur_aidx;
	else
		ndesc = UINT16_MAX - hq->hq_cur_aidx + *hq->hq_avail_idx + 1;

	assert(ndesc >= 0 && ndesc <= hq->hq_size);

	return (ndesc);
}

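/*
 * Handle a guest write to the device status register. A write of zero
 * indicates a device reset request; the new value is recorded either way.
 */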
static void
pci_vtblk_update_status(struct pci_vtblk_softc *sc, uint32_t value)
{
	if (value == 0) {
		DPRINTF(("vtblk: device reset requested !\n"));
	}

	sc->vbsc_status = value;
}

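/*
 * Process a single request from the available ring. The entry is expected
 * to be a single indirect descriptor holding the request header, the data
 * segments and the status byte; the I/O is performed synchronously and the
 * descriptor is then returned on the used ring.
 */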
static void
pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vring_hqueue *hq)
{
	struct iovec iov[VTBLK_MAXSEGS];
	struct virtio_blk_hdr *vbh;
	struct virtio_desc *vd, *vid;
	struct virtio_used *vu;
	uint8_t *status;
	int i;
	int err;
	int iolen;
	int nsegs;
	int uidx, aidx, didx;
	int writeop;
	off_t offset;

	uidx = *hq->hq_used_idx;
	aidx = hq->hq_cur_aidx;
	didx = hq->hq_avail_ring[aidx % hq->hq_size];
	assert(didx >= 0 && didx < hq->hq_size);

	vd = &hq->hq_dtable[didx];

	/*
	 * Verify that the descriptor is indirect, and obtain
	 * the pointer to the indirect descriptor.
	 * There has to be space for at least 3 descriptors
	 * in the indirect descriptor array: the block header,
	 * 1 or more data descriptors, and a status byte.
	 */
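	/*
	 * Expected layout of the indirect descriptor array:
	 *   vid[0]          - fixed request header (read-only to the device)
	 *   vid[1..nsegs-2] - data segments
	 *   vid[nsegs-1]    - status byte (written by the device)
	 */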
	assert(vd->vd_flags & VRING_DESC_F_INDIRECT);

	nsegs = vd->vd_len / sizeof(struct virtio_desc);
	assert(nsegs >= 3);
	assert(nsegs < VTBLK_MAXSEGS + 2);

	vid = paddr_guest2host(vd->vd_addr);
	assert((vid->vd_flags & VRING_DESC_F_INDIRECT) == 0);

	/*
	 * The first descriptor will be the read-only fixed header
	 */
	vbh = paddr_guest2host(vid[0].vd_addr);
	assert(vid[0].vd_len == sizeof(struct virtio_blk_hdr));
	assert(vid[0].vd_flags & VRING_DESC_F_NEXT);
	assert((vid[0].vd_flags & VRING_DESC_F_WRITE) == 0);

	writeop = (vbh->vbh_type == VBH_OP_WRITE);

	offset = vbh->vbh_sector * DEV_BSIZE;

	/*
	 * Build up the iovec based on the guest's data descriptors
	 */
	for (i = 1, iolen = 0; i < nsegs - 1; i++) {
		iov[i-1].iov_base = paddr_guest2host(vid[i].vd_addr);
		iov[i-1].iov_len = vid[i].vd_len;
		iolen += vid[i].vd_len;

		assert(vid[i].vd_flags & VRING_DESC_F_NEXT);
		assert((vid[i].vd_flags & VRING_DESC_F_INDIRECT) == 0);

		/*
		 * - write op implies read-only descriptor,
		 * - read op implies write-only descriptor,
		 * therefore test the inverse of the descriptor bit
		 * to the op.
		 */
		assert(((vid[i].vd_flags & VRING_DESC_F_WRITE) == 0) ==
		       writeop);
	}

	/* Lastly, get the address of the status byte */
	status = paddr_guest2host(vid[nsegs - 1].vd_addr);
	assert(vid[nsegs - 1].vd_len == 1);
	assert((vid[nsegs - 1].vd_flags & VRING_DESC_F_NEXT) == 0);
	assert(vid[nsegs - 1].vd_flags & VRING_DESC_F_WRITE);

	DPRINTF(("virtio-block: %s op, %d bytes, %d segs, offset %ld\n\r",
		 writeop ? "write" : "read", iolen, nsegs - 2, offset));

	if (writeop) {
		err = pwritev(sc->vbsc_fd, iov, nsegs - 2, offset);
	} else {
		err = preadv(sc->vbsc_fd, iov, nsegs - 2, offset);
	}

	*status = err < 0 ? VTBLK_S_IOERR : VTBLK_S_OK;

	/*
	 * Return the single indirect descriptor back to the host
	 */
	vu = &hq->hq_used_ring[uidx % hq->hq_size];
	vu->vu_idx = didx;
	vu->vu_tlen = 1;
	hq->hq_cur_aidx++;
	*hq->hq_used_idx += 1;
}

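/*
 * Called when the guest notifies the queue: process every available ring
 * entry and raise an interrupt if the guest has not suppressed notifications.
 */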
static void
pci_vtblk_qnotify(struct pci_vtblk_softc *sc)
{
	struct vring_hqueue *hq = &sc->vbsc_q;
	int i;
	int ndescs;

	/*
	 * Calculate number of ring entries to process
	 */
	ndescs = hq_num_avail(hq);

	if (ndescs == 0)
		return;

	/*
	 * Run through all the available entries, processing each
	 * block request in turn
	 */
	for (i = 0; i < ndescs; i++)
		pci_vtblk_proc(sc, hq);

	/*
	 * Generate an interrupt if able
	 */
	if ((*hq->hq_avail_flags & VRING_AVAIL_F_NO_INTERRUPT) == 0 &&
		sc->vbsc_isr == 0) {
		sc->vbsc_isr = 1;
		pci_generate_msi(sc->vbsc_pi, 0);
	}

}

static void
pci_vtblk_ring_init(struct pci_vtblk_softc *sc, uint64_t pfn)
{
	struct vring_hqueue *hq;

	sc->vbsc_pfn = pfn << VRING_PFN;

	/*
	 * Set up host pointers to the various parts of the
	 * queue
	 */
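	/*
	 * The guest-supplied pfn points at the descriptor table, which is
	 * followed immediately by the available ring; the used ring starts
	 * at the next VRING_ALIGN boundary.
	 */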
	hq = &sc->vbsc_q;
	hq->hq_size = VTBLK_RINGSZ;

	hq->hq_dtable = paddr_guest2host(pfn << VRING_PFN);
	hq->hq_avail_flags = (uint16_t *)(hq->hq_dtable + hq->hq_size);
	hq->hq_avail_idx = hq->hq_avail_flags + 1;
	hq->hq_avail_ring = hq->hq_avail_flags + 2;
	hq->hq_used_flags = (uint16_t *)roundup2((uintptr_t)hq->hq_avail_ring,
						 VRING_ALIGN);
	hq->hq_used_idx = hq->hq_used_flags + 1;
	hq->hq_used_ring = (struct virtio_used *)(hq->hq_used_flags + 2);

	/*
	 * Initialize queue indexes
	 */
	hq->hq_cur_aidx = 0;
}

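/*
 * Device initialization: open the backing file named in 'opts', derive the
 * virtio-block config space from its size, and set up the PCI config
 * registers, MSI capability and I/O BAR.
 */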
static int
pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	struct stat sbuf;
	struct pci_vtblk_softc *sc;
	int fd;

	if (opts == NULL) {
		printf("virtio-block: backing device required\n");
		return (1);
	}

	/*
	 * Access to guest memory is required. Fail if
	 * memory not mapped
	 */
	if (paddr_guest2host(0) == NULL)
		return (1);

	/*
	 * The supplied backing file has to exist
	 */
	fd = open(opts, O_RDWR);
	if (fd < 0) {
		perror("Could not open backing file");
		return (1);
	}

	if (fstat(fd, &sbuf) < 0) {
		perror("Could not stat backing file");
		close(fd);
		return (1);
	}

	sc = malloc(sizeof(struct pci_vtblk_softc));
	memset(sc, 0, sizeof(struct pci_vtblk_softc));

	pi->pi_arg = sc;
	sc->vbsc_pi = pi;
	sc->vbsc_fd = fd;

	/* setup virtio block config space */
	sc->vbsc_cfg.vbc_capacity = sbuf.st_size / DEV_BSIZE;
	sc->vbsc_cfg.vbc_seg_max = VTBLK_MAXSEGS;
	sc->vbsc_cfg.vbc_blk_size = DEV_BSIZE;
	sc->vbsc_cfg.vbc_size_max = 0;	/* not negotiated */
	sc->vbsc_cfg.vbc_geom_c = 0;	/* no geometry */
	sc->vbsc_cfg.vbc_geom_h = 0;
	sc->vbsc_cfg.vbc_geom_s = 0;
	sc->vbsc_cfg.vbc_sectors_max = 0;

	/* initialize config space */
	pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_BLOCK);
	pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
	pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_BLOCK);
	pci_emul_add_msicap(pi, 1);
	pci_emul_alloc_bar(pi, 0, PCIBAR_IO, VTBLK_REGSZ);

	return (0);
}

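/*
 * Handle a guest write to the virtio-block I/O BAR (BAR 0).
 */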
static void
pci_vtblk_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
		int baridx, uint64_t offset, int size, uint64_t value)
{
	struct pci_vtblk_softc *sc = pi->pi_arg;

	assert(baridx == 0);

	if (offset + size > VTBLK_REGSZ) {
		DPRINTF(("vtblk_write: 2big, offset %ld size %d\n",
			 offset, size));
		return;
	}

	switch (offset) {
	case VTCFG_R_GUESTCAP:
		assert(size == 4);
		sc->vbsc_features = value & VTBLK_S_HOSTCAPS;
		break;
	case VTCFG_R_PFN:
		assert(size == 4);
		pci_vtblk_ring_init(sc, value);
		break;
	case VTCFG_R_QSEL:
		assert(size == 2);
		sc->vbsc_lastq = value;
		break;
	case VTCFG_R_QNOTIFY:
		assert(size == 2);
		assert(value == 0);
		pci_vtblk_qnotify(sc);
		break;
	case VTCFG_R_STATUS:
		assert(size == 1);
		pci_vtblk_update_status(sc, value);
		break;
	case VTCFG_R_HOSTCAP:
	case VTCFG_R_QNUM:
	case VTCFG_R_ISR:
	case VTBLK_R_CFG ... VTBLK_R_CFG_END:
		DPRINTF(("vtblk: write to readonly reg %ld\n\r", offset));
		break;
	default:
		DPRINTF(("vtblk: unknown i/o write offset %ld\n\r", offset));
		value = 0;
		break;
	}
}

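/*
 * Handle a guest read from the virtio-block I/O BAR (BAR 0).
 */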
static uint64_t
pci_vtblk_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
	       int baridx, uint64_t offset, int size)
{
	struct pci_vtblk_softc *sc = pi->pi_arg;
	void *ptr;
	uint32_t value;

	assert(baridx == 0);

	if (offset + size > VTBLK_REGSZ) {
		DPRINTF(("vtblk_read: 2big, offset %ld size %d\n",
			 offset, size));
		return (0);
	}

	switch (offset) {
	case VTCFG_R_HOSTCAP:
		assert(size == 4);
		value = VTBLK_S_HOSTCAPS;
		break;
	case VTCFG_R_GUESTCAP:
		assert(size == 4);
		value = sc->vbsc_features; /* XXX never read ? */
		break;
	case VTCFG_R_PFN:
		assert(size == 4);
		value = sc->vbsc_pfn >> VRING_PFN;
		break;
	case VTCFG_R_QNUM:
		value = (sc->vbsc_lastq == 0) ? VTBLK_RINGSZ : 0;
		break;
	case VTCFG_R_QSEL:
		assert(size == 2);
		value = sc->vbsc_lastq; /* XXX never read ? */
		break;
	case VTCFG_R_QNOTIFY:
		assert(size == 2);
		value = 0; /* XXX never read ? */
		break;
	case VTCFG_R_STATUS:
		assert(size == 1);
		value = sc->vbsc_status;
		break;
	case VTCFG_R_ISR:
		assert(size == 1);
		value = sc->vbsc_isr;
		sc->vbsc_isr = 0;	/* a read clears this flag */
		break;
	case VTBLK_R_CFG ... VTBLK_R_CFG_END:
		assert(size + offset <= (VTBLK_R_CFG_END + 1));
		ptr = (uint8_t *)&sc->vbsc_cfg + offset - VTBLK_R_CFG;
		if (size == 1) {
			value = *(uint8_t *) ptr;
		} else if (size == 2) {
			value = *(uint16_t *) ptr;
		} else {
			value = *(uint32_t *) ptr;
		}
		break;
	default:
		DPRINTF(("vtblk: unknown i/o read offset %ld\n\r", offset));
		value = 0;
		break;
	}

	return (value);
}

struct pci_devemu pci_de_vblk = {
	.pe_emu =	"virtio-blk",
	.pe_init =	pci_vtblk_init,
	.pe_barwrite =	pci_vtblk_write,
	.pe_barread =	pci_vtblk_read
};
PCI_EMUL_SET(pci_de_vblk);