pci_virtio_block.c (248477) vs pci_virtio_block.c (249813)
1/*-
2 * Copyright (c) 2011 NetApp, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: head/usr.sbin/bhyve/pci_virtio_block.c 248477 2013-03-18 22:38:30Z neel $
26 * $FreeBSD: head/usr.sbin/bhyve/pci_virtio_block.c 249813 2013-04-23 16:40:39Z neel $
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/usr.sbin/bhyve/pci_virtio_block.c 248477 2013-03-18 22:38:30Z neel $");
30__FBSDID("$FreeBSD: head/usr.sbin/bhyve/pci_virtio_block.c 249813 2013-04-23 16:40:39Z neel $");
31
32#include <sys/param.h>
33#include <sys/linker_set.h>
34#include <sys/stat.h>
35#include <sys/uio.h>
36#include <sys/ioctl.h>
37#include <sys/disk.h>
38
39#include <errno.h>
40#include <fcntl.h>
41#include <stdio.h>
42#include <stdlib.h>
43#include <stdint.h>
44#include <string.h>
45#include <strings.h>
46#include <unistd.h>
47#include <assert.h>
48#include <pthread.h>
49
50#include "bhyverun.h"
51#include "pci_emul.h"
52#include "virtio.h"
53
54#define VTBLK_RINGSZ 64
55
56#define VTBLK_CFGSZ 28
57
58#define VTBLK_R_CFG VTCFG_R_CFG1
59#define VTBLK_R_CFG_END VTBLK_R_CFG + VTBLK_CFGSZ -1
60#define VTBLK_R_MAX VTBLK_R_CFG_END
61
62#define VTBLK_REGSZ VTBLK_R_MAX+1
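/*
 * Register map implied by the defines above: the generic virtio
 * header occupies offsets 0 through VTCFG_R_CFG1 - 1, the 28-byte
 * vtblk_config follows at VTBLK_R_CFG, and the I/O BAR therefore
 * spans VTBLK_R_MAX + 1 bytes.
 */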
63
64#define VTBLK_MAXSEGS 32
65
66#define VTBLK_S_OK 0
67#define VTBLK_S_IOERR 1
68
69/*
70 * Host capabilities
71 */
72#define VTBLK_S_HOSTCAPS \
73 ( 0x00000004 | /* host maximum request segments */ \
74 0x10000000 ) /* supports indirect descriptors */
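/*
 * These values match the virtio 0.9.5 feature-bit assignments:
 * bit 2 (0x00000004) is VIRTIO_BLK_F_SEG_MAX, i.e. the vbc_seg_max
 * config field is valid, and bit 28 (0x10000000) is
 * VIRTIO_RING_F_INDIRECT_DESC.
 */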
75
76static int use_msix = 1;
77
78struct vring_hqueue {
79 /* Internal state */
80 uint16_t hq_size;
81 uint16_t hq_cur_aidx; /* trails behind 'avail_idx' */
82
83 /* Host-context pointers to the queue */
84 struct virtio_desc *hq_dtable;
85 uint16_t *hq_avail_flags;
86 uint16_t *hq_avail_idx; /* monotonically increasing */
87 uint16_t *hq_avail_ring;
88
89 uint16_t *hq_used_flags;
90 uint16_t *hq_used_idx; /* monotonically increasing */
91 struct virtio_used *hq_used_ring;
92};
93
94/*
95 * Config space
96 */
97struct vtblk_config {
98 uint64_t vbc_capacity;
99 uint32_t vbc_size_max;
100 uint32_t vbc_seg_max;
101 uint16_t vbc_geom_c;
102 uint8_t vbc_geom_h;
103 uint8_t vbc_geom_s;
104 uint32_t vbc_blk_size;
105 uint32_t vbc_sectors_max;
106} __packed;
107CTASSERT(sizeof(struct vtblk_config) == VTBLK_CFGSZ);
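/*
 * The CTASSERT turns accidental padding into a build failure: the
 * guest addresses these fields as raw bytes relative to VTBLK_R_CFG,
 * so the packed struct must remain exactly VTBLK_CFGSZ bytes.
 */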
108
109/*
110 * Fixed-size block header
111 */
112struct virtio_blk_hdr {
113#define VBH_OP_READ 0
114#define VBH_OP_WRITE 1
115#define VBH_FLAG_BARRIER 0x80000000 /* OR'ed into vbh_type */
116 uint32_t vbh_type;
117 uint32_t vbh_ioprio;
118 uint64_t vbh_sector;
119} __packed;
120
121/*
122 * Debug printf
123 */
124static int pci_vtblk_debug;
125#define DPRINTF(params) if (pci_vtblk_debug) printf params
126#define WPRINTF(params) printf params
127
128/*
129 * Per-device softc
130 */
131struct pci_vtblk_softc {
132 struct pci_devinst *vbsc_pi;
133 int vbsc_fd;
134 int vbsc_status;
135 int vbsc_isr;
136 int vbsc_lastq;
137 uint32_t vbsc_features;
138 uint64_t vbsc_pfn;
139 struct vring_hqueue vbsc_q;
140 struct vtblk_config vbsc_cfg;
141 uint16_t msix_table_idx_req;
142 uint16_t msix_table_idx_cfg;
143};
144#define vtblk_ctx(sc) ((sc)->vbsc_pi->pi_vmctx)
145
146/*
147 * Return the size of IO BAR that maps virtio header and device specific
148 * region. The size would vary depending on whether MSI-X is enabled or
149 * not
150 */
151static uint64_t
152pci_vtblk_iosize(struct pci_devinst *pi)
153{
154
155 if (pci_msix_enabled(pi))
156 return (VTBLK_REGSZ);
157 else
158 return (VTBLK_REGSZ - (VTCFG_R_CFG1 - VTCFG_R_MSIX));
159}
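/*
 * In other words: with MSI-X enabled the guest sees all VTBLK_REGSZ
 * bytes; without it the MSI-X vector registers drop out and every
 * register from VTCFG_R_MSIX upward shifts down by
 * (VTCFG_R_CFG1 - VTCFG_R_MSIX) bytes (see vtblk_adjust_offset()).
 */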
160
161/*
162 * Return the number of available descriptors in the vring taking care
163 * of the 16-bit index wraparound.
164 */
165static int
166hq_num_avail(struct vring_hqueue *hq)
167{
168 uint16_t ndesc;
169
170 /*
 171 * We're just computing (a-b) in GF(2^16).
172 *
173 * The only glitch here is that in standard C,
174 * uint16_t promotes to (signed) int when int has
175 * more than 16 bits (pretty much always now), so
176 * we have to force it back to unsigned.
177 */
178 ndesc = (unsigned)*hq->hq_avail_idx - (unsigned)hq->hq_cur_aidx;
179
180 assert(ndesc <= hq->hq_size);
181
182 return (ndesc);
183}
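/*
 * Worked example of the mod-2^16 arithmetic: if the guest's
 * avail_idx has wrapped around to 2 while hq_cur_aidx is still
 * 65533, then (uint16_t)(2 - 65533) == 5, i.e. five descriptors
 * are pending even though avail_idx < cur_aidx numerically.
 */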
184
185static void
186pci_vtblk_update_status(struct pci_vtblk_softc *sc, uint32_t value)
187{
188 if (value == 0) {
189 DPRINTF(("vtblk: device reset requested !\n"));
190 sc->vbsc_isr = 0;
191 sc->msix_table_idx_req = VIRTIO_MSI_NO_VECTOR;
192 sc->msix_table_idx_cfg = VIRTIO_MSI_NO_VECTOR;
193 sc->vbsc_features = 0;
194 sc->vbsc_pfn = 0;
195 sc->vbsc_lastq = 0;
196 memset(&sc->vbsc_q, 0, sizeof(struct vring_hqueue));
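	/*
	 * A reset returns the device to its pre-driver state: the
	 * MSI-X vector assignments fall back to VIRTIO_MSI_NO_VECTOR
	 * and the negotiated features, ring PFN and queue pointers
	 * are forgotten, so a rebooting guest re-probes from scratch.
	 */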
190 }
191
192 sc->vbsc_status = value;
193}
194
195static void
196pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vring_hqueue *hq)
197{
198 struct iovec iov[VTBLK_MAXSEGS];
199 struct virtio_blk_hdr *vbh;
200 struct virtio_desc *vd, *vid;
201 struct virtio_used *vu;
202 uint8_t *status;
203 int i;
204 int err;
205 int iolen;
206 int nsegs;
207 int uidx, aidx, didx;
208 int writeop, type;
214 int indirect, writeop, type;
209 off_t offset;
210
211 uidx = *hq->hq_used_idx;
212 aidx = hq->hq_cur_aidx;
213 didx = hq->hq_avail_ring[aidx % hq->hq_size];
214 assert(didx >= 0 && didx < hq->hq_size);
215
216 vd = &hq->hq_dtable[didx];
217
218 /*
219 * Verify that the descriptor is indirect, and obtain
220 * the pointer to the indirect descriptor.
221 * There has to be space for at least 3 descriptors
222 * in the indirect descriptor array: the block header,
223 * 1 or more data descriptors, and a status byte.
224 */
225 assert(vd->vd_flags & VRING_DESC_F_INDIRECT);
224 indirect = ((vd->vd_flags & VRING_DESC_F_INDIRECT) != 0);
226
227 nsegs = vd->vd_len / sizeof(struct virtio_desc);
228 assert(nsegs >= 3);
229 assert(nsegs < VTBLK_MAXSEGS + 2);
226 if (indirect) {
227 vid = paddr_guest2host(vtblk_ctx(sc), vd->vd_addr, vd->vd_len);
228 vd = &vid[0];
229 }
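	/*
	 * Two request layouts are handled from here on: a direct
	 * chain, hdr -> data ... -> status, linked via vd_next
	 * through the queue's descriptor table, or an indirect
	 * request, where the single descriptor above points at the
	 * same chain laid out as an array of struct virtio_desc in
	 * guest memory.
	 */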
230
231 vid = paddr_guest2host(vtblk_ctx(sc), vd->vd_addr, vd->vd_len);
232 assert((vid->vd_flags & VRING_DESC_F_INDIRECT) == 0);
233
234 /*
235 * The first descriptor will be the read-only fixed header
236 */
237 vbh = paddr_guest2host(vtblk_ctx(sc), vid[0].vd_addr,
234 vbh = paddr_guest2host(vtblk_ctx(sc), vd->vd_addr,
238 sizeof(struct virtio_blk_hdr));
239 assert(vid[0].vd_len == sizeof(struct virtio_blk_hdr));
240 assert(vid[0].vd_flags & VRING_DESC_F_NEXT);
241 assert((vid[0].vd_flags & VRING_DESC_F_WRITE) == 0);
236 assert(vd->vd_len == sizeof(struct virtio_blk_hdr));
237 assert(vd->vd_flags & VRING_DESC_F_NEXT);
238 assert((vd->vd_flags & VRING_DESC_F_WRITE) == 0);
242
243 /*
244 * XXX
245 * The guest should not be setting the BARRIER flag because
246 * we don't advertise the capability.
247 */
248 type = vbh->vbh_type & ~VBH_FLAG_BARRIER;
249 writeop = (type == VBH_OP_WRITE);
250
251 offset = vbh->vbh_sector * DEV_BSIZE;
252
253 /*
254 * Build up the iovec based on the guest's data descriptors
255 */
256 for (i = 1, iolen = 0; i < nsegs - 1; i++) {
257 iov[i-1].iov_base = paddr_guest2host(vtblk_ctx(sc),
258 vid[i].vd_addr, vid[i].vd_len);
259 iov[i-1].iov_len = vid[i].vd_len;
260 iolen += vid[i].vd_len;
253 i = iolen = 0;
254 while (1) {
255 if (indirect)
256 vd = &vid[i + 1]; /* skip first indirect desc */
257 else
258 vd = &hq->hq_dtable[vd->vd_next];
261
262 assert(vid[i].vd_flags & VRING_DESC_F_NEXT);
263 assert((vid[i].vd_flags & VRING_DESC_F_INDIRECT) == 0);
260 if ((vd->vd_flags & VRING_DESC_F_NEXT) == 0)
261 break;
264
263 if (i == VTBLK_MAXSEGS)
264 break;
265
265 /*
266 * - write op implies read-only descriptor,
267 * - read op implies write-only descriptor,
268 * therefore test the inverse of the descriptor bit
269 * to the op.
270 */
271 assert(((vid[i].vd_flags & VRING_DESC_F_WRITE) == 0) ==
272 assert(((vd->vd_flags & VRING_DESC_F_WRITE) == 0) ==
272 writeop);
274
275 iov[i].iov_base = paddr_guest2host(vtblk_ctx(sc),
276 vd->vd_addr,
277 vd->vd_len);
278 iov[i].iov_len = vd->vd_len;
279 iolen += vd->vd_len;
280 i++;
273 }
274
275 /* Lastly, get the address of the status byte */
276 status = paddr_guest2host(vtblk_ctx(sc), vid[nsegs - 1].vd_addr, 1);
277 assert(vid[nsegs - 1].vd_len == 1);
278 assert((vid[nsegs - 1].vd_flags & VRING_DESC_F_NEXT) == 0);
279 assert(vid[nsegs - 1].vd_flags & VRING_DESC_F_WRITE);
284 status = paddr_guest2host(vtblk_ctx(sc), vd->vd_addr, 1);
285 assert(vd->vd_len == 1);
286 assert((vd->vd_flags & VRING_DESC_F_NEXT) == 0);
287 assert(vd->vd_flags & VRING_DESC_F_WRITE);
280
281 DPRINTF(("virtio-block: %s op, %d bytes, %d segs, offset %ld\n\r",
282 writeop ? "write" : "read", iolen, nsegs - 2, offset));
290 writeop ? "write" : "read", iolen, i, offset));
283
284 if (writeop){
285 err = pwritev(sc->vbsc_fd, iov, nsegs - 2, offset);
286 } else {
287 err = preadv(sc->vbsc_fd, iov, nsegs - 2, offset);
288 }
292 if (writeop)
293 err = pwritev(sc->vbsc_fd, iov, i, offset);
294 else
295 err = preadv(sc->vbsc_fd, iov, i, offset);
289
290 *status = err < 0 ? VTBLK_S_IOERR : VTBLK_S_OK;
291
292 /*
293 * Return the single indirect descriptor back to the host
300 * Return the single descriptor back to the host
294 */
295 vu = &hq->hq_used_ring[uidx % hq->hq_size];
296 vu->vu_idx = didx;
297 vu->vu_tlen = 1;
298 hq->hq_cur_aidx++;
299 *hq->hq_used_idx += 1;
300}
301
302static void
303pci_vtblk_qnotify(struct pci_vtblk_softc *sc)
304{
305 struct vring_hqueue *hq = &sc->vbsc_q;
306 int i;
307 int ndescs;
308
309 /*
310 * Calculate number of ring entries to process
311 */
312 ndescs = hq_num_avail(hq);
313
314 if (ndescs == 0)
315 return;
316
317 /*
318 * Run through all the entries, placing them into iovecs and
319 * sending when an end-of-packet is found
320 */
321 for (i = 0; i < ndescs; i++)
322 pci_vtblk_proc(sc, hq);
323
324 /*
325 * Generate an interrupt if able
326 */
327 if ((*hq->hq_avail_flags & VRING_AVAIL_F_NO_INTERRUPT) == 0) {
328 if (use_msix) {
329 pci_generate_msix(sc->vbsc_pi, sc->msix_table_idx_req);
330 } else if (sc->vbsc_isr == 0) {
331 sc->vbsc_isr = 1;
332 pci_generate_msi(sc->vbsc_pi, 0);
333 }
334 }
335
336}
319}
320
321static void
322pci_vtblk_qnotify(struct pci_vtblk_softc *sc)
323{
324 struct vring_hqueue *hq = &sc->vbsc_q;
325 int ndescs;
326
327 while ((ndescs = hq_num_avail(hq)) != 0) {
328 /*
329 * Run through all the entries, placing them into iovecs and
330 * sending when an end-of-packet is found
331 */
332 pci_vtblk_proc(sc, hq);
333 }
334}
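/*
 * Note the drain pattern: hq_num_avail() is re-evaluated after each
 * request, so descriptors the guest posts while one is being
 * processed are picked up within the same notification.
 */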
335
336static void
337pci_vtblk_ring_init(struct pci_vtblk_softc *sc, uint64_t pfn)
338{
339 struct vring_hqueue *hq;
340
341 sc->vbsc_pfn = pfn << VRING_PFN;
342
343 /*
344 * Set up host pointers to the various parts of the
345 * queue
346 */
347 hq = &sc->vbsc_q;
348 hq->hq_size = VTBLK_RINGSZ;
349
350 hq->hq_dtable = paddr_guest2host(vtblk_ctx(sc), pfn << VRING_PFN,
351 vring_size(VTBLK_RINGSZ));
352 hq->hq_avail_flags = (uint16_t *)(hq->hq_dtable + hq->hq_size);
353 hq->hq_avail_idx = hq->hq_avail_flags + 1;
354 hq->hq_avail_ring = hq->hq_avail_flags + 2;
355 hq->hq_used_flags = (uint16_t *)roundup2((uintptr_t)hq->hq_avail_ring,
356 VRING_ALIGN);
357 hq->hq_used_idx = hq->hq_used_flags + 1;
358 hq->hq_used_ring = (struct virtio_used *)(hq->hq_used_flags + 2);
359
360 /*
361 * Initialize queue indexes
362 */
363 hq->hq_cur_aidx = 0;
364}
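/*
 * The pointer arithmetic above follows the legacy vring layout:
 * the descriptor table, then the avail ring (flags, idx,
 * ring[hq_size]), then, rounded up to VRING_ALIGN, the used ring
 * (flags, idx, ring[hq_size]).
 */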
365
366static int
367pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
368{
369 struct stat sbuf;
370 struct pci_vtblk_softc *sc;
371 off_t size;
372 int fd;
373 int sectsz;
374 const char *env_msi;
375
376 if (opts == NULL) {
377 printf("virtio-block: backing device required\n");
378 return (1);
379 }
380
381 /*
382 * The supplied backing file has to exist
383 */
384 fd = open(opts, O_RDWR);
385 if (fd < 0) {
386 perror("Could not open backing file");
387 return (1);
388 }
389
390 if (fstat(fd, &sbuf) < 0) {
391 perror("Could not stat backing file");
392 close(fd);
393 return (1);
394 }
395
396 /*
397 * Deal with raw devices
398 */
399 size = sbuf.st_size;
400 sectsz = DEV_BSIZE;
401 if (S_ISCHR(sbuf.st_mode)) {
402 if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 ||
403 ioctl(fd, DIOCGSECTORSIZE, &sectsz)) {
404 perror("Could not fetch dev blk/sector size");
405 close(fd);
406 return (1);
407 }
408 assert(size != 0);
409 assert(sectsz != 0);
410 }
411
412 sc = malloc(sizeof(struct pci_vtblk_softc));
413 memset(sc, 0, sizeof(struct pci_vtblk_softc));
414
415 pi->pi_arg = sc;
416 sc->vbsc_pi = pi;
417 sc->vbsc_fd = fd;
418
419 /* setup virtio block config space */
420 sc->vbsc_cfg.vbc_capacity = size / sectsz;
421 sc->vbsc_cfg.vbc_seg_max = VTBLK_MAXSEGS;
422 sc->vbsc_cfg.vbc_blk_size = sectsz;
423 sc->vbsc_cfg.vbc_size_max = 0; /* not negotiated */
424 sc->vbsc_cfg.vbc_geom_c = 0; /* no geometry */
425 sc->vbsc_cfg.vbc_geom_h = 0;
426 sc->vbsc_cfg.vbc_geom_s = 0;
427 sc->vbsc_cfg.vbc_sectors_max = 0;
428
429 /* initialize config space */
430 pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_BLOCK);
431 pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
432 pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
433 pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_BLOCK);
434
435 if ((env_msi = getenv("BHYVE_USE_MSI"))) {
436 if (strcasecmp(env_msi, "yes") == 0)
437 use_msix = 0;
438 }
439
440 if (use_msix) {
441 /* MSI-X Support */
442 sc->msix_table_idx_req = VIRTIO_MSI_NO_VECTOR;
443 sc->msix_table_idx_cfg = VIRTIO_MSI_NO_VECTOR;
444
445 if (pci_emul_add_msixcap(pi, 2, 1))
446 return (1);
447 } else {
448 /* MSI Support */
449 pci_emul_add_msicap(pi, 1);
450 }
451
452 pci_emul_alloc_bar(pi, 0, PCIBAR_IO, VTBLK_REGSZ);
453
454 return (0);
455}
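/*
 * The opts string is simply the backing file or device path; for
 * example, a bhyve slot argument of the form
 * "-s 2:0,virtio-blk,/path/to/disk.img" (path illustrative) hands
 * "/path/to/disk.img" to this routine.
 */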
456
457static uint64_t
458vtblk_adjust_offset(struct pci_devinst *pi, uint64_t offset)
459{
460 /*
461 * Device specific offsets used by guest would change
462 * based on whether MSI-X capability is enabled or not
463 */
464 if (!pci_msix_enabled(pi)) {
465 if (offset >= VTCFG_R_MSIX)
466 return (offset + (VTCFG_R_CFG1 - VTCFG_R_MSIX));
467 }
468
469 return (offset);
470}
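/*
 * Example: with MSI-X disabled, a guest access to what it considers
 * VTCFG_R_CFG1 arrives (VTCFG_R_CFG1 - VTCFG_R_MSIX) bytes low and
 * is shifted back up here before the read/write handlers decode it.
 */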
471
472static void
473pci_vtblk_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
474 int baridx, uint64_t offset, int size, uint64_t value)
475{
476 struct pci_vtblk_softc *sc = pi->pi_arg;
477
478 if (use_msix) {
479 if (baridx == pci_msix_table_bar(pi) ||
480 baridx == pci_msix_pba_bar(pi)) {
481 pci_emul_msix_twrite(pi, offset, size, value);
482 return;
483 }
484 }
485
486 assert(baridx == 0);
487
488 if (offset + size > pci_vtblk_iosize(pi)) {
489 DPRINTF(("vtblk_write: 2big, offset %ld size %d\n",
490 offset, size));
491 return;
492 }
493
494 offset = vtblk_adjust_offset(pi, offset);
495
496 switch (offset) {
497 case VTCFG_R_GUESTCAP:
498 assert(size == 4);
499 sc->vbsc_features = value & VTBLK_S_HOSTCAPS;
500 break;
501 case VTCFG_R_PFN:
502 assert(size == 4);
503 pci_vtblk_ring_init(sc, value);
504 break;
505 case VTCFG_R_QSEL:
506 assert(size == 2);
507 sc->vbsc_lastq = value;
508 break;
509 case VTCFG_R_QNOTIFY:
510 assert(size == 2);
511 assert(value == 0);
512 pci_vtblk_qnotify(sc);
513 break;
514 case VTCFG_R_STATUS:
515 assert(size == 1);
516 pci_vtblk_update_status(sc, value);
517 break;
518 case VTCFG_R_CFGVEC:
519 assert(size == 2);
520 sc->msix_table_idx_cfg = value;
521 break;
522 case VTCFG_R_QVEC:
523 assert(size == 2);
524 sc->msix_table_idx_req = value;
525 break;
526 case VTCFG_R_HOSTCAP:
527 case VTCFG_R_QNUM:
528 case VTCFG_R_ISR:
529 case VTBLK_R_CFG ... VTBLK_R_CFG_END:
530 DPRINTF(("vtblk: write to readonly reg %ld\n\r", offset));
531 break;
532 default:
533 DPRINTF(("vtblk: unknown i/o write offset %ld\n\r", offset));
534 value = 0;
535 break;
536 }
537}
538
539uint64_t
540pci_vtblk_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
541 int baridx, uint64_t offset, int size)
542{
543 struct pci_vtblk_softc *sc = pi->pi_arg;
544 void *ptr;
545 uint32_t value;
546
547 if (use_msix) {
548 if (baridx == pci_msix_table_bar(pi) ||
549 baridx == pci_msix_pba_bar(pi)) {
550 return (pci_emul_msix_tread(pi, offset, size));
551 }
552 }
553
554 assert(baridx == 0);
555
556 if (offset + size > pci_vtblk_iosize(pi)) {
557 DPRINTF(("vtblk_read: 2big, offset %ld size %d\n",
558 offset, size));
559 return (0);
560 }
561
562 offset = vtblk_adjust_offset(pi, offset);
563
564 switch (offset) {
565 case VTCFG_R_HOSTCAP:
566 assert(size == 4);
567 value = VTBLK_S_HOSTCAPS;
568 break;
569 case VTCFG_R_GUESTCAP:
570 assert(size == 4);
571 value = sc->vbsc_features; /* XXX never read ? */
572 break;
573 case VTCFG_R_PFN:
574 assert(size == 4);
575 value = sc->vbsc_pfn >> VRING_PFN;
576 break;
577 case VTCFG_R_QNUM:
578 value = (sc->vbsc_lastq == 0) ? VTBLK_RINGSZ: 0;
579 break;
580 case VTCFG_R_QSEL:
581 assert(size == 2);
582 value = sc->vbsc_lastq; /* XXX never read ? */
583 break;
584 case VTCFG_R_QNOTIFY:
585 assert(size == 2);
586 value = 0; /* XXX never read ? */
587 break;
588 case VTCFG_R_STATUS:
589 assert(size == 1);
590 value = sc->vbsc_status;
591 break;
592 case VTCFG_R_ISR:
593 assert(size == 1);
594 value = sc->vbsc_isr;
595 sc->vbsc_isr = 0; /* a read clears this flag */
596 break;
597 case VTCFG_R_CFGVEC:
598 assert(size == 2);
599 value = sc->msix_table_idx_cfg;
600 break;
601 case VTCFG_R_QVEC:
602 assert(size == 2);
603 value = sc->msix_table_idx_req;
604 break;
605 case VTBLK_R_CFG ... VTBLK_R_CFG_END:
606 assert(size + offset <= (VTBLK_R_CFG_END + 1));
607 ptr = (uint8_t *)&sc->vbsc_cfg + offset - VTBLK_R_CFG;
608 if (size == 1) {
609 value = *(uint8_t *) ptr;
610 } else if (size == 2) {
611 value = *(uint16_t *) ptr;
612 } else {
613 value = *(uint32_t *) ptr;
614 }
615 break;
616 default:
617 DPRINTF(("vtblk: unknown i/o read offset %ld\n\r", offset));
618 value = 0;
619 break;
620 }
621
622 return (value);
623}
624
625struct pci_devemu pci_de_vblk = {
626 .pe_emu = "virtio-blk",
627 .pe_init = pci_vtblk_init,
628 .pe_barwrite = pci_vtblk_write,
629 .pe_barread = pci_vtblk_read
630};
631PCI_EMUL_SET(pci_de_vblk);