/* pci_virtio_block.c revision 241744 */
1/*- 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 
25 * 26 * $FreeBSD$ 27 */ 28 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD$"); 31 32#include <sys/param.h> 33#include <sys/linker_set.h> 34#include <sys/stat.h> 35#include <sys/uio.h> 36#include <sys/ioctl.h> 37 38#include <errno.h> 39#include <fcntl.h> 40#include <stdio.h> 41#include <stdlib.h> 42#include <stdint.h> 43#include <string.h> 44#include <strings.h> 45#include <unistd.h> 46#include <assert.h> 47#include <pthread.h> 48 49#include "fbsdrun.h" 50#include "pci_emul.h" 51#include "virtio.h" 52 53#define VTBLK_RINGSZ 64 54 55#define VTBLK_CFGSZ 28 56 57#define VTBLK_R_CFG VTCFG_R_CFG0 58#define VTBLK_R_CFG_END VTBLK_R_CFG + VTBLK_CFGSZ -1 59#define VTBLK_R_MAX VTBLK_R_CFG_END 60 61#define VTBLK_REGSZ VTBLK_R_MAX+1 62 63#define VTBLK_MAXSEGS 32 64 65#define VTBLK_S_OK 0 66#define VTBLK_S_IOERR 1 67 68/* 69 * Host capabilities 70 */ 71#define VTBLK_S_HOSTCAPS \ 72 ( 0x00000004 | /* host maximum request segments */ \ 73 0x10000000 ) /* supports indirect descriptors */ 74 75struct vring_hqueue { 76 /* Internal state */ 77 uint16_t hq_size; 78 uint16_t hq_cur_aidx; /* trails behind 'avail_idx' */ 79 80 /* Host-context pointers to the queue */ 81 struct virtio_desc *hq_dtable; 82 uint16_t *hq_avail_flags; 83 uint16_t *hq_avail_idx; /* monotonically increasing */ 84 uint16_t *hq_avail_ring; 85 86 uint16_t *hq_used_flags; 87 uint16_t *hq_used_idx; /* monotonically increasing */ 88 struct virtio_used *hq_used_ring; 89}; 90 91/* 92 * Config space 93 */ 94struct vtblk_config { 95 uint64_t vbc_capacity; 96 uint32_t vbc_size_max; 97 uint32_t vbc_seg_max; 98 uint16_t vbc_geom_c; 99 uint8_t vbc_geom_h; 100 uint8_t vbc_geom_s; 101 uint32_t vbc_blk_size; 102 uint32_t vbc_sectors_max; 103} __packed; 104CTASSERT(sizeof(struct vtblk_config) == VTBLK_CFGSZ); 105 106/* 107 * Fixed-size block header 108 */ 109struct virtio_blk_hdr { 110#define VBH_OP_READ 0 111#define VBH_OP_WRITE 1 112 uint32_t vbh_type; 113 uint32_t vbh_ioprio; 114 uint64_t vbh_sector; 115} __packed; 116 117/* 
118 * Debug printf 119 */ 120static int pci_vtblk_debug; 121#define DPRINTF(params) if (pci_vtblk_debug) printf params 122#define WPRINTF(params) printf params 123 124/* 125 * Per-device softc 126 */ 127struct pci_vtblk_softc { 128 struct pci_devinst *vbsc_pi; 129 int vbsc_fd; 130 int vbsc_status; 131 int vbsc_isr; 132 int vbsc_lastq; 133 uint32_t vbsc_features; 134 uint64_t vbsc_pfn; 135 struct vring_hqueue vbsc_q; 136 struct vtblk_config vbsc_cfg; 137}; 138 139/* 140 * Return the number of available descriptors in the vring taking care 141 * of the 16-bit index wraparound. 142 */ 143static int 144hq_num_avail(struct vring_hqueue *hq) 145{ 146 int ndesc; 147 148 if (*hq->hq_avail_idx >= hq->hq_cur_aidx) 149 ndesc = *hq->hq_avail_idx - hq->hq_cur_aidx; 150 else 151 ndesc = UINT16_MAX - hq->hq_cur_aidx + *hq->hq_avail_idx + 1; 152 153 assert(ndesc >= 0 && ndesc <= hq->hq_size); 154 155 return (ndesc); 156} 157 158static void 159pci_vtblk_update_status(struct pci_vtblk_softc *sc, uint32_t value) 160{ 161 if (value == 0) { 162 DPRINTF(("vtblk: device reset requested !\n")); 163 } 164 165 sc->vbsc_status = value; 166} 167 168static void 169pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vring_hqueue *hq) 170{ 171 struct iovec iov[VTBLK_MAXSEGS]; 172 struct virtio_blk_hdr *vbh; 173 struct virtio_desc *vd, *vid; 174 struct virtio_used *vu; 175 uint8_t *status; 176 int i; 177 int err; 178 int iolen; 179 int nsegs; 180 int uidx, aidx, didx; 181 int writeop; 182 off_t offset; 183 184 uidx = *hq->hq_used_idx; 185 aidx = hq->hq_cur_aidx; 186 didx = hq->hq_avail_ring[aidx % hq->hq_size]; 187 assert(didx >= 0 && didx < hq->hq_size); 188 189 vd = &hq->hq_dtable[didx]; 190 191 /* 192 * Verify that the descriptor is indirect, and obtain 193 * the pointer to the indirect descriptor. 194 * There has to be space for at least 3 descriptors 195 * in the indirect descriptor array: the block header, 196 * 1 or more data descriptors, and a status byte. 
197 */ 198 assert(vd->vd_flags & VRING_DESC_F_INDIRECT); 199 200 nsegs = vd->vd_len / sizeof(struct virtio_desc); 201 assert(nsegs >= 3); 202 assert(nsegs < VTBLK_MAXSEGS + 2); 203 204 vid = paddr_guest2host(vd->vd_addr); 205 assert((vid->vd_flags & VRING_DESC_F_INDIRECT) == 0); 206 207 /* 208 * The first descriptor will be the read-only fixed header 209 */ 210 vbh = paddr_guest2host(vid[0].vd_addr); 211 assert(vid[0].vd_len == sizeof(struct virtio_blk_hdr)); 212 assert(vid[0].vd_flags & VRING_DESC_F_NEXT); 213 assert((vid[0].vd_flags & VRING_DESC_F_WRITE) == 0); 214 215 writeop = (vbh->vbh_type == VBH_OP_WRITE); 216 217 offset = vbh->vbh_sector * DEV_BSIZE; 218 219 /* 220 * Build up the iovec based on the guest's data descriptors 221 */ 222 for (i = 1, iolen = 0; i < nsegs - 1; i++) { 223 iov[i-1].iov_base = paddr_guest2host(vid[i].vd_addr); 224 iov[i-1].iov_len = vid[i].vd_len; 225 iolen += vid[i].vd_len; 226 227 assert(vid[i].vd_flags & VRING_DESC_F_NEXT); 228 assert((vid[i].vd_flags & VRING_DESC_F_INDIRECT) == 0); 229 230 /* 231 * - write op implies read-only descriptor, 232 * - read op implies write-only descriptor, 233 * therefore test the inverse of the descriptor bit 234 * to the op. 235 */ 236 assert(((vid[i].vd_flags & VRING_DESC_F_WRITE) == 0) == 237 writeop); 238 } 239 240 /* Lastly, get the address of the status byte */ 241 status = paddr_guest2host(vid[nsegs - 1].vd_addr); 242 assert(vid[nsegs - 1].vd_len == 1); 243 assert((vid[nsegs - 1].vd_flags & VRING_DESC_F_NEXT) == 0); 244 assert(vid[nsegs - 1].vd_flags & VRING_DESC_F_WRITE); 245 246 DPRINTF(("virtio-block: %s op, %d bytes, %d segs, offset %ld\n\r", 247 writeop ? "write" : "read", iolen, nsegs - 2, offset)); 248 249 if (writeop){ 250 err = pwritev(sc->vbsc_fd, iov, nsegs - 2, offset); 251 } else { 252 err = preadv(sc->vbsc_fd, iov, nsegs - 2, offset); 253 } 254 255 *status = err < 0 ? 
VTBLK_S_IOERR : VTBLK_S_OK; 256 257 /* 258 * Return the single indirect descriptor back to the host 259 */ 260 vu = &hq->hq_used_ring[uidx % hq->hq_size]; 261 vu->vu_idx = didx; 262 vu->vu_tlen = 1; 263 hq->hq_cur_aidx++; 264 *hq->hq_used_idx += 1; 265} 266 267static void 268pci_vtblk_qnotify(struct pci_vtblk_softc *sc) 269{ 270 struct vring_hqueue *hq = &sc->vbsc_q; 271 int i; 272 int ndescs; 273 274 /* 275 * Calculate number of ring entries to process 276 */ 277 ndescs = hq_num_avail(hq); 278 279 if (ndescs == 0) 280 return; 281 282 /* 283 * Run through all the entries, placing them into iovecs and 284 * sending when an end-of-packet is found 285 */ 286 for (i = 0; i < ndescs; i++) 287 pci_vtblk_proc(sc, hq); 288 289 /* 290 * Generate an interrupt if able 291 */ 292 if ((*hq->hq_avail_flags & VRING_AVAIL_F_NO_INTERRUPT) == 0 && 293 sc->vbsc_isr == 0) { 294 sc->vbsc_isr = 1; 295 pci_generate_msi(sc->vbsc_pi, 0); 296 } 297 298} 299 300static void 301pci_vtblk_ring_init(struct pci_vtblk_softc *sc, uint64_t pfn) 302{ 303 struct vring_hqueue *hq; 304 305 sc->vbsc_pfn = pfn << VRING_PFN; 306 307 /* 308 * Set up host pointers to the various parts of the 309 * queue 310 */ 311 hq = &sc->vbsc_q; 312 hq->hq_size = VTBLK_RINGSZ; 313 314 hq->hq_dtable = paddr_guest2host(pfn << VRING_PFN); 315 hq->hq_avail_flags = (uint16_t *)(hq->hq_dtable + hq->hq_size); 316 hq->hq_avail_idx = hq->hq_avail_flags + 1; 317 hq->hq_avail_ring = hq->hq_avail_flags + 2; 318 hq->hq_used_flags = (uint16_t *)roundup2((uintptr_t)hq->hq_avail_ring, 319 VRING_ALIGN); 320 hq->hq_used_idx = hq->hq_used_flags + 1; 321 hq->hq_used_ring = (struct virtio_used *)(hq->hq_used_flags + 2); 322 323 /* 324 * Initialize queue indexes 325 */ 326 hq->hq_cur_aidx = 0; 327} 328 329static int 330pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) 331{ 332 struct stat sbuf; 333 struct pci_vtblk_softc *sc; 334 int fd; 335 336 if (opts == NULL) { 337 printf("virtio-block: backing device required\n"); 338 
return (1); 339 } 340 341 /* 342 * Access to guest memory is required. Fail if 343 * memory not mapped 344 */ 345 if (paddr_guest2host(0) == NULL) 346 return (1); 347 348 /* 349 * The supplied backing file has to exist 350 */ 351 fd = open(opts, O_RDWR); 352 if (fd < 0) { 353 perror("Could not open backing file"); 354 return (1); 355 } 356 357 if (fstat(fd, &sbuf) < 0) { 358 perror("Could not stat backing file"); 359 close(fd); 360 return (1); 361 } 362 363 sc = malloc(sizeof(struct pci_vtblk_softc)); 364 memset(sc, 0, sizeof(struct pci_vtblk_softc)); 365 366 pi->pi_arg = sc; 367 sc->vbsc_pi = pi; 368 sc->vbsc_fd = fd; 369 370 /* setup virtio block config space */ 371 sc->vbsc_cfg.vbc_capacity = sbuf.st_size / DEV_BSIZE; 372 sc->vbsc_cfg.vbc_seg_max = VTBLK_MAXSEGS; 373 sc->vbsc_cfg.vbc_blk_size = DEV_BSIZE; 374 sc->vbsc_cfg.vbc_size_max = 0; /* not negotiated */ 375 sc->vbsc_cfg.vbc_geom_c = 0; /* no geometry */ 376 sc->vbsc_cfg.vbc_geom_h = 0; 377 sc->vbsc_cfg.vbc_geom_s = 0; 378 sc->vbsc_cfg.vbc_sectors_max = 0; 379 380 /* initialize config space */ 381 pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_BLOCK); 382 pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); 383 pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE); 384 pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_BLOCK); 385 pci_emul_add_msicap(pi, 1); 386 pci_emul_alloc_bar(pi, 0, PCIBAR_IO, VTBLK_REGSZ); 387 388 return (0); 389} 390 391static void 392pci_vtblk_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, 393 int baridx, uint64_t offset, int size, uint64_t value) 394{ 395 struct pci_vtblk_softc *sc = pi->pi_arg; 396 397 assert(baridx == 0); 398 399 if (offset + size > VTBLK_REGSZ) { 400 DPRINTF(("vtblk_write: 2big, offset %ld size %d\n", 401 offset, size)); 402 return; 403 } 404 405 switch (offset) { 406 case VTCFG_R_GUESTCAP: 407 assert(size == 4); 408 sc->vbsc_features = value & VTBLK_S_HOSTCAPS; 409 break; 410 case VTCFG_R_PFN: 411 assert(size == 4); 412 pci_vtblk_ring_init(sc, value); 413 
break; 414 case VTCFG_R_QSEL: 415 assert(size == 2); 416 sc->vbsc_lastq = value; 417 break; 418 case VTCFG_R_QNOTIFY: 419 assert(size == 2); 420 assert(value == 0); 421 pci_vtblk_qnotify(sc); 422 break; 423 case VTCFG_R_STATUS: 424 assert(size == 1); 425 pci_vtblk_update_status(sc, value); 426 break; 427 case VTCFG_R_HOSTCAP: 428 case VTCFG_R_QNUM: 429 case VTCFG_R_ISR: 430 case VTBLK_R_CFG ... VTBLK_R_CFG_END: 431 DPRINTF(("vtblk: write to readonly reg %ld\n\r", offset)); 432 break; 433 default: 434 DPRINTF(("vtblk: unknown i/o write offset %ld\n\r", offset)); 435 value = 0; 436 break; 437 } 438} 439 440uint64_t 441pci_vtblk_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, 442 int baridx, uint64_t offset, int size) 443{ 444 struct pci_vtblk_softc *sc = pi->pi_arg; 445 void *ptr; 446 uint32_t value; 447 448 assert(baridx == 0); 449 450 if (offset + size > VTBLK_REGSZ) { 451 DPRINTF(("vtblk_read: 2big, offset %ld size %d\n", 452 offset, size)); 453 return (0); 454 } 455 456 switch (offset) { 457 case VTCFG_R_HOSTCAP: 458 assert(size == 4); 459 value = VTBLK_S_HOSTCAPS; 460 break; 461 case VTCFG_R_GUESTCAP: 462 assert(size == 4); 463 value = sc->vbsc_features; /* XXX never read ? */ 464 break; 465 case VTCFG_R_PFN: 466 assert(size == 4); 467 value = sc->vbsc_pfn >> VRING_PFN; 468 break; 469 case VTCFG_R_QNUM: 470 value = (sc->vbsc_lastq == 0) ? VTBLK_RINGSZ: 0; 471 break; 472 case VTCFG_R_QSEL: 473 assert(size == 2); 474 value = sc->vbsc_lastq; /* XXX never read ? */ 475 break; 476 case VTCFG_R_QNOTIFY: 477 assert(size == 2); 478 value = 0; /* XXX never read ? */ 479 break; 480 case VTCFG_R_STATUS: 481 assert(size == 1); 482 value = sc->vbsc_status; 483 break; 484 case VTCFG_R_ISR: 485 assert(size == 1); 486 value = sc->vbsc_isr; 487 sc->vbsc_isr = 0; /* a read clears this flag */ 488 break; 489 case VTBLK_R_CFG ... 
VTBLK_R_CFG_END: 490 assert(size + offset <= (VTBLK_R_CFG_END + 1)); 491 ptr = (uint8_t *)&sc->vbsc_cfg + offset - VTBLK_R_CFG; 492 if (size == 1) { 493 value = *(uint8_t *) ptr; 494 } else if (size == 2) { 495 value = *(uint16_t *) ptr; 496 } else { 497 value = *(uint32_t *) ptr; 498 } 499 break; 500 default: 501 DPRINTF(("vtblk: unknown i/o read offset %ld\n\r", offset)); 502 value = 0; 503 break; 504 } 505 506 return (value); 507} 508 509struct pci_devemu pci_de_vblk = { 510 .pe_emu = "virtio-blk", 511 .pe_init = pci_vtblk_init, 512 .pe_barwrite = pci_vtblk_write, 513 .pe_barread = pci_vtblk_read 514}; 515PCI_EMUL_SET(pci_de_vblk); 516