/* pci_virtio_block.c revision 256390 */
1254721Semaste/*- 2254721Semaste * Copyright (c) 2011 NetApp, Inc. 3254721Semaste * All rights reserved. 4254721Semaste * 5254721Semaste * Redistribution and use in source and binary forms, with or without 6254721Semaste * modification, are permitted provided that the following conditions 7254721Semaste * are met: 8254721Semaste * 1. Redistributions of source code must retain the above copyright 9254721Semaste * notice, this list of conditions and the following disclaimer. 10254721Semaste * 2. Redistributions in binary form must reproduce the above copyright 11254721Semaste * notice, this list of conditions and the following disclaimer in the 12254721Semaste * documentation and/or other materials provided with the distribution. 13254721Semaste * 14254721Semaste * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15254721Semaste * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16254721Semaste * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17254721Semaste * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18254721Semaste * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19254721Semaste * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20254721Semaste * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21254721Semaste * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22254721Semaste * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23254721Semaste * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24254721Semaste * SUCH DAMAGE. 
25254721Semaste * 26254721Semaste * $FreeBSD: stable/10/usr.sbin/bhyve/pci_virtio_block.c 256390 2013-10-12 19:41:35Z grehan $ 27254721Semaste */ 28254721Semaste 29254721Semaste#include <sys/cdefs.h> 30254721Semaste__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/pci_virtio_block.c 256390 2013-10-12 19:41:35Z grehan $"); 31254721Semaste 32254721Semaste#include <sys/param.h> 33254721Semaste#include <sys/linker_set.h> 34254721Semaste#include <sys/stat.h> 35254721Semaste#include <sys/uio.h> 36254721Semaste#include <sys/ioctl.h> 37254721Semaste#include <sys/disk.h> 38254721Semaste 39254721Semaste#include <errno.h> 40254721Semaste#include <fcntl.h> 41254721Semaste#include <stdio.h> 42254721Semaste#include <stdlib.h> 43254721Semaste#include <stdint.h> 44254721Semaste#include <string.h> 45254721Semaste#include <strings.h> 46254721Semaste#include <unistd.h> 47254721Semaste#include <assert.h> 48254721Semaste#include <pthread.h> 49254721Semaste#include <md5.h> 50254721Semaste 51254721Semaste#include "bhyverun.h" 52254721Semaste#include "pci_emul.h" 53254721Semaste#include "virtio.h" 54254721Semaste 55254721Semaste#ifndef min 56254721Semaste#define min(a, b) ((a) < (b) ? 
(a) : (b)) 57254721Semaste#endif 58254721Semaste 59254721Semaste#define VTBLK_RINGSZ 64 60254721Semaste 61254721Semaste#define VTBLK_MAXSEGS 32 62254721Semaste 63254721Semaste#define VTBLK_S_OK 0 64254721Semaste#define VTBLK_S_IOERR 1 65254721Semaste#define VTBLK_S_UNSUPP 2 66254721Semaste 67254721Semaste#define VTBLK_BLK_ID_BYTES 20 68254721Semaste 69254721Semaste/* 70254721Semaste * Host capabilities 71254721Semaste */ 72254721Semaste#define VTBLK_S_HOSTCAPS \ 73254721Semaste ( 0x00000004 | /* host maximum request segments */ \ 74254721Semaste VIRTIO_RING_F_INDIRECT_DESC ) /* indirect descriptors */ 75254721Semaste 76254721Semaste/* 77254721Semaste * Config space "registers" 78254721Semaste */ 79254721Semastestruct vtblk_config { 80254721Semaste uint64_t vbc_capacity; 81254721Semaste uint32_t vbc_size_max; 82254721Semaste uint32_t vbc_seg_max; 83254721Semaste uint16_t vbc_geom_c; 84254721Semaste uint8_t vbc_geom_h; 85254721Semaste uint8_t vbc_geom_s; 86254721Semaste uint32_t vbc_blk_size; 87254721Semaste uint32_t vbc_sectors_max; 88254721Semaste} __packed; 89254721Semaste 90254721Semaste/* 91254721Semaste * Fixed-size block header 92254721Semaste */ 93254721Semastestruct virtio_blk_hdr { 94254721Semaste#define VBH_OP_READ 0 95254721Semaste#define VBH_OP_WRITE 1 96254721Semaste#define VBH_OP_IDENT 8 97254721Semaste#define VBH_FLAG_BARRIER 0x80000000 /* OR'ed into vbh_type */ 98254721Semaste uint32_t vbh_type; 99254721Semaste uint32_t vbh_ioprio; 100254721Semaste uint64_t vbh_sector; 101254721Semaste} __packed; 102254721Semaste 103254721Semaste/* 104254721Semaste * Debug printf 105254721Semaste */ 106254721Semastestatic int pci_vtblk_debug; 107254721Semaste#define DPRINTF(params) if (pci_vtblk_debug) printf params 108254721Semaste#define WPRINTF(params) printf params 109254721Semaste 110254721Semaste/* 111254721Semaste * Per-device softc 112254721Semaste */ 113254721Semastestruct pci_vtblk_softc { 114254721Semaste struct virtio_softc vbsc_vs; 115254721Semaste 
struct vqueue_info vbsc_vq; 116254721Semaste int vbsc_fd; 117254721Semaste struct vtblk_config vbsc_cfg; 118254721Semaste char vbsc_ident[VTBLK_BLK_ID_BYTES]; 119254721Semaste}; 120254721Semaste 121254721Semastestatic void pci_vtblk_reset(void *); 122254721Semastestatic void pci_vtblk_notify(void *, struct vqueue_info *); 123254721Semastestatic int pci_vtblk_cfgread(void *, int, int, uint32_t *); 124254721Semastestatic int pci_vtblk_cfgwrite(void *, int, int, uint32_t); 125254721Semaste 126254721Semastestatic struct virtio_consts vtblk_vi_consts = { 127254721Semaste "vtblk", /* our name */ 128254721Semaste 1, /* we support 1 virtqueue */ 129254721Semaste sizeof(struct vtblk_config), /* config reg size */ 130254721Semaste pci_vtblk_reset, /* reset */ 131254721Semaste pci_vtblk_notify, /* device-wide qnotify */ 132254721Semaste pci_vtblk_cfgread, /* read PCI config */ 133254721Semaste pci_vtblk_cfgwrite, /* write PCI config */ 134254721Semaste VTBLK_S_HOSTCAPS, /* our capabilities */ 135254721Semaste}; 136254721Semaste 137254721Semastestatic void 138254721Semastepci_vtblk_reset(void *vsc) 139254721Semaste{ 140254721Semaste struct pci_vtblk_softc *sc = vsc; 141254721Semaste 142254721Semaste DPRINTF(("vtblk: device reset requested !\n")); 143254721Semaste vi_reset_dev(&sc->vbsc_vs); 144254721Semaste} 145254721Semaste 146254721Semastestatic void 147254721Semastepci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq) 148254721Semaste{ 149254721Semaste struct virtio_blk_hdr *vbh; 150254721Semaste uint8_t *status; 151254721Semaste int i, n; 152254721Semaste int err; 153254721Semaste int iolen; 154254721Semaste int writeop, type; 155254721Semaste off_t offset; 156254721Semaste struct iovec iov[VTBLK_MAXSEGS + 2]; 157254721Semaste uint16_t flags[VTBLK_MAXSEGS + 2]; 158254721Semaste 159254721Semaste n = vq_getchain(vq, iov, VTBLK_MAXSEGS + 2, flags); 160254721Semaste 161254721Semaste /* 162254721Semaste * The first descriptor will be the read-only fixed header, 
163254721Semaste * and the last is for status (hence +2 above and below). 164254721Semaste * The remaining iov's are the actual data I/O vectors. 165254721Semaste * 166254721Semaste * XXX - note - this fails on crash dump, which does a 167254721Semaste * VIRTIO_BLK_T_FLUSH with a zero transfer length 168254721Semaste */ 169254721Semaste assert(n >= 2 && n <= VTBLK_MAXSEGS + 2); 170254721Semaste 171254721Semaste assert((flags[0] & VRING_DESC_F_WRITE) == 0); 172254721Semaste assert(iov[0].iov_len == sizeof(struct virtio_blk_hdr)); 173254721Semaste vbh = iov[0].iov_base; 174254721Semaste 175254721Semaste status = iov[--n].iov_base; 176254721Semaste assert(iov[n].iov_len == 1); 177254721Semaste assert(flags[n] & VRING_DESC_F_WRITE); 178254721Semaste 179254721Semaste /* 180254721Semaste * XXX 181254721Semaste * The guest should not be setting the BARRIER flag because 182254721Semaste * we don't advertise the capability. 183254721Semaste */ 184254721Semaste type = vbh->vbh_type & ~VBH_FLAG_BARRIER; 185254721Semaste writeop = (type == VBH_OP_WRITE); 186254721Semaste 187254721Semaste offset = vbh->vbh_sector * DEV_BSIZE; 188254721Semaste 189254721Semaste iolen = 0; 190254721Semaste for (i = 1; i < n; i++) { 191254721Semaste /* 192254721Semaste * - write op implies read-only descriptor, 193254721Semaste * - read/ident op implies write-only descriptor, 194254721Semaste * therefore test the inverse of the descriptor bit 195254721Semaste * to the op. 196254721Semaste */ 197254721Semaste assert(((flags[i] & VRING_DESC_F_WRITE) == 0) == writeop); 198254721Semaste iolen += iov[i].iov_len; 199254721Semaste } 200254721Semaste 201254721Semaste DPRINTF(("virtio-block: %s op, %d bytes, %d segs, offset %ld\n\r", 202254721Semaste writeop ? 
"write" : "read/ident", iolen, i - 1, offset)); 203254721Semaste 204254721Semaste switch (type) { 205254721Semaste case VBH_OP_WRITE: 206254721Semaste err = pwritev(sc->vbsc_fd, iov + 1, i - 1, offset); 207254721Semaste break; 208254721Semaste case VBH_OP_READ: 209254721Semaste err = preadv(sc->vbsc_fd, iov + 1, i - 1, offset); 210254721Semaste break; 211254721Semaste case VBH_OP_IDENT: 212254721Semaste /* Assume a single buffer */ 213254721Semaste strlcpy(iov[1].iov_base, sc->vbsc_ident, 214254721Semaste min(iov[1].iov_len, sizeof(sc->vbsc_ident))); 215254721Semaste err = 0; 216254721Semaste break; 217254721Semaste default: 218254721Semaste err = -ENOSYS; 219254721Semaste break; 220254721Semaste } 221254721Semaste 222254721Semaste /* convert errno into a virtio block error return */ 223254721Semaste if (err < 0) { 224254721Semaste if (err == -ENOSYS) 225254721Semaste *status = VTBLK_S_UNSUPP; 226254721Semaste else 227254721Semaste *status = VTBLK_S_IOERR; 228254721Semaste } else 229254721Semaste *status = VTBLK_S_OK; 230254721Semaste 231254721Semaste /* 232254721Semaste * Return the descriptor back to the host. 233254721Semaste * We wrote 1 byte (our status) to host. 234254721Semaste */ 235254721Semaste vq_relchain(vq, 1); 236254721Semaste} 237254721Semaste 238254721Semastestatic void 239254721Semastepci_vtblk_notify(void *vsc, struct vqueue_info *vq) 240254721Semaste{ 241254721Semaste struct pci_vtblk_softc *sc = vsc; 242254721Semaste 243254721Semaste vq_startchains(vq); 244254721Semaste while (vq_has_descs(vq)) 245254721Semaste pci_vtblk_proc(sc, vq); 246254721Semaste vq_endchains(vq, 1); /* Generate interrupt if appropriate. 
*/ 247254721Semaste} 248254721Semaste 249254721Semastestatic int 250254721Semastepci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) 251254721Semaste{ 252254721Semaste struct stat sbuf; 253254721Semaste MD5_CTX mdctx; 254254721Semaste u_char digest[16]; 255254721Semaste struct pci_vtblk_softc *sc; 256254721Semaste off_t size; 257254721Semaste int fd; 258254721Semaste int sectsz; 259254721Semaste int use_msix; 260254721Semaste const char *env_msi; 261254721Semaste 262254721Semaste if (opts == NULL) { 263254721Semaste printf("virtio-block: backing device required\n"); 264254721Semaste return (1); 265254721Semaste } 266254721Semaste 267254721Semaste /* 268254721Semaste * The supplied backing file has to exist 269254721Semaste */ 270254721Semaste fd = open(opts, O_RDWR); 271254721Semaste if (fd < 0) { 272254721Semaste perror("Could not open backing file"); 273254721Semaste return (1); 274254721Semaste } 275269024Semaste 276269024Semaste if (fstat(fd, &sbuf) < 0) { 277254721Semaste perror("Could not stat backing file"); 278254721Semaste close(fd); 279254721Semaste return (1); 280254721Semaste } 281254721Semaste 282254721Semaste /* 283254721Semaste * Deal with raw devices 284254721Semaste */ 285254721Semaste size = sbuf.st_size; 286254721Semaste sectsz = DEV_BSIZE; 287254721Semaste if (S_ISCHR(sbuf.st_mode)) { 288254721Semaste if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 || 289254721Semaste ioctl(fd, DIOCGSECTORSIZE, §sz)) { 290254721Semaste perror("Could not fetch dev blk/sector size"); 291254721Semaste close(fd); 292254721Semaste return (1); 293254721Semaste } 294263363Semaste assert(size != 0); 295263363Semaste assert(sectsz != 0); 296263363Semaste } 297263363Semaste 298269024Semaste sc = malloc(sizeof(struct pci_vtblk_softc)); 299269024Semaste memset(sc, 0, sizeof(struct pci_vtblk_softc)); 300263363Semaste 301269024Semaste /* record fd of storage device/file */ 302263363Semaste sc->vbsc_fd = fd; 303263363Semaste 304263363Semaste /* init virtio softc and 
virtqueues */ 305263363Semaste vi_softc_linkup(&sc->vbsc_vs, &vtblk_vi_consts, sc, pi, &sc->vbsc_vq); 306263363Semaste sc->vbsc_vq.vq_qsize = VTBLK_RINGSZ; 307263363Semaste /* sc->vbsc_vq.vq_notify = we have no per-queue notify */ 308254721Semaste 309254721Semaste /* 310254721Semaste * Create an identifier for the backing file. Use parts of the 311254721Semaste * md5 sum of the filename 312254721Semaste */ 313254721Semaste MD5Init(&mdctx); 314254721Semaste MD5Update(&mdctx, opts, strlen(opts)); 315254721Semaste MD5Final(digest, &mdctx); 316254721Semaste sprintf(sc->vbsc_ident, "BHYVE-%02X%02X-%02X%02X-%02X%02X", 317254721Semaste digest[0], digest[1], digest[2], digest[3], digest[4], digest[5]); 318254721Semaste 319254721Semaste /* setup virtio block config space */ 320254721Semaste sc->vbsc_cfg.vbc_capacity = size / sectsz; 321269024Semaste sc->vbsc_cfg.vbc_seg_max = VTBLK_MAXSEGS; 322254721Semaste sc->vbsc_cfg.vbc_blk_size = sectsz; 323254721Semaste sc->vbsc_cfg.vbc_size_max = 0; /* not negotiated */ 324254721Semaste sc->vbsc_cfg.vbc_geom_c = 0; /* no geometry */ 325254721Semaste sc->vbsc_cfg.vbc_geom_h = 0; 326254721Semaste sc->vbsc_cfg.vbc_geom_s = 0; 327254721Semaste sc->vbsc_cfg.vbc_sectors_max = 0; 328254721Semaste 329254721Semaste /* 330254721Semaste * Should we move some of this into virtio.c? Could 331254721Semaste * have the device, class, and subdev_0 as fields in 332254721Semaste * the virtio constants structure. 
333254721Semaste */ 334254721Semaste pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_BLOCK); 335254721Semaste pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); 336254721Semaste pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE); 337254721Semaste pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_BLOCK); 338254721Semaste 339254721Semaste use_msix = 1; 340254721Semaste if ((env_msi = getenv("BHYVE_USE_MSI"))) { 341254721Semaste if (strcasecmp(env_msi, "yes") == 0) 342254721Semaste use_msix = 0; 343254721Semaste } 344254721Semaste if (vi_intr_init(&sc->vbsc_vs, 1, use_msix)) 345254721Semaste return (1); 346254721Semaste vi_set_io_bar(&sc->vbsc_vs, 0); 347254721Semaste return (0); 348254721Semaste} 349254721Semaste 350254721Semastestatic int 351254721Semastepci_vtblk_cfgwrite(void *vsc, int offset, int size, uint32_t value) 352254721Semaste{ 353254721Semaste 354254721Semaste DPRINTF(("vtblk: write to readonly reg %d\n\r", offset)); 355254721Semaste return (1); 356254721Semaste} 357254721Semaste 358254721Semastestatic int 359254721Semastepci_vtblk_cfgread(void *vsc, int offset, int size, uint32_t *retval) 360254721Semaste{ 361254721Semaste struct pci_vtblk_softc *sc = vsc; 362254721Semaste void *ptr; 363254721Semaste 364254721Semaste /* our caller has already verified offset and size */ 365254721Semaste ptr = (uint8_t *)&sc->vbsc_cfg + offset; 366254721Semaste memcpy(retval, ptr, size); 367254721Semaste return (0); 368254721Semaste} 369254721Semaste 370254721Semastestruct pci_devemu pci_de_vblk = { 371254721Semaste .pe_emu = "virtio-blk", 372254721Semaste .pe_init = pci_vtblk_init, 373254721Semaste .pe_barwrite = vi_pci_write, 374 .pe_barread = vi_pci_read 375}; 376PCI_EMUL_SET(pci_de_vblk); 377