1/*- 2 * Copyright (c) 2013 Peter Grehan <grehan@freebsd.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 */ 28 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD$"); 31 32#include <sys/param.h> 33#include <sys/queue.h> 34#include <sys/errno.h> 35#include <sys/stat.h> 36#include <sys/ioctl.h> 37#include <sys/disk.h> 38 39#include <assert.h> 40#include <fcntl.h> 41#include <stdio.h> 42#include <stdlib.h> 43#include <string.h> 44#include <pthread.h> 45#include <pthread_np.h> 46#include <unistd.h> 47 48#include "bhyverun.h" 49#include "block_if.h" 50 51#define BLOCKIF_SIG 0xb109b109 52 53#define BLOCKIF_MAXREQ 16 54 55enum blockop { 56 BOP_READ, 57 BOP_WRITE, 58 BOP_FLUSH, 59 BOP_CANCEL 60}; 61 62enum blockstat { 63 BST_FREE, 64 BST_INUSE 65}; 66 67struct blockif_elem { 68 TAILQ_ENTRY(blockif_elem) be_link; 69 struct blockif_req *be_req; 70 enum blockop be_op; 71 enum blockstat be_status; 72}; 73 74struct blockif_ctxt { 75 int bc_magic; 76 int bc_fd; 77 int bc_rdonly; 78 off_t bc_size; 79 int bc_sectsz; 80 pthread_t bc_btid; 81 pthread_mutex_t bc_mtx; 82 pthread_cond_t bc_cond; 83 int bc_closing; 84 85 /* Request elements and free/inuse queues */ 86 TAILQ_HEAD(, blockif_elem) bc_freeq; 87 TAILQ_HEAD(, blockif_elem) bc_inuseq; 88 u_int bc_req_count; 89 struct blockif_elem bc_reqs[BLOCKIF_MAXREQ]; 90}; 91 92static int 93blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq, 94 enum blockop op) 95{ 96 struct blockif_elem *be; 97 98 assert(bc->bc_req_count < BLOCKIF_MAXREQ); 99 100 be = TAILQ_FIRST(&bc->bc_freeq); 101 assert(be != NULL); 102 assert(be->be_status == BST_FREE); 103 104 TAILQ_REMOVE(&bc->bc_freeq, be, be_link); 105 be->be_status = BST_INUSE; 106 be->be_req = breq; 107 be->be_op = op; 108 TAILQ_INSERT_TAIL(&bc->bc_inuseq, be, be_link); 109 110 bc->bc_req_count++; 111 112 return (0); 113} 114 115static int 116blockif_dequeue(struct blockif_ctxt *bc, struct blockif_elem *el) 117{ 118 struct blockif_elem *be; 119 120 if (bc->bc_req_count == 0) 121 return (ENOENT); 122 123 be = TAILQ_FIRST(&bc->bc_inuseq); 124 assert(be != NULL); 125 assert(be->be_status == BST_INUSE); 126 *el = *be; 127 128 TAILQ_REMOVE(&bc->bc_inuseq, be, be_link); 129 be->be_status = BST_FREE; 130 be->be_req = NULL; 131 TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link); 132 133 bc->bc_req_count--; 134 135 return (0); 136} 137 138static void 139blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be) 140{ 141 struct blockif_req *br; 142 int err; 143 144 br = be->be_req; 145 err = 0; 146 147 switch (be->be_op) { 148 case BOP_READ: 149 if (preadv(bc->bc_fd, br->br_iov, br->br_iovcnt, 150 br->br_offset) < 0) 151 err = errno; 152 break; 153 case BOP_WRITE: 154 if (bc->bc_rdonly) 155 err = EROFS; 156 else if (pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt, 157 br->br_offset) < 0) 158 err = errno; 159 break; 160 case BOP_FLUSH: 161 break; 162 case BOP_CANCEL: 163 err = EINTR; 164 break; 165 default: 166 err = EINVAL; 167 break; 168 } 169 170 (*br->br_callback)(br, err); 171} 172 173static void * 174blockif_thr(void *arg) 175{ 176 struct blockif_ctxt *bc; 177 struct blockif_elem req; 178 179 bc = arg; 180 181 for (;;) { 182 pthread_mutex_lock(&bc->bc_mtx); 183 while (!blockif_dequeue(bc, &req)) { 184 pthread_mutex_unlock(&bc->bc_mtx); 185 blockif_proc(bc, &req); 186 pthread_mutex_lock(&bc->bc_mtx); 187 } 188 pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx); 189 pthread_mutex_unlock(&bc->bc_mtx); 190 191 /* 192 * Check ctxt status here to see if exit requested 193 */ 194 if (bc->bc_closing) 195 pthread_exit(NULL); 196 } 197 198 /* Not reached */ 199 return (NULL); 200} 201 202struct blockif_ctxt * 203blockif_open(const char *optstr, const char *ident) 204{ 205 char tname[MAXCOMLEN + 1]; 206 char *nopt, *xopts; 207 struct blockif_ctxt *bc; 208 struct stat sbuf; 209 off_t size; 210 int extra, fd, i, sectsz; 211 int nocache, sync, ro; 212 213 nocache = 0; 214 sync = 0; 215 ro = 0; 216 217 /* 218 * The first element in the optstring is always a pathname. 219 * Optional elements follow 220 */ 221 nopt = strdup(optstr); 222 for (xopts = strtok(nopt, ","); 223 xopts != NULL; 224 xopts = strtok(NULL, ",")) { 225 if (!strcmp(xopts, "nocache")) 226 nocache = 1; 227 else if (!strcmp(xopts, "sync")) 228 sync = 1; 229 else if (!strcmp(xopts, "ro")) 230 ro = 1; 231 } 232 233 extra = 0; 234 if (nocache) 235 extra |= O_DIRECT; 236 if (sync) 237 extra |= O_SYNC; 238 239 fd = open(nopt, (ro ? O_RDONLY : O_RDWR) | extra); 240 if (fd < 0 && !ro) { 241 /* Attempt a r/w fail with a r/o open */ 242 fd = open(nopt, O_RDONLY | extra); 243 ro = 1; 244 } 245 246 if (fd < 0) { 247 perror("Could not open backing file"); 248 return (NULL); 249 } 250 251 if (fstat(fd, &sbuf) < 0) { 252 perror("Could not stat backing file"); 253 close(fd); 254 return (NULL); 255 } 256 257 /* 258 * Deal with raw devices 259 */ 260 size = sbuf.st_size; 261 sectsz = DEV_BSIZE; 262 if (S_ISCHR(sbuf.st_mode)) { 263 if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 || 264 ioctl(fd, DIOCGSECTORSIZE, §sz)) { 265 perror("Could not fetch dev blk/sector size"); 266 close(fd); 267 return (NULL); 268 } 269 assert(size != 0); 270 assert(sectsz != 0); 271 } 272 273 bc = malloc(sizeof(struct blockif_ctxt)); 274 if (bc == NULL) { 275 close(fd); 276 return (NULL); 277 } 278 279 memset(bc, 0, sizeof(*bc)); 280 bc->bc_magic = BLOCKIF_SIG; 281 bc->bc_fd = fd; 282 bc->bc_size = size; 283 bc->bc_sectsz = sectsz; 284 pthread_mutex_init(&bc->bc_mtx, NULL); 285 pthread_cond_init(&bc->bc_cond, NULL); 286 TAILQ_INIT(&bc->bc_freeq); 287 TAILQ_INIT(&bc->bc_inuseq); 288 bc->bc_req_count = 0; 289 for (i = 0; i < BLOCKIF_MAXREQ; i++) { 290 bc->bc_reqs[i].be_status = BST_FREE; 291 TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link); 292 } 293 294 pthread_create(&bc->bc_btid, NULL, blockif_thr, bc); 295 296 snprintf(tname, sizeof(tname), "blk-%s", ident); 297 pthread_set_name_np(bc->bc_btid, tname); 298 299 return (bc); 300} 301 302static int 303blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq, 304 enum blockop op) 305{ 306 int err; 307 308 err = 0; 309 310 pthread_mutex_lock(&bc->bc_mtx); 311 if (bc->bc_req_count < BLOCKIF_MAXREQ) { 312 /* 313 * Enqueue and inform the block i/o thread 314 * that there is work available 315 */ 316 blockif_enqueue(bc, breq, op); 317 pthread_cond_signal(&bc->bc_cond); 318 } else { 319 /* 320 * Callers are not allowed to enqueue more than 321 * the specified blockif queue limit. Return an 322 * error to indicate that the queue length has been 323 * exceeded. 324 */ 325 err = E2BIG; 326 } 327 pthread_mutex_unlock(&bc->bc_mtx); 328 329 return (err); 330} 331 332int 333blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq) 334{ 335 336 assert(bc->bc_magic == BLOCKIF_SIG); 337 return (blockif_request(bc, breq, BOP_READ)); 338} 339 340int 341blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq) 342{ 343 344 assert(bc->bc_magic == BLOCKIF_SIG); 345 return (blockif_request(bc, breq, BOP_WRITE)); 346} 347 348int 349blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq) 350{ 351 352 assert(bc->bc_magic == BLOCKIF_SIG); 353 return (blockif_request(bc, breq, BOP_FLUSH)); 354} 355 356int 357blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq) 358{ 359 360 assert(bc->bc_magic == BLOCKIF_SIG); 361 return (blockif_request(bc, breq, BOP_CANCEL)); 362} 363 364int 365blockif_close(struct blockif_ctxt *bc) 366{ 367 void *jval; 368 int err; 369 370 err = 0; 371 372 assert(bc->bc_magic == BLOCKIF_SIG); 373 374 /* 375 * Stop the block i/o thread 376 */ 377 bc->bc_closing = 1; 378 pthread_cond_signal(&bc->bc_cond); 379 pthread_join(bc->bc_btid, &jval); 380 381 /* XXX Cancel queued i/o's ??? */ 382 383 /* 384 * Release resources 385 */ 386 bc->bc_magic = 0; 387 close(bc->bc_fd); 388 free(bc); 389 390 return (0); 391} 392 393/* 394 * Accessors 395 */ 396off_t 397blockif_size(struct blockif_ctxt *bc) 398{ 399 400 assert(bc->bc_magic == BLOCKIF_SIG); 401 return (bc->bc_size); 402} 403 404int 405blockif_sectsz(struct blockif_ctxt *bc) 406{ 407 408 assert(bc->bc_magic == BLOCKIF_SIG); 409 return (bc->bc_sectsz); 410} 411 412int 413blockif_queuesz(struct blockif_ctxt *bc) 414{ 415 416 assert(bc->bc_magic == BLOCKIF_SIG); 417 return (BLOCKIF_MAXREQ); 418} 419 420int 421blockif_is_ro(struct blockif_ctxt *bc) 422{ 423 424 assert(bc->bc_magic == BLOCKIF_SIG); 425 return (bc->bc_rdonly); 426} 427