1/* Minimal block driver for Mini-OS. 2 * Copyright (c) 2007-2008 Samuel Thibault. 3 * Based on netfront.c. 4 */ 5 6#include <mini-os/os.h> 7#include <mini-os/xenbus.h> 8#include <mini-os/events.h> 9#include <mini-os/gnttab.h> 10#include <mini-os/blkfront.h> 11 12#include <xen/io/blkif.h> 13#include <xen/io/protocols.h> 14 15#include <bmk-core/errno.h> 16#include <bmk-core/memalloc.h> 17#include <bmk-core/pgalloc.h> 18#include <bmk-core/printf.h> 19#include <bmk-core/string.h> 20 21/* SHARED_RING_INIT() uses memset() */ 22#define memset(a,b,c) bmk_memset(a,b,c) 23 24/* Note: we generally don't need to disable IRQs since we hardly do anything in 25 * the interrupt handler. */ 26 27/* Note: we really suppose non-preemptive threads. */ 28 29DECLARE_WAIT_QUEUE_HEAD(blkfront_queue); 30 31#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE) 32#define GRANT_INVALID_REF 0 33 34struct blk_buffer { 35 void* page; 36 grant_ref_t gref; 37}; 38 39struct blkfront_dev { 40 domid_t dom; 41 42 struct blkif_front_ring ring; 43 grant_ref_t ring_ref; 44 evtchn_port_t evtchn; 45 blkif_vdev_t handle; 46 47 char nodename[64]; 48 char *backend; 49 struct blkfront_info info; 50 51 struct xenbus_event_queue events; 52 53}; 54 55void blkfront_handler(evtchn_port_t port, struct pt_regs *regs, void *data) 56{ 57 minios_wake_up(&blkfront_queue); 58} 59 60static void free_blkfront(struct blkfront_dev *dev) 61{ 62 minios_mask_evtchn(dev->evtchn); 63 64 bmk_memfree(dev->backend, BMK_MEMWHO_WIREDBMK); 65 66 gnttab_end_access(dev->ring_ref); 67 bmk_pgfree_one(dev->ring.sring); 68 69 minios_unbind_evtchn(dev->evtchn); 70 71 bmk_memfree(dev, BMK_MEMWHO_WIREDBMK); 72} 73 74struct blkfront_dev *blkfront_init(char *_nodename, struct blkfront_info *info) 75{ 76 xenbus_transaction_t xbt; 77 char* err; 78 char* message=NULL; 79 struct blkif_sring *s; 80 int retry=0; 81 char* msg = NULL; 82 char* c; 83 char* nodename = _nodename ? _nodename : "device/vbd/768"; 84 unsigned long len; 85 86 struct blkfront_dev *dev; 87 88 char path[bmk_strlen(nodename) + 1 + 10 + 1]; 89 90 dev = bmk_memcalloc(1, sizeof(*dev), BMK_MEMWHO_WIREDBMK); 91 bmk_strncpy(dev->nodename, nodename, sizeof(dev->nodename)-1); 92 93 bmk_snprintf(path, sizeof(path), "%s/backend-id", nodename); 94 dev->dom = xenbus_read_integer(path); 95 minios_evtchn_alloc_unbound(dev->dom, blkfront_handler, dev, &dev->evtchn); 96 97 s = bmk_pgalloc_one(); 98 bmk_memset(s,0,PAGE_SIZE); 99 100 101 SHARED_RING_INIT(s); 102 FRONT_RING_INIT(&dev->ring, s, PAGE_SIZE); 103 104 dev->ring_ref = gnttab_grant_access(dev->dom,virt_to_mfn(s),0); 105 106 xenbus_event_queue_init(&dev->events); 107 108again: 109 err = xenbus_transaction_start(&xbt); 110 if (err) { 111 minios_printk("starting transaction\n"); 112 bmk_memfree(err, BMK_MEMWHO_WIREDBMK); 113 } 114 115 err = xenbus_printf(xbt, nodename, "ring-ref","%u", 116 dev->ring_ref); 117 if (err) { 118 message = "writing ring-ref"; 119 goto abort_transaction; 120 } 121 err = xenbus_printf(xbt, nodename, 122 "event-channel", "%u", dev->evtchn); 123 if (err) { 124 message = "writing event-channel"; 125 goto abort_transaction; 126 } 127 err = xenbus_printf(xbt, nodename, 128 "protocol", "%s", XEN_IO_PROTO_ABI_NATIVE); 129 if (err) { 130 message = "writing protocol"; 131 goto abort_transaction; 132 } 133 134 bmk_snprintf(path, sizeof(path), "%s/state", nodename); 135 err = xenbus_switch_state(xbt, path, XenbusStateConnected); 136 if (err) { 137 message = "switching state"; 138 goto abort_transaction; 139 } 140 141 142 err = xenbus_transaction_end(xbt, 0, &retry); 143 if (err) bmk_memfree(err, BMK_MEMWHO_WIREDBMK); 144 if (retry) { 145 goto again; 146 minios_printk("completing transaction\n"); 147 } 148 149 goto done; 150 151abort_transaction: 152 bmk_memfree(err, BMK_MEMWHO_WIREDBMK); 153 err = xenbus_transaction_end(xbt, 1, &retry); 154 minios_printk("Abort transaction %s\n", message); 155 goto error; 156 157done: 158 159 bmk_snprintf(path, sizeof(path), "%s/backend", nodename); 160 msg = xenbus_read(XBT_NIL, path, &dev->backend); 161 if (msg) { 162 minios_printk("Error %s when reading the backend path %s\n", msg, path); 163 goto error; 164 } 165 166 minios_printk("blkfront: node=%s backend=%s\n", nodename, dev->backend); 167 168 len = bmk_strlen(nodename); 169 dev->handle = bmk_strtoul((char *)bmk_memrchr(nodename+len, '/', len)+1, NULL, 10); 170 171 { 172 XenbusState state; 173 char path[bmk_strlen(dev->backend) + 1 + 19 + 1]; 174 bmk_snprintf(path, sizeof(path), "%s/mode", dev->backend); 175 msg = xenbus_read(XBT_NIL, path, &c); 176 if (msg) { 177 minios_printk("Error %s when reading the mode\n", msg); 178 goto error; 179 } 180 if (*c == 'w') 181 dev->info.mode = BLKFRONT_RDWR; 182 else 183 dev->info.mode = BLKFRONT_RDONLY; 184 bmk_memfree(c, BMK_MEMWHO_WIREDBMK); 185 186 bmk_snprintf(path, sizeof(path), "%s/state", dev->backend); 187 188 xenbus_watch_path_token(XBT_NIL, path, path, &dev->events); 189 190 msg = NULL; 191 state = xenbus_read_integer(path); 192 while (msg == NULL && state < XenbusStateConnected) 193 msg = xenbus_wait_for_state_change(path, &state, &dev->events); 194 if (msg != NULL || state != XenbusStateConnected) { 195 minios_printk("backend not available, state=%d\n", state); 196 xenbus_unwatch_path_token(XBT_NIL, path, path); 197 goto error; 198 } 199 200 bmk_snprintf(path, sizeof(path), "%s/info", dev->backend); 201 dev->info.info = xenbus_read_integer(path); 202 203 bmk_snprintf(path, sizeof(path), "%s/sectors", dev->backend); 204 // FIXME: read_integer returns an int, so disk size limited to 1TB for now 205 dev->info.sectors = xenbus_read_integer(path); 206 207 bmk_snprintf(path, sizeof(path), "%s/sector-size", dev->backend); 208 dev->info.sector_size = xenbus_read_integer(path); 209 210 bmk_snprintf(path, sizeof(path), "%s/feature-barrier", dev->backend); 211 dev->info.barrier = xenbus_read_integer(path); 212 213 bmk_snprintf(path, sizeof(path), "%s/feature-flush-cache", dev->backend); 214 dev->info.flush = xenbus_read_integer(path); 215 216 *info = dev->info; 217 } 218 minios_unmask_evtchn(dev->evtchn); 219 220 minios_printk("blkfront: %u sectors\n", dev->info.sectors); 221 222 return dev; 223 224error: 225 bmk_memfree(msg, BMK_MEMWHO_WIREDBMK); 226 bmk_memfree(err, BMK_MEMWHO_WIREDBMK); 227 free_blkfront(dev); 228 return NULL; 229} 230 231void blkfront_shutdown(struct blkfront_dev *dev) 232{ 233 char* err = NULL; 234 XenbusState state; 235 236 char path[bmk_strlen(dev->backend) + 1 + 5 + 1]; 237 char nodename[bmk_strlen(dev->nodename) + 1 + 5 + 1]; 238 239 blkfront_sync(dev); 240 241 minios_printk("blkfront detached: node=%s\n", dev->nodename); 242 243 bmk_snprintf(path, sizeof(path), "%s/state", dev->backend); 244 bmk_snprintf(nodename, sizeof(nodename), "%s/state", dev->nodename); 245 246 if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateClosing)) != NULL) { 247 minios_printk("shutdown_blkfront: error changing state to %d: %s\n", 248 XenbusStateClosing, err); 249 goto close; 250 } 251 state = xenbus_read_integer(path); 252 while (err == NULL && state < XenbusStateClosing) 253 err = xenbus_wait_for_state_change(path, &state, &dev->events); 254 if (err) bmk_memfree(err, BMK_MEMWHO_WIREDBMK); 255 256 if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateClosed)) != NULL) { 257 minios_printk("shutdown_blkfront: error changing state to %d: %s\n", 258 XenbusStateClosed, err); 259 goto close; 260 } 261 state = xenbus_read_integer(path); 262 while (state < XenbusStateClosed) { 263 err = xenbus_wait_for_state_change(path, &state, &dev->events); 264 if (err) bmk_memfree(err, BMK_MEMWHO_WIREDBMK); 265 } 266 267 if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateInitialising)) != NULL) { 268 minios_printk("shutdown_blkfront: error changing state to %d: %s\n", 269 XenbusStateInitialising, err); 270 goto close; 271 } 272 err = NULL; 273 state = xenbus_read_integer(path); 274 while (err == NULL && (state < XenbusStateInitWait || state >= XenbusStateClosed)) 275 err = xenbus_wait_for_state_change(path, &state, &dev->events); 276 277close: 278 if (err) bmk_memfree(err, BMK_MEMWHO_WIREDBMK); 279 xenbus_unwatch_path_token(XBT_NIL, path, path); 280 281 bmk_snprintf(path, sizeof(path), "%s/ring-ref", nodename); 282 xenbus_rm(XBT_NIL, path); 283 bmk_snprintf(path, sizeof(path), "%s/event-channel", nodename); 284 xenbus_rm(XBT_NIL, path); 285 286 if (!err) 287 free_blkfront(dev); 288} 289 290static void blkfront_wait_slot(struct blkfront_dev *dev) 291{ 292 /* Wait for a slot */ 293 if (RING_FULL(&dev->ring)) { 294 unsigned long flags; 295 DEFINE_WAIT(w); 296 local_irq_save(flags); 297 while (1) { 298 blkfront_aio_poll(dev); 299 if (!RING_FULL(&dev->ring)) 300 break; 301 /* Really no slot, go to sleep. */ 302 minios_add_waiter(w, blkfront_queue); 303 local_irq_restore(flags); 304 minios_wait(w); 305 local_irq_save(flags); 306 } 307 minios_remove_waiter(w, blkfront_queue); 308 local_irq_restore(flags); 309 } 310} 311 312/* Issue an aio */ 313void blkfront_aio(struct blkfront_aiocb *aiocbp, int write) 314{ 315 struct blkfront_dev *dev = aiocbp->aio_dev; 316 struct blkif_request *req; 317 RING_IDX i; 318 int notify; 319 int n, j; 320 uintptr_t start, end; 321 322 // Can't io at non-sector-aligned location 323 ASSERT(!(aiocbp->aio_offset & (dev->info.sector_size-1))); 324 // Can't io non-sector-sized amounts 325 ASSERT(!(aiocbp->aio_nbytes & (dev->info.sector_size-1))); 326 // Can't io non-sector-aligned buffer 327 ASSERT(!((uintptr_t) aiocbp->aio_buf & (dev->info.sector_size-1))); 328 329 start = (uintptr_t)aiocbp->aio_buf & PAGE_MASK; 330 end = ((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes + PAGE_SIZE - 1) & PAGE_MASK; 331 aiocbp->n = n = (end - start) / PAGE_SIZE; 332 333 /* qemu's IDE max multsect is 16 (8KB) and SCSI max DMA was set to 32KB, 334 * so max 44KB can't happen */ 335 ASSERT(n <= BLKIF_MAX_SEGMENTS_PER_REQUEST); 336 337 blkfront_wait_slot(dev); 338 i = dev->ring.req_prod_pvt; 339 req = RING_GET_REQUEST(&dev->ring, i); 340 341 req->operation = write ? BLKIF_OP_WRITE : BLKIF_OP_READ; 342 req->nr_segments = n; 343 req->handle = dev->handle; 344 req->id = (uintptr_t) aiocbp; 345 req->sector_number = aiocbp->aio_offset / 512; 346 347 for (j = 0; j < n; j++) { 348 req->seg[j].first_sect = 0; 349 req->seg[j].last_sect = PAGE_SIZE / 512 - 1; 350 } 351 req->seg[0].first_sect = ((uintptr_t)aiocbp->aio_buf & ~PAGE_MASK) / 512; 352 req->seg[n-1].last_sect = (((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes - 1) & ~PAGE_MASK) / 512; 353 for (j = 0; j < n; j++) { 354 uintptr_t data = start + j * PAGE_SIZE; 355 if (!write) { 356 /* Trigger CoW if needed */ 357 *(char*)(data + (req->seg[j].first_sect << 9)) = 0; 358 barrier(); 359 } 360 aiocbp->gref[j] = req->seg[j].gref = 361 gnttab_grant_access(dev->dom, virtual_to_mfn(data), write); 362 } 363 364 dev->ring.req_prod_pvt = i + 1; 365 366 wmb(); 367 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify); 368 369 if(notify) minios_notify_remote_via_evtchn(dev->evtchn); 370} 371 372static void blkfront_aio_cb(struct blkfront_aiocb *aiocbp, int ret) 373{ 374 aiocbp->data = (void*) 1; 375} 376 377void blkfront_io(struct blkfront_aiocb *aiocbp, int write) 378{ 379 unsigned long flags; 380 DEFINE_WAIT(w); 381 382 ASSERT(!aiocbp->aio_cb); 383 aiocbp->aio_cb = blkfront_aio_cb; 384 blkfront_aio(aiocbp, write); 385 aiocbp->data = NULL; 386 387 local_irq_save(flags); 388 while (1) { 389 blkfront_aio_poll(aiocbp->aio_dev); 390 if (aiocbp->data) 391 break; 392 393 minios_add_waiter(w, blkfront_queue); 394 local_irq_restore(flags); 395 minios_wait(w); 396 local_irq_save(flags); 397 } 398 minios_remove_waiter(w, blkfront_queue); 399 local_irq_restore(flags); 400} 401 402static void blkfront_push_operation(struct blkfront_dev *dev, uint8_t op, uint64_t id) 403{ 404 int i; 405 struct blkif_request *req; 406 int notify; 407 408 blkfront_wait_slot(dev); 409 i = dev->ring.req_prod_pvt; 410 req = RING_GET_REQUEST(&dev->ring, i); 411 req->operation = op; 412 req->nr_segments = 0; 413 req->handle = dev->handle; 414 req->id = id; 415 /* Not needed anyway, but the backend will check it */ 416 req->sector_number = 0; 417 dev->ring.req_prod_pvt = i + 1; 418 wmb(); 419 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify); 420 if (notify) minios_notify_remote_via_evtchn(dev->evtchn); 421} 422 423void blkfront_aio_push_operation(struct blkfront_aiocb *aiocbp, uint8_t op) 424{ 425 struct blkfront_dev *dev = aiocbp->aio_dev; 426 blkfront_push_operation(dev, op, (uintptr_t) aiocbp); 427} 428 429void blkfront_sync(struct blkfront_dev *dev) 430{ 431 unsigned long flags; 432 DEFINE_WAIT(w); 433 434 if (dev->info.mode == BLKFRONT_RDWR) { 435 if (dev->info.barrier == 1) 436 blkfront_push_operation(dev, BLKIF_OP_WRITE_BARRIER, 0); 437 438 if (dev->info.flush == 1) 439 blkfront_push_operation(dev, BLKIF_OP_FLUSH_DISKCACHE, 0); 440 } 441 442 /* Note: This won't finish if another thread enqueues requests. */ 443 local_irq_save(flags); 444 while (1) { 445 blkfront_aio_poll(dev); 446 if (RING_FREE_REQUESTS(&dev->ring) == RING_SIZE(&dev->ring)) 447 break; 448 449 minios_add_waiter(w, blkfront_queue); 450 local_irq_restore(flags); 451 minios_wait(w); 452 local_irq_save(flags); 453 } 454 minios_remove_waiter(w, blkfront_queue); 455 local_irq_restore(flags); 456} 457 458int blkfront_aio_poll(struct blkfront_dev *dev) 459{ 460 RING_IDX rp, cons; 461 struct blkif_response *rsp; 462 int more; 463 int nr_consumed; 464 465moretodo: 466 467 rp = dev->ring.sring->rsp_prod; 468 rmb(); /* Ensure we see queued responses up to 'rp'. */ 469 cons = dev->ring.rsp_cons; 470 471 nr_consumed = 0; 472 while ((cons != rp)) 473 { 474 struct blkfront_aiocb *aiocbp; 475 int status; 476 477 rsp = RING_GET_RESPONSE(&dev->ring, cons); 478 nr_consumed++; 479 480 aiocbp = (void*) (uintptr_t) rsp->id; 481 status = rsp->status; 482 483 if (status != BLKIF_RSP_OKAY) 484 minios_printk("block error %d for op %d\n", status, rsp->operation); 485 486 switch (rsp->operation) { 487 case BLKIF_OP_READ: 488 case BLKIF_OP_WRITE: 489 { 490 int j; 491 492 for (j = 0; j < aiocbp->n; j++) 493 gnttab_end_access(aiocbp->gref[j]); 494 495 break; 496 } 497 498 case BLKIF_OP_WRITE_BARRIER: 499 case BLKIF_OP_FLUSH_DISKCACHE: 500 break; 501 502 default: 503 minios_printk("unrecognized block operation %d response\n", rsp->operation); 504 } 505 506 dev->ring.rsp_cons = ++cons; 507 /* Nota: callback frees aiocbp itself */ 508 if (aiocbp && aiocbp->aio_cb) 509 aiocbp->aio_cb(aiocbp, status ? -BMK_EIO : 0); 510 if (dev->ring.rsp_cons != cons) 511 /* We reentered, we must not continue here */ 512 break; 513 } 514 515 RING_FINAL_CHECK_FOR_RESPONSES(&dev->ring, more); 516 if (more) goto moretodo; 517 518 return nr_consumed; 519} 520