1/* 2 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) 3 * Licensed under the GPL 4 */ 5 6/* 2001-09-28...2002-04-17 7 * Partition stuff by James_McMechan@hotmail.com 8 * old style ubd by setting UBD_SHIFT to 0 9 * 2002-09-27...2002-10-18 massive tinkering for 2.5 10 * partitions have changed in 2.5 11 * 2003-01-29 more tinkering for 2.5.59-1 12 * This should now address the sysfs problems and has 13 * the symlink for devfs to allow for booting with 14 * the common /dev/ubd/discX/... names rather than 15 * only /dev/ubdN/discN this version also has lots of 16 * clean ups preparing for ubd-many. 17 * James McMechan 18 */ 19 20#define MAJOR_NR UBD_MAJOR 21#define UBD_SHIFT 4 22 23#include "linux/kernel.h" 24#include "linux/module.h" 25#include "linux/blkdev.h" 26#include "linux/hdreg.h" 27#include "linux/init.h" 28#include "linux/cdrom.h" 29#include "linux/proc_fs.h" 30#include "linux/ctype.h" 31#include "linux/capability.h" 32#include "linux/mm.h" 33#include "linux/vmalloc.h" 34#include "linux/blkpg.h" 35#include "linux/genhd.h" 36#include "linux/spinlock.h" 37#include "linux/platform_device.h" 38#include "asm/segment.h" 39#include "asm/uaccess.h" 40#include "asm/irq.h" 41#include "asm/types.h" 42#include "asm/tlbflush.h" 43#include "mem_user.h" 44#include "kern_util.h" 45#include "kern.h" 46#include "mconsole_kern.h" 47#include "init.h" 48#include "irq_user.h" 49#include "irq_kern.h" 50#include "ubd_user.h" 51#include "os.h" 52#include "mem.h" 53#include "mem_kern.h" 54#include "cow.h" 55 56enum ubd_req { UBD_READ, UBD_WRITE }; 57 58struct io_thread_req { 59 struct request *req; 60 enum ubd_req op; 61 int fds[2]; 62 unsigned long offsets[2]; 63 unsigned long long offset; 64 unsigned long length; 65 char *buffer; 66 int sectorsize; 67 unsigned long sector_mask; 68 unsigned long long cow_offset; 69 unsigned long bitmap_words[2]; 70 int error; 71}; 72 73extern int open_ubd_file(char *file, struct openflags *openflags, int shared, 74 char **backing_file_out, int *bitmap_offset_out, 75 unsigned long *bitmap_len_out, int *data_offset_out, 76 int *create_cow_out); 77extern int create_cow_file(char *cow_file, char *backing_file, 78 struct openflags flags, int sectorsize, 79 int alignment, int *bitmap_offset_out, 80 unsigned long *bitmap_len_out, 81 int *data_offset_out); 82extern int read_cow_bitmap(int fd, void *buf, int offset, int len); 83extern void do_io(struct io_thread_req *req); 84 85static inline int ubd_test_bit(__u64 bit, unsigned char *data) 86{ 87 __u64 n; 88 int bits, off; 89 90 bits = sizeof(data[0]) * 8; 91 n = bit / bits; 92 off = bit % bits; 93 return (data[n] & (1 << off)) != 0; 94} 95 96static inline void ubd_set_bit(__u64 bit, unsigned char *data) 97{ 98 __u64 n; 99 int bits, off; 100 101 bits = sizeof(data[0]) * 8; 102 n = bit / bits; 103 off = bit % bits; 104 data[n] |= (1 << off); 105} 106/*End stuff from ubd_user.h*/ 107 108#define DRIVER_NAME "uml-blkdev" 109 110static DEFINE_MUTEX(ubd_lock); 111 112static int ubd_open(struct inode * inode, struct file * filp); 113static int ubd_release(struct inode * inode, struct file * file); 114static int ubd_ioctl(struct inode * inode, struct file * file, 115 unsigned int cmd, unsigned long arg); 116static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo); 117 118#define MAX_DEV (16) 119 120static struct block_device_operations ubd_blops = { 121 .owner = THIS_MODULE, 122 .open = ubd_open, 123 .release = ubd_release, 124 .ioctl = ubd_ioctl, 125 .getgeo = ubd_getgeo, 126}; 127 128/* Protected by ubd_lock */ 129static int fake_major = MAJOR_NR; 130static struct gendisk *ubd_gendisk[MAX_DEV]; 131static struct gendisk *fake_gendisk[MAX_DEV]; 132 133#ifdef CONFIG_BLK_DEV_UBD_SYNC 134#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \ 135 .cl = 1 }) 136#else 137#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \ 138 .cl = 1 }) 139#endif 140static struct openflags global_openflags = OPEN_FLAGS; 141 142struct cow { 143 /* backing file name */ 144 char *file; 145 /* backing file fd */ 146 int fd; 147 unsigned long *bitmap; 148 unsigned long bitmap_len; 149 int bitmap_offset; 150 int data_offset; 151}; 152 153#define MAX_SG 64 154 155struct ubd { 156 struct list_head restart; 157 /* name (and fd, below) of the file opened for writing, either the 158 * backing or the cow file. */ 159 char *file; 160 int count; 161 int fd; 162 __u64 size; 163 struct openflags boot_openflags; 164 struct openflags openflags; 165 unsigned shared:1; 166 unsigned no_cow:1; 167 struct cow cow; 168 struct platform_device pdev; 169 struct request_queue *queue; 170 spinlock_t lock; 171 struct scatterlist sg[MAX_SG]; 172 struct request *request; 173 int start_sg, end_sg; 174}; 175 176#define DEFAULT_COW { \ 177 .file = NULL, \ 178 .fd = -1, \ 179 .bitmap = NULL, \ 180 .bitmap_offset = 0, \ 181 .data_offset = 0, \ 182} 183 184#define DEFAULT_UBD { \ 185 .file = NULL, \ 186 .count = 0, \ 187 .fd = -1, \ 188 .size = -1, \ 189 .boot_openflags = OPEN_FLAGS, \ 190 .openflags = OPEN_FLAGS, \ 191 .no_cow = 0, \ 192 .shared = 0, \ 193 .cow = DEFAULT_COW, \ 194 .lock = SPIN_LOCK_UNLOCKED, \ 195 .request = NULL, \ 196 .start_sg = 0, \ 197 .end_sg = 0, \ 198} 199 200/* Protected by ubd_lock */ 201struct ubd ubd_devs[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_UBD }; 202 203/* Only changed by fake_ide_setup which is a setup */ 204static int fake_ide = 0; 205static struct proc_dir_entry *proc_ide_root = NULL; 206static struct proc_dir_entry *proc_ide = NULL; 207 208static void make_proc_ide(void) 209{ 210 proc_ide_root = proc_mkdir("ide", NULL); 211 proc_ide = proc_mkdir("ide0", proc_ide_root); 212} 213 214static int proc_ide_read_media(char *page, char **start, off_t off, int count, 215 int *eof, void *data) 216{ 217 int len; 218 219 strcpy(page, "disk\n"); 220 len = strlen("disk\n"); 221 len -= off; 222 if (len < count){ 223 *eof = 1; 224 if (len <= 0) return 0; 225 } 226 else len = count; 227 *start = page + off; 228 return len; 229} 230 231static void make_ide_entries(char *dev_name) 232{ 233 struct proc_dir_entry *dir, *ent; 234 char name[64]; 235 236 if(proc_ide_root == NULL) make_proc_ide(); 237 238 dir = proc_mkdir(dev_name, proc_ide); 239 if(!dir) return; 240 241 ent = create_proc_entry("media", S_IFREG|S_IRUGO, dir); 242 if(!ent) return; 243 ent->data = NULL; 244 ent->read_proc = proc_ide_read_media; 245 ent->write_proc = NULL; 246 sprintf(name,"ide0/%s", dev_name); 247 proc_symlink(dev_name, proc_ide_root, name); 248} 249 250static int fake_ide_setup(char *str) 251{ 252 fake_ide = 1; 253 return 1; 254} 255 256__setup("fake_ide", fake_ide_setup); 257 258__uml_help(fake_ide_setup, 259"fake_ide\n" 260" Create ide0 entries that map onto ubd devices.\n\n" 261); 262 263static int parse_unit(char **ptr) 264{ 265 char *str = *ptr, *end; 266 int n = -1; 267 268 if(isdigit(*str)) { 269 n = simple_strtoul(str, &end, 0); 270 if(end == str) 271 return -1; 272 *ptr = end; 273 } 274 else if (('a' <= *str) && (*str <= 'z')) { 275 n = *str - 'a'; 276 str++; 277 *ptr = str; 278 } 279 return n; 280} 281 282/* If *index_out == -1 at exit, the passed option was a general one; 283 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it 284 * should not be freed on exit. 285 */ 286static int ubd_setup_common(char *str, int *index_out, char **error_out) 287{ 288 struct ubd *ubd_dev; 289 struct openflags flags = global_openflags; 290 char *backing_file; 291 int n, err = 0, i; 292 293 if(index_out) *index_out = -1; 294 n = *str; 295 if(n == '='){ 296 char *end; 297 int major; 298 299 str++; 300 if(!strcmp(str, "sync")){ 301 global_openflags = of_sync(global_openflags); 302 goto out1; 303 } 304 305 err = -EINVAL; 306 major = simple_strtoul(str, &end, 0); 307 if((*end != '\0') || (end == str)){ 308 *error_out = "Didn't parse major number"; 309 goto out1; 310 } 311 312 mutex_lock(&ubd_lock); 313 if(fake_major != MAJOR_NR){ 314 *error_out = "Can't assign a fake major twice"; 315 goto out1; 316 } 317 318 fake_major = major; 319 320 printk(KERN_INFO "Setting extra ubd major number to %d\n", 321 major); 322 err = 0; 323 out1: 324 mutex_unlock(&ubd_lock); 325 return err; 326 } 327 328 n = parse_unit(&str); 329 if(n < 0){ 330 *error_out = "Couldn't parse device number"; 331 return -EINVAL; 332 } 333 if(n >= MAX_DEV){ 334 *error_out = "Device number out of range"; 335 return 1; 336 } 337 338 err = -EBUSY; 339 mutex_lock(&ubd_lock); 340 341 ubd_dev = &ubd_devs[n]; 342 if(ubd_dev->file != NULL){ 343 *error_out = "Device is already configured"; 344 goto out; 345 } 346 347 if (index_out) 348 *index_out = n; 349 350 err = -EINVAL; 351 for (i = 0; i < sizeof("rscd="); i++) { 352 switch (*str) { 353 case 'r': 354 flags.w = 0; 355 break; 356 case 's': 357 flags.s = 1; 358 break; 359 case 'd': 360 ubd_dev->no_cow = 1; 361 break; 362 case 'c': 363 ubd_dev->shared = 1; 364 break; 365 case '=': 366 str++; 367 goto break_loop; 368 default: 369 *error_out = "Expected '=' or flag letter " 370 "(r, s, c, or d)"; 371 goto out; 372 } 373 str++; 374 } 375 376 if (*str == '=') 377 *error_out = "Too many flags specified"; 378 else 379 *error_out = "Missing '='"; 380 goto out; 381 382break_loop: 383 backing_file = strchr(str, ','); 384 385 if (backing_file == NULL) 386 backing_file = strchr(str, ':'); 387 388 if(backing_file != NULL){ 389 if(ubd_dev->no_cow){ 390 *error_out = "Can't specify both 'd' and a cow file"; 391 goto out; 392 } 393 else { 394 *backing_file = '\0'; 395 backing_file++; 396 } 397 } 398 err = 0; 399 ubd_dev->file = str; 400 ubd_dev->cow.file = backing_file; 401 ubd_dev->boot_openflags = flags; 402out: 403 mutex_unlock(&ubd_lock); 404 return err; 405} 406 407static int ubd_setup(char *str) 408{ 409 char *error; 410 int err; 411 412 err = ubd_setup_common(str, NULL, &error); 413 if(err) 414 printk(KERN_ERR "Failed to initialize device with \"%s\" : " 415 "%s\n", str, error); 416 return 1; 417} 418 419__setup("ubd", ubd_setup); 420__uml_help(ubd_setup, 421"ubd<n><flags>=<filename>[(:|,)<filename2>]\n" 422" This is used to associate a device with a file in the underlying\n" 423" filesystem. When specifying two filenames, the first one is the\n" 424" COW name and the second is the backing file name. As separator you can\n" 425" use either a ':' or a ',': the first one allows writing things like;\n" 426" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n" 427" while with a ',' the shell would not expand the 2nd '~'.\n" 428" When using only one filename, UML will detect whether to treat it like\n" 429" a COW file or a backing file. To override this detection, add the 'd'\n" 430" flag:\n" 431" ubd0d=BackingFile\n" 432" Usually, there is a filesystem in the file, but \n" 433" that's not required. Swap devices containing swap files can be\n" 434" specified like this. Also, a file which doesn't contain a\n" 435" filesystem can have its contents read in the virtual \n" 436" machine by running 'dd' on the device. <n> must be in the range\n" 437" 0 to 7. Appending an 'r' to the number will cause that device\n" 438" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n" 439" an 's' will cause data to be written to disk on the host immediately.\n\n" 440); 441 442static int udb_setup(char *str) 443{ 444 printk("udb%s specified on command line is almost certainly a ubd -> " 445 "udb TYPO\n", str); 446 return 1; 447} 448 449__setup("udb", udb_setup); 450__uml_help(udb_setup, 451"udb\n" 452" This option is here solely to catch ubd -> udb typos, which can be\n" 453" to impossible to catch visually unless you specifically look for\n" 454" them. The only result of any option starting with 'udb' is an error\n" 455" in the boot output.\n\n" 456); 457 458static int fakehd_set = 0; 459static int fakehd(char *str) 460{ 461 printk(KERN_INFO "fakehd : Changing ubd name to \"hd\".\n"); 462 fakehd_set = 1; 463 return 1; 464} 465 466__setup("fakehd", fakehd); 467__uml_help(fakehd, 468"fakehd\n" 469" Change the ubd device name to \"hd\".\n\n" 470); 471 472static void do_ubd_request(request_queue_t * q); 473 474/* Only changed by ubd_init, which is an initcall. */ 475int thread_fd = -1; 476 477static void ubd_end_request(struct request *req, int bytes, int uptodate) 478{ 479 if (!end_that_request_first(req, uptodate, bytes >> 9)) { 480 struct ubd *dev = req->rq_disk->private_data; 481 unsigned long flags; 482 483 add_disk_randomness(req->rq_disk); 484 spin_lock_irqsave(&dev->lock, flags); 485 end_that_request_last(req, uptodate); 486 spin_unlock_irqrestore(&dev->lock, flags); 487 } 488} 489 490/* Callable only from interrupt context - otherwise you need to do 491 * spin_lock_irq()/spin_lock_irqsave() */ 492static inline void ubd_finish(struct request *req, int bytes) 493{ 494 if(bytes < 0){ 495 ubd_end_request(req, 0, 0); 496 return; 497 } 498 ubd_end_request(req, bytes, 1); 499} 500 501static LIST_HEAD(restart); 502 503/* Called without dev->lock held, and only in interrupt context. */ 504static void ubd_handler(void) 505{ 506 struct io_thread_req *req; 507 struct request *rq; 508 struct ubd *ubd; 509 struct list_head *list, *next_ele; 510 unsigned long flags; 511 int n; 512 513 while(1){ 514 n = os_read_file(thread_fd, &req, 515 sizeof(struct io_thread_req *)); 516 if(n != sizeof(req)){ 517 if(n == -EAGAIN) 518 break; 519 printk(KERN_ERR "spurious interrupt in ubd_handler, " 520 "err = %d\n", -n); 521 return; 522 } 523 524 rq = req->req; 525 rq->nr_sectors -= req->length >> 9; 526 if(rq->nr_sectors == 0) 527 ubd_finish(rq, rq->hard_nr_sectors << 9); 528 kfree(req); 529 } 530 reactivate_fd(thread_fd, UBD_IRQ); 531 532 list_for_each_safe(list, next_ele, &restart){ 533 ubd = container_of(list, struct ubd, restart); 534 list_del_init(&ubd->restart); 535 spin_lock_irqsave(&ubd->lock, flags); 536 do_ubd_request(ubd->queue); 537 spin_unlock_irqrestore(&ubd->lock, flags); 538 } 539} 540 541static irqreturn_t ubd_intr(int irq, void *dev) 542{ 543 ubd_handler(); 544 return IRQ_HANDLED; 545} 546 547/* Only changed by ubd_init, which is an initcall. */ 548static int io_pid = -1; 549 550void kill_io_thread(void) 551{ 552 if(io_pid != -1) 553 os_kill_process(io_pid, 1); 554} 555 556__uml_exitcall(kill_io_thread); 557 558static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out) 559{ 560 char *file; 561 562 file = ubd_dev->cow.file ? ubd_dev->cow.file : ubd_dev->file; 563 return os_file_size(file, size_out); 564} 565 566static void ubd_close_dev(struct ubd *ubd_dev) 567{ 568 os_close_file(ubd_dev->fd); 569 if(ubd_dev->cow.file == NULL) 570 return; 571 572 os_close_file(ubd_dev->cow.fd); 573 vfree(ubd_dev->cow.bitmap); 574 ubd_dev->cow.bitmap = NULL; 575} 576 577static int ubd_open_dev(struct ubd *ubd_dev) 578{ 579 struct openflags flags; 580 char **back_ptr; 581 int err, create_cow, *create_ptr; 582 int fd; 583 584 ubd_dev->openflags = ubd_dev->boot_openflags; 585 create_cow = 0; 586 create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL; 587 back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file; 588 589 fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared, 590 back_ptr, &ubd_dev->cow.bitmap_offset, 591 &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset, 592 create_ptr); 593 594 if((fd == -ENOENT) && create_cow){ 595 fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file, 596 ubd_dev->openflags, 1 << 9, PAGE_SIZE, 597 &ubd_dev->cow.bitmap_offset, 598 &ubd_dev->cow.bitmap_len, 599 &ubd_dev->cow.data_offset); 600 if(fd >= 0){ 601 printk(KERN_INFO "Creating \"%s\" as COW file for " 602 "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file); 603 } 604 } 605 606 if(fd < 0){ 607 printk("Failed to open '%s', errno = %d\n", ubd_dev->file, 608 -fd); 609 return fd; 610 } 611 ubd_dev->fd = fd; 612 613 if(ubd_dev->cow.file != NULL){ 614 err = -ENOMEM; 615 ubd_dev->cow.bitmap = (void *) vmalloc(ubd_dev->cow.bitmap_len); 616 if(ubd_dev->cow.bitmap == NULL){ 617 printk(KERN_ERR "Failed to vmalloc COW bitmap\n"); 618 goto error; 619 } 620 flush_tlb_kernel_vm(); 621 622 err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap, 623 ubd_dev->cow.bitmap_offset, 624 ubd_dev->cow.bitmap_len); 625 if(err < 0) 626 goto error; 627 628 flags = ubd_dev->openflags; 629 flags.w = 0; 630 err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL, 631 NULL, NULL, NULL, NULL); 632 if(err < 0) goto error; 633 ubd_dev->cow.fd = err; 634 } 635 return 0; 636 error: 637 os_close_file(ubd_dev->fd); 638 return err; 639} 640 641static void ubd_device_release(struct device *dev) 642{ 643 struct ubd *ubd_dev = dev->driver_data; 644 645 blk_cleanup_queue(ubd_dev->queue); 646 *ubd_dev = ((struct ubd) DEFAULT_UBD); 647} 648 649static int ubd_disk_register(int major, u64 size, int unit, 650 struct gendisk **disk_out) 651{ 652 struct gendisk *disk; 653 654 disk = alloc_disk(1 << UBD_SHIFT); 655 if(disk == NULL) 656 return -ENOMEM; 657 658 disk->major = major; 659 disk->first_minor = unit << UBD_SHIFT; 660 disk->fops = &ubd_blops; 661 set_capacity(disk, size / 512); 662 if(major == MAJOR_NR) 663 sprintf(disk->disk_name, "ubd%c", 'a' + unit); 664 else 665 sprintf(disk->disk_name, "ubd_fake%d", unit); 666 667 /* sysfs register (not for ide fake devices) */ 668 if (major == MAJOR_NR) { 669 ubd_devs[unit].pdev.id = unit; 670 ubd_devs[unit].pdev.name = DRIVER_NAME; 671 ubd_devs[unit].pdev.dev.release = ubd_device_release; 672 ubd_devs[unit].pdev.dev.driver_data = &ubd_devs[unit]; 673 platform_device_register(&ubd_devs[unit].pdev); 674 disk->driverfs_dev = &ubd_devs[unit].pdev.dev; 675 } 676 677 disk->private_data = &ubd_devs[unit]; 678 disk->queue = ubd_devs[unit].queue; 679 add_disk(disk); 680 681 *disk_out = disk; 682 return 0; 683} 684 685#define ROUND_BLOCK(n) ((n + ((1 << 9) - 1)) & (-1 << 9)) 686 687static int ubd_add(int n, char **error_out) 688{ 689 struct ubd *ubd_dev = &ubd_devs[n]; 690 int err = 0; 691 692 if(ubd_dev->file == NULL) 693 goto out; 694 695 err = ubd_file_size(ubd_dev, &ubd_dev->size); 696 if(err < 0){ 697 *error_out = "Couldn't determine size of device's file"; 698 goto out; 699 } 700 701 ubd_dev->size = ROUND_BLOCK(ubd_dev->size); 702 703 INIT_LIST_HEAD(&ubd_dev->restart); 704 705 err = -ENOMEM; 706 ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock); 707 if (ubd_dev->queue == NULL) { 708 *error_out = "Failed to initialize device queue"; 709 goto out; 710 } 711 ubd_dev->queue->queuedata = ubd_dev; 712 713 blk_queue_max_hw_segments(ubd_dev->queue, MAX_SG); 714 err = ubd_disk_register(MAJOR_NR, ubd_dev->size, n, &ubd_gendisk[n]); 715 if(err){ 716 *error_out = "Failed to register device"; 717 goto out_cleanup; 718 } 719 720 if(fake_major != MAJOR_NR) 721 ubd_disk_register(fake_major, ubd_dev->size, n, 722 &fake_gendisk[n]); 723 724 /* perhaps this should also be under the "if (fake_major)" above */ 725 /* using the fake_disk->disk_name and also the fakehd_set name */ 726 if (fake_ide) 727 make_ide_entries(ubd_gendisk[n]->disk_name); 728 729 err = 0; 730out: 731 return err; 732 733out_cleanup: 734 blk_cleanup_queue(ubd_dev->queue); 735 goto out; 736} 737 738static int ubd_config(char *str, char **error_out) 739{ 740 int n, ret; 741 742 /* This string is possibly broken up and stored, so it's only 743 * freed if ubd_setup_common fails, or if only general options 744 * were set. 745 */ 746 str = kstrdup(str, GFP_KERNEL); 747 if (str == NULL) { 748 *error_out = "Failed to allocate memory"; 749 return -ENOMEM; 750 } 751 752 ret = ubd_setup_common(str, &n, error_out); 753 if (ret) 754 goto err_free; 755 756 if (n == -1) { 757 ret = 0; 758 goto err_free; 759 } 760 761 mutex_lock(&ubd_lock); 762 ret = ubd_add(n, error_out); 763 if (ret) 764 ubd_devs[n].file = NULL; 765 mutex_unlock(&ubd_lock); 766 767out: 768 return ret; 769 770err_free: 771 kfree(str); 772 goto out; 773} 774 775static int ubd_get_config(char *name, char *str, int size, char **error_out) 776{ 777 struct ubd *ubd_dev; 778 int n, len = 0; 779 780 n = parse_unit(&name); 781 if((n >= MAX_DEV) || (n < 0)){ 782 *error_out = "ubd_get_config : device number out of range"; 783 return -1; 784 } 785 786 ubd_dev = &ubd_devs[n]; 787 mutex_lock(&ubd_lock); 788 789 if(ubd_dev->file == NULL){ 790 CONFIG_CHUNK(str, size, len, "", 1); 791 goto out; 792 } 793 794 CONFIG_CHUNK(str, size, len, ubd_dev->file, 0); 795 796 if(ubd_dev->cow.file != NULL){ 797 CONFIG_CHUNK(str, size, len, ",", 0); 798 CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1); 799 } 800 else CONFIG_CHUNK(str, size, len, "", 1); 801 802 out: 803 mutex_unlock(&ubd_lock); 804 return len; 805} 806 807static int ubd_id(char **str, int *start_out, int *end_out) 808{ 809 int n; 810 811 n = parse_unit(str); 812 *start_out = 0; 813 *end_out = MAX_DEV - 1; 814 return n; 815} 816 817static int ubd_remove(int n, char **error_out) 818{ 819 struct gendisk *disk = ubd_gendisk[n]; 820 struct ubd *ubd_dev; 821 int err = -ENODEV; 822 823 mutex_lock(&ubd_lock); 824 825 ubd_dev = &ubd_devs[n]; 826 827 if(ubd_dev->file == NULL) 828 goto out; 829 830 /* you cannot remove a open disk */ 831 err = -EBUSY; 832 if(ubd_dev->count > 0) 833 goto out; 834 835 ubd_gendisk[n] = NULL; 836 if(disk != NULL){ 837 del_gendisk(disk); 838 put_disk(disk); 839 } 840 841 if(fake_gendisk[n] != NULL){ 842 del_gendisk(fake_gendisk[n]); 843 put_disk(fake_gendisk[n]); 844 fake_gendisk[n] = NULL; 845 } 846 847 err = 0; 848 platform_device_unregister(&ubd_dev->pdev); 849out: 850 mutex_unlock(&ubd_lock); 851 return err; 852} 853 854/* All these are called by mconsole in process context and without 855 * ubd-specific locks. The structure itself is const except for .list. 856 */ 857static struct mc_device ubd_mc = { 858 .list = LIST_HEAD_INIT(ubd_mc.list), 859 .name = "ubd", 860 .config = ubd_config, 861 .get_config = ubd_get_config, 862 .id = ubd_id, 863 .remove = ubd_remove, 864}; 865 866static int __init ubd_mc_init(void) 867{ 868 mconsole_register_dev(&ubd_mc); 869 return 0; 870} 871 872__initcall(ubd_mc_init); 873 874static int __init ubd0_init(void) 875{ 876 struct ubd *ubd_dev = &ubd_devs[0]; 877 878 mutex_lock(&ubd_lock); 879 if(ubd_dev->file == NULL) 880 ubd_dev->file = "root_fs"; 881 mutex_unlock(&ubd_lock); 882 883 return 0; 884} 885 886__initcall(ubd0_init); 887 888/* Used in ubd_init, which is an initcall */ 889static struct platform_driver ubd_driver = { 890 .driver = { 891 .name = DRIVER_NAME, 892 }, 893}; 894 895static int __init ubd_init(void) 896{ 897 char *error; 898 int i, err; 899 900 if (register_blkdev(MAJOR_NR, "ubd")) 901 return -1; 902 903 if (fake_major != MAJOR_NR) { 904 char name[sizeof("ubd_nnn\0")]; 905 906 snprintf(name, sizeof(name), "ubd_%d", fake_major); 907 if (register_blkdev(fake_major, "ubd")) 908 return -1; 909 } 910 platform_driver_register(&ubd_driver); 911 mutex_lock(&ubd_lock); 912 for (i = 0; i < MAX_DEV; i++){ 913 err = ubd_add(i, &error); 914 if(err) 915 printk(KERN_ERR "Failed to initialize ubd device %d :" 916 "%s\n", i, error); 917 } 918 mutex_unlock(&ubd_lock); 919 return 0; 920} 921 922late_initcall(ubd_init); 923 924static int __init ubd_driver_init(void){ 925 unsigned long stack; 926 int err; 927 928 /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/ 929 if(global_openflags.s){ 930 printk(KERN_INFO "ubd: Synchronous mode\n"); 931 /* Letting ubd=sync be like using ubd#s= instead of ubd#= is 932 * enough. So use anyway the io thread. */ 933 } 934 stack = alloc_stack(0, 0); 935 io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *), 936 &thread_fd); 937 if(io_pid < 0){ 938 printk(KERN_ERR 939 "ubd : Failed to start I/O thread (errno = %d) - " 940 "falling back to synchronous I/O\n", -io_pid); 941 io_pid = -1; 942 return 0; 943 } 944 err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr, 945 IRQF_DISABLED, "ubd", ubd_devs); 946 if(err != 0) 947 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err); 948 return 0; 949} 950 951device_initcall(ubd_driver_init); 952 953static int ubd_open(struct inode *inode, struct file *filp) 954{ 955 struct gendisk *disk = inode->i_bdev->bd_disk; 956 struct ubd *ubd_dev = disk->private_data; 957 int err = 0; 958 959 if(ubd_dev->count == 0){ 960 err = ubd_open_dev(ubd_dev); 961 if(err){ 962 printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n", 963 disk->disk_name, ubd_dev->file, -err); 964 goto out; 965 } 966 } 967 ubd_dev->count++; 968 set_disk_ro(disk, !ubd_dev->openflags.w); 969 970 /* This should no more be needed. And it didn't work anyway to exclude 971 * read-write remounting of filesystems.*/ 972 /*if((filp->f_mode & FMODE_WRITE) && !ubd_dev->openflags.w){ 973 if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev); 974 err = -EROFS; 975 }*/ 976 out: 977 return err; 978} 979 980static int ubd_release(struct inode * inode, struct file * file) 981{ 982 struct gendisk *disk = inode->i_bdev->bd_disk; 983 struct ubd *ubd_dev = disk->private_data; 984 985 if(--ubd_dev->count == 0) 986 ubd_close_dev(ubd_dev); 987 return 0; 988} 989 990static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask, 991 __u64 *cow_offset, unsigned long *bitmap, 992 __u64 bitmap_offset, unsigned long *bitmap_words, 993 __u64 bitmap_len) 994{ 995 __u64 sector = io_offset >> 9; 996 int i, update_bitmap = 0; 997 998 for(i = 0; i < length >> 9; i++){ 999 if(cow_mask != NULL) 1000 ubd_set_bit(i, (unsigned char *) cow_mask); 1001 if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) 1002 continue; 1003 1004 update_bitmap = 1; 1005 ubd_set_bit(sector + i, (unsigned char *) bitmap); 1006 } 1007 1008 if(!update_bitmap) 1009 return; 1010 1011 *cow_offset = sector / (sizeof(unsigned long) * 8); 1012 1013 /* This takes care of the case where we're exactly at the end of the 1014 * device, and *cow_offset + 1 is off the end. So, just back it up 1015 * by one word. Thanks to Lynn Kerby for the fix and James McMechan 1016 * for the original diagnosis. 1017 */ 1018 if(*cow_offset == ((bitmap_len + sizeof(unsigned long) - 1) / 1019 sizeof(unsigned long) - 1)) 1020 (*cow_offset)--; 1021 1022 bitmap_words[0] = bitmap[*cow_offset]; 1023 bitmap_words[1] = bitmap[*cow_offset + 1]; 1024 1025 *cow_offset *= sizeof(unsigned long); 1026 *cow_offset += bitmap_offset; 1027} 1028 1029static void cowify_req(struct io_thread_req *req, unsigned long *bitmap, 1030 __u64 bitmap_offset, __u64 bitmap_len) 1031{ 1032 __u64 sector = req->offset >> 9; 1033 int i; 1034 1035 if(req->length > (sizeof(req->sector_mask) * 8) << 9) 1036 panic("Operation too long"); 1037 1038 if(req->op == UBD_READ) { 1039 for(i = 0; i < req->length >> 9; i++){ 1040 if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) 1041 ubd_set_bit(i, (unsigned char *) 1042 &req->sector_mask); 1043 } 1044 } 1045 else cowify_bitmap(req->offset, req->length, &req->sector_mask, 1046 &req->cow_offset, bitmap, bitmap_offset, 1047 req->bitmap_words, bitmap_len); 1048} 1049 1050/* Called with dev->lock held */ 1051static void prepare_request(struct request *req, struct io_thread_req *io_req, 1052 unsigned long long offset, int page_offset, 1053 int len, struct page *page) 1054{ 1055 struct gendisk *disk = req->rq_disk; 1056 struct ubd *ubd_dev = disk->private_data; 1057 1058 io_req->req = req; 1059 io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd : 1060 ubd_dev->fd; 1061 io_req->fds[1] = ubd_dev->fd; 1062 io_req->cow_offset = -1; 1063 io_req->offset = offset; 1064 io_req->length = len; 1065 io_req->error = 0; 1066 io_req->sector_mask = 0; 1067 1068 io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE; 1069 io_req->offsets[0] = 0; 1070 io_req->offsets[1] = ubd_dev->cow.data_offset; 1071 io_req->buffer = page_address(page) + page_offset; 1072 io_req->sectorsize = 1 << 9; 1073 1074 if(ubd_dev->cow.file != NULL) 1075 cowify_req(io_req, ubd_dev->cow.bitmap, 1076 ubd_dev->cow.bitmap_offset, ubd_dev->cow.bitmap_len); 1077 1078} 1079 1080/* Called with dev->lock held */ 1081static void do_ubd_request(request_queue_t *q) 1082{ 1083 struct io_thread_req *io_req; 1084 struct request *req; 1085 int n; 1086 1087 while(1){ 1088 struct ubd *dev = q->queuedata; 1089 if(dev->end_sg == 0){ 1090 struct request *req = elv_next_request(q); 1091 if(req == NULL) 1092 return; 1093 1094 dev->request = req; 1095 blkdev_dequeue_request(req); 1096 dev->start_sg = 0; 1097 dev->end_sg = blk_rq_map_sg(q, req, dev->sg); 1098 } 1099 1100 req = dev->request; 1101 while(dev->start_sg < dev->end_sg){ 1102 struct scatterlist *sg = &dev->sg[dev->start_sg]; 1103 1104 io_req = kmalloc(sizeof(struct io_thread_req), 1105 GFP_ATOMIC); 1106 if(io_req == NULL){ 1107 if(list_empty(&dev->restart)) 1108 list_add(&dev->restart, &restart); 1109 return; 1110 } 1111 prepare_request(req, io_req, 1112 (unsigned long long) req->sector << 9, 1113 sg->offset, sg->length, sg->page); 1114 1115 n = os_write_file(thread_fd, &io_req, 1116 sizeof(struct io_thread_req *)); 1117 if(n != sizeof(struct io_thread_req *)){ 1118 if(n != -EAGAIN) 1119 printk("write to io thread failed, " 1120 "errno = %d\n", -n); 1121 else if(list_empty(&dev->restart)) 1122 list_add(&dev->restart, &restart); 1123 return; 1124 } 1125 1126 req->sector += sg->length >> 9; 1127 dev->start_sg++; 1128 } 1129 dev->end_sg = 0; 1130 dev->request = NULL; 1131 } 1132} 1133 1134static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo) 1135{ 1136 struct ubd *ubd_dev = bdev->bd_disk->private_data; 1137 1138 geo->heads = 128; 1139 geo->sectors = 32; 1140 geo->cylinders = ubd_dev->size / (128 * 32 * 512); 1141 return 0; 1142} 1143 1144static int ubd_ioctl(struct inode * inode, struct file * file, 1145 unsigned int cmd, unsigned long arg) 1146{ 1147 struct ubd *ubd_dev = inode->i_bdev->bd_disk->private_data; 1148 struct hd_driveid ubd_id = { 1149 .cyls = 0, 1150 .heads = 128, 1151 .sectors = 32, 1152 }; 1153 1154 switch (cmd) { 1155 struct cdrom_volctrl volume; 1156 case HDIO_GET_IDENTITY: 1157 ubd_id.cyls = ubd_dev->size / (128 * 32 * 512); 1158 if(copy_to_user((char __user *) arg, (char *) &ubd_id, 1159 sizeof(ubd_id))) 1160 return -EFAULT; 1161 return 0; 1162 1163 case CDROMVOLREAD: 1164 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume))) 1165 return -EFAULT; 1166 volume.channel0 = 255; 1167 volume.channel1 = 255; 1168 volume.channel2 = 255; 1169 volume.channel3 = 255; 1170 if(copy_to_user((char __user *) arg, &volume, sizeof(volume))) 1171 return -EFAULT; 1172 return 0; 1173 } 1174 return -EINVAL; 1175} 1176 1177static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow) 1178{ 1179 struct uml_stat buf1, buf2; 1180 int err; 1181 1182 if(from_cmdline == NULL) 1183 return 0; 1184 if(!strcmp(from_cmdline, from_cow)) 1185 return 0; 1186 1187 err = os_stat_file(from_cmdline, &buf1); 1188 if(err < 0){ 1189 printk("Couldn't stat '%s', err = %d\n", from_cmdline, -err); 1190 return 0; 1191 } 1192 err = os_stat_file(from_cow, &buf2); 1193 if(err < 0){ 1194 printk("Couldn't stat '%s', err = %d\n", from_cow, -err); 1195 return 1; 1196 } 1197 if((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino)) 1198 return 0; 1199 1200 printk("Backing file mismatch - \"%s\" requested,\n" 1201 "\"%s\" specified in COW header of \"%s\"\n", 1202 from_cmdline, from_cow, cow); 1203 return 1; 1204} 1205 1206static int backing_file_mismatch(char *file, __u64 size, time_t mtime) 1207{ 1208 unsigned long modtime; 1209 unsigned long long actual; 1210 int err; 1211 1212 err = os_file_modtime(file, &modtime); 1213 if(err < 0){ 1214 printk("Failed to get modification time of backing file " 1215 "\"%s\", err = %d\n", file, -err); 1216 return err; 1217 } 1218 1219 err = os_file_size(file, &actual); 1220 if(err < 0){ 1221 printk("Failed to get size of backing file \"%s\", " 1222 "err = %d\n", file, -err); 1223 return err; 1224 } 1225 1226 if(actual != size){ 1227 /*__u64 can be a long on AMD64 and with %lu GCC complains; so 1228 * the typecast.*/ 1229 printk("Size mismatch (%llu vs %llu) of COW header vs backing " 1230 "file\n", (unsigned long long) size, actual); 1231 return -EINVAL; 1232 } 1233 if(modtime != mtime){ 1234 printk("mtime mismatch (%ld vs %ld) of COW header vs backing " 1235 "file\n", mtime, modtime); 1236 return -EINVAL; 1237 } 1238 return 0; 1239} 1240 1241int read_cow_bitmap(int fd, void *buf, int offset, int len) 1242{ 1243 int err; 1244 1245 err = os_seek_file(fd, offset); 1246 if(err < 0) 1247 return err; 1248 1249 err = os_read_file(fd, buf, len); 1250 if(err < 0) 1251 return err; 1252 1253 return 0; 1254} 1255 1256int open_ubd_file(char *file, struct openflags *openflags, int shared, 1257 char **backing_file_out, int *bitmap_offset_out, 1258 unsigned long *bitmap_len_out, int *data_offset_out, 1259 int *create_cow_out) 1260{ 1261 time_t mtime; 1262 unsigned long long size; 1263 __u32 version, align; 1264 char *backing_file; 1265 int fd, err, sectorsize, asked_switch, mode = 0644; 1266 1267 fd = os_open_file(file, *openflags, mode); 1268 if (fd < 0) { 1269 if ((fd == -ENOENT) && (create_cow_out != NULL)) 1270 *create_cow_out = 1; 1271 if (!openflags->w || 1272 ((fd != -EROFS) && (fd != -EACCES))) 1273 return fd; 1274 openflags->w = 0; 1275 fd = os_open_file(file, *openflags, mode); 1276 if (fd < 0) 1277 return fd; 1278 } 1279 1280 if(shared) 1281 printk("Not locking \"%s\" on the host\n", file); 1282 else { 1283 err = os_lock_file(fd, openflags->w); 1284 if(err < 0){ 1285 printk("Failed to lock '%s', err = %d\n", file, -err); 1286 goto out_close; 1287 } 1288 } 1289 1290 /* Successful return case! */ 1291 if(backing_file_out == NULL) 1292 return fd; 1293 1294 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime, 1295 &size, §orsize, &align, bitmap_offset_out); 1296 if(err && (*backing_file_out != NULL)){ 1297 printk("Failed to read COW header from COW file \"%s\", " 1298 "errno = %d\n", file, -err); 1299 goto out_close; 1300 } 1301 if(err) 1302 return fd; 1303 1304 asked_switch = path_requires_switch(*backing_file_out, backing_file, file); 1305 1306 /* Allow switching only if no mismatch. */ 1307 if (asked_switch && !backing_file_mismatch(*backing_file_out, size, mtime)) { 1308 printk("Switching backing file to '%s'\n", *backing_file_out); 1309 err = write_cow_header(file, fd, *backing_file_out, 1310 sectorsize, align, &size); 1311 if (err) { 1312 printk("Switch failed, errno = %d\n", -err); 1313 goto out_close; 1314 } 1315 } else { 1316 *backing_file_out = backing_file; 1317 err = backing_file_mismatch(*backing_file_out, size, mtime); 1318 if (err) 1319 goto out_close; 1320 } 1321 1322 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out, 1323 bitmap_len_out, data_offset_out); 1324 1325 return fd; 1326 out_close: 1327 os_close_file(fd); 1328 return err; 1329} 1330 1331int create_cow_file(char *cow_file, char *backing_file, struct openflags flags, 1332 int sectorsize, int alignment, int *bitmap_offset_out, 1333 unsigned long *bitmap_len_out, int *data_offset_out) 1334{ 1335 int err, fd; 1336 1337 flags.c = 1; 1338 fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL); 1339 if(fd < 0){ 1340 err = fd; 1341 printk("Open of COW file '%s' failed, errno = %d\n", cow_file, 1342 -err); 1343 goto out; 1344 } 1345 1346 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment, 1347 bitmap_offset_out, bitmap_len_out, 1348 data_offset_out); 1349 if(!err) 1350 return fd; 1351 os_close_file(fd); 1352 out: 1353 return err; 1354} 1355 1356static int update_bitmap(struct io_thread_req *req) 1357{ 1358 int n; 1359 1360 if(req->cow_offset == -1) 1361 return 0; 1362 1363 n = os_seek_file(req->fds[1], req->cow_offset); 1364 if(n < 0){ 1365 printk("do_io - bitmap lseek failed : err = %d\n", -n); 1366 return 1; 1367 } 1368 1369 n = os_write_file(req->fds[1], &req->bitmap_words, 1370 sizeof(req->bitmap_words)); 1371 if(n != sizeof(req->bitmap_words)){ 1372 printk("do_io - bitmap update failed, err = %d fd = %d\n", -n, 1373 req->fds[1]); 1374 return 1; 1375 } 1376 1377 return 0; 1378} 1379 1380void do_io(struct io_thread_req *req) 1381{ 1382 char *buf; 1383 unsigned long len; 1384 int n, nsectors, start, end, bit; 1385 int err; 1386 __u64 off; 1387 1388 nsectors = req->length / req->sectorsize; 1389 start = 0; 1390 do { 1391 bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask); 1392 end = start; 1393 while((end < nsectors) && 1394 (ubd_test_bit(end, (unsigned char *) 1395 &req->sector_mask) == bit)) 1396 end++; 1397 1398 off = req->offset + req->offsets[bit] + 1399 start * req->sectorsize; 1400 len = (end - start) * req->sectorsize; 1401 buf = &req->buffer[start * req->sectorsize]; 1402 1403 err = os_seek_file(req->fds[bit], off); 1404 if(err < 0){ 1405 printk("do_io - lseek failed : err = %d\n", -err); 1406 req->error = 1; 1407 return; 1408 } 1409 if(req->op == UBD_READ){ 1410 n = 0; 1411 do { 1412 buf = &buf[n]; 1413 len -= n; 1414 n = os_read_file(req->fds[bit], buf, len); 1415 if (n < 0) { 1416 printk("do_io - read failed, err = %d " 1417 "fd = %d\n", -n, req->fds[bit]); 1418 req->error = 1; 1419 return; 1420 } 1421 } while((n < len) && (n != 0)); 1422 if (n < len) memset(&buf[n], 0, len - n); 1423 } else { 1424 n = os_write_file(req->fds[bit], buf, len); 1425 if(n != len){ 1426 printk("do_io - write failed err = %d " 1427 "fd = %d\n", -n, req->fds[bit]); 1428 req->error = 1; 1429 return; 1430 } 1431 } 1432 1433 start = end; 1434 } while(start < nsectors); 1435 1436 req->error = update_bitmap(req); 1437} 1438 1439/* Changed in start_io_thread, which is serialized by being called only 1440 * from ubd_init, which is an initcall. 1441 */ 1442int kernel_fd = -1; 1443 1444static int io_count = 0; 1445 1446int io_thread(void *arg) 1447{ 1448 struct io_thread_req *req; 1449 int n; 1450 1451 ignore_sigwinch_sig(); 1452 while(1){ 1453 n = os_read_file(kernel_fd, &req, 1454 sizeof(struct io_thread_req *)); 1455 if(n != sizeof(struct io_thread_req *)){ 1456 if(n < 0) 1457 printk("io_thread - read failed, fd = %d, " 1458 "err = %d\n", kernel_fd, -n); 1459 else { 1460 printk("io_thread - short read, fd = %d, " 1461 "length = %d\n", kernel_fd, n); 1462 } 1463 continue; 1464 } 1465 io_count++; 1466 do_io(req); 1467 n = os_write_file(kernel_fd, &req, 1468 sizeof(struct io_thread_req *)); 1469 if(n != sizeof(struct io_thread_req *)) 1470 printk("io_thread - write failed, fd = %d, err = %d\n", 1471 kernel_fd, -n); 1472 } 1473 1474 return 0; 1475} 1476