1/* 2 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) 3 * Licensed under the GPL 4 */ 5 6/* 2001-09-28...2002-04-17 7 * Partition stuff by James_McMechan@hotmail.com 8 * old style ubd by setting UBD_SHIFT to 0 9 * 2002-09-27...2002-10-18 massive tinkering for 2.5 10 * partitions have changed in 2.5 11 * 2003-01-29 more tinkering for 2.5.59-1 12 * This should now address the sysfs problems and has 13 * the symlink for devfs to allow for booting with 14 * the common /dev/ubd/discX/... names rather than 15 * only /dev/ubdN/discN this version also has lots of 16 * clean ups preparing for ubd-many. 17 * James McMechan 18 */ 19 20#define UBD_SHIFT 4 21 22#include "linux/kernel.h" 23#include "linux/module.h" 24#include "linux/blkdev.h" 25#include "linux/ata.h" 26#include "linux/hdreg.h" 27#include "linux/init.h" 28#include "linux/cdrom.h" 29#include "linux/proc_fs.h" 30#include "linux/seq_file.h" 31#include "linux/ctype.h" 32#include "linux/capability.h" 33#include "linux/mm.h" 34#include "linux/slab.h" 35#include "linux/vmalloc.h" 36#include "linux/smp_lock.h" 37#include "linux/blkpg.h" 38#include "linux/genhd.h" 39#include "linux/spinlock.h" 40#include "linux/platform_device.h" 41#include "linux/scatterlist.h" 42#include "asm/segment.h" 43#include "asm/uaccess.h" 44#include "asm/irq.h" 45#include "asm/types.h" 46#include "asm/tlbflush.h" 47#include "mem_user.h" 48#include "kern_util.h" 49#include "kern.h" 50#include "mconsole_kern.h" 51#include "init.h" 52#include "irq_user.h" 53#include "irq_kern.h" 54#include "ubd_user.h" 55#include "os.h" 56#include "mem.h" 57#include "mem_kern.h" 58#include "cow.h" 59 60enum ubd_req { UBD_READ, UBD_WRITE }; 61 62struct io_thread_req { 63 struct request *req; 64 enum ubd_req op; 65 int fds[2]; 66 unsigned long offsets[2]; 67 unsigned long long offset; 68 unsigned long length; 69 char *buffer; 70 int sectorsize; 71 unsigned long sector_mask; 72 unsigned long long cow_offset; 73 unsigned long bitmap_words[2]; 74 int error; 75}; 76 77static inline int ubd_test_bit(__u64 bit, unsigned char *data) 78{ 79 __u64 n; 80 int bits, off; 81 82 bits = sizeof(data[0]) * 8; 83 n = bit / bits; 84 off = bit % bits; 85 return (data[n] & (1 << off)) != 0; 86} 87 88static inline void ubd_set_bit(__u64 bit, unsigned char *data) 89{ 90 __u64 n; 91 int bits, off; 92 93 bits = sizeof(data[0]) * 8; 94 n = bit / bits; 95 off = bit % bits; 96 data[n] |= (1 << off); 97} 98/*End stuff from ubd_user.h*/ 99 100#define DRIVER_NAME "uml-blkdev" 101 102static DEFINE_MUTEX(ubd_lock); 103 104static int ubd_open(struct block_device *bdev, fmode_t mode); 105static int ubd_release(struct gendisk *disk, fmode_t mode); 106static int ubd_ioctl(struct block_device *bdev, fmode_t mode, 107 unsigned int cmd, unsigned long arg); 108static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo); 109 110#define MAX_DEV (16) 111 112static const struct block_device_operations ubd_blops = { 113 .owner = THIS_MODULE, 114 .open = ubd_open, 115 .release = ubd_release, 116 .ioctl = ubd_ioctl, 117 .getgeo = ubd_getgeo, 118}; 119 120/* Protected by ubd_lock */ 121static int fake_major = UBD_MAJOR; 122static struct gendisk *ubd_gendisk[MAX_DEV]; 123static struct gendisk *fake_gendisk[MAX_DEV]; 124 125#ifdef CONFIG_BLK_DEV_UBD_SYNC 126#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \ 127 .cl = 1 }) 128#else 129#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \ 130 .cl = 1 }) 131#endif 132static struct openflags global_openflags = OPEN_FLAGS; 133 134struct cow { 135 /* backing file name */ 136 char *file; 137 /* backing file fd */ 138 int fd; 139 unsigned long *bitmap; 140 unsigned long bitmap_len; 141 int bitmap_offset; 142 int data_offset; 143}; 144 145#define MAX_SG 64 146 147struct ubd { 148 struct list_head restart; 149 /* name (and fd, below) of the file opened for writing, either the 150 * backing or the cow file. */ 151 char *file; 152 int count; 153 int fd; 154 __u64 size; 155 struct openflags boot_openflags; 156 struct openflags openflags; 157 unsigned shared:1; 158 unsigned no_cow:1; 159 struct cow cow; 160 struct platform_device pdev; 161 struct request_queue *queue; 162 spinlock_t lock; 163 struct scatterlist sg[MAX_SG]; 164 struct request *request; 165 int start_sg, end_sg; 166 sector_t rq_pos; 167}; 168 169#define DEFAULT_COW { \ 170 .file = NULL, \ 171 .fd = -1, \ 172 .bitmap = NULL, \ 173 .bitmap_offset = 0, \ 174 .data_offset = 0, \ 175} 176 177#define DEFAULT_UBD { \ 178 .file = NULL, \ 179 .count = 0, \ 180 .fd = -1, \ 181 .size = -1, \ 182 .boot_openflags = OPEN_FLAGS, \ 183 .openflags = OPEN_FLAGS, \ 184 .no_cow = 0, \ 185 .shared = 0, \ 186 .cow = DEFAULT_COW, \ 187 .lock = SPIN_LOCK_UNLOCKED, \ 188 .request = NULL, \ 189 .start_sg = 0, \ 190 .end_sg = 0, \ 191 .rq_pos = 0, \ 192} 193 194/* Protected by ubd_lock */ 195static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD }; 196 197/* Only changed by fake_ide_setup which is a setup */ 198static int fake_ide = 0; 199static struct proc_dir_entry *proc_ide_root = NULL; 200static struct proc_dir_entry *proc_ide = NULL; 201 202static void make_proc_ide(void) 203{ 204 proc_ide_root = proc_mkdir("ide", NULL); 205 proc_ide = proc_mkdir("ide0", proc_ide_root); 206} 207 208static int fake_ide_media_proc_show(struct seq_file *m, void *v) 209{ 210 seq_puts(m, "disk\n"); 211 return 0; 212} 213 214static int fake_ide_media_proc_open(struct inode *inode, struct file *file) 215{ 216 return single_open(file, fake_ide_media_proc_show, NULL); 217} 218 219static const struct file_operations fake_ide_media_proc_fops = { 220 .owner = THIS_MODULE, 221 .open = fake_ide_media_proc_open, 222 .read = seq_read, 223 .llseek = seq_lseek, 224 .release = single_release, 225}; 226 227static void make_ide_entries(const char *dev_name) 228{ 229 struct proc_dir_entry *dir, *ent; 230 char name[64]; 231 232 if(proc_ide_root == NULL) make_proc_ide(); 233 234 dir = proc_mkdir(dev_name, proc_ide); 235 if(!dir) return; 236 237 ent = proc_create("media", S_IRUGO, dir, &fake_ide_media_proc_fops); 238 if(!ent) return; 239 snprintf(name, sizeof(name), "ide0/%s", dev_name); 240 proc_symlink(dev_name, proc_ide_root, name); 241} 242 243static int fake_ide_setup(char *str) 244{ 245 fake_ide = 1; 246 return 1; 247} 248 249__setup("fake_ide", fake_ide_setup); 250 251__uml_help(fake_ide_setup, 252"fake_ide\n" 253" Create ide0 entries that map onto ubd devices.\n\n" 254); 255 256static int parse_unit(char **ptr) 257{ 258 char *str = *ptr, *end; 259 int n = -1; 260 261 if(isdigit(*str)) { 262 n = simple_strtoul(str, &end, 0); 263 if(end == str) 264 return -1; 265 *ptr = end; 266 } 267 else if (('a' <= *str) && (*str <= 'z')) { 268 n = *str - 'a'; 269 str++; 270 *ptr = str; 271 } 272 return n; 273} 274 275/* If *index_out == -1 at exit, the passed option was a general one; 276 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it 277 * should not be freed on exit. 278 */ 279static int ubd_setup_common(char *str, int *index_out, char **error_out) 280{ 281 struct ubd *ubd_dev; 282 struct openflags flags = global_openflags; 283 char *backing_file; 284 int n, err = 0, i; 285 286 if(index_out) *index_out = -1; 287 n = *str; 288 if(n == '='){ 289 char *end; 290 int major; 291 292 str++; 293 if(!strcmp(str, "sync")){ 294 global_openflags = of_sync(global_openflags); 295 goto out1; 296 } 297 298 err = -EINVAL; 299 major = simple_strtoul(str, &end, 0); 300 if((*end != '\0') || (end == str)){ 301 *error_out = "Didn't parse major number"; 302 goto out1; 303 } 304 305 mutex_lock(&ubd_lock); 306 if (fake_major != UBD_MAJOR) { 307 *error_out = "Can't assign a fake major twice"; 308 goto out1; 309 } 310 311 fake_major = major; 312 313 printk(KERN_INFO "Setting extra ubd major number to %d\n", 314 major); 315 err = 0; 316 out1: 317 mutex_unlock(&ubd_lock); 318 return err; 319 } 320 321 n = parse_unit(&str); 322 if(n < 0){ 323 *error_out = "Couldn't parse device number"; 324 return -EINVAL; 325 } 326 if(n >= MAX_DEV){ 327 *error_out = "Device number out of range"; 328 return 1; 329 } 330 331 err = -EBUSY; 332 mutex_lock(&ubd_lock); 333 334 ubd_dev = &ubd_devs[n]; 335 if(ubd_dev->file != NULL){ 336 *error_out = "Device is already configured"; 337 goto out; 338 } 339 340 if (index_out) 341 *index_out = n; 342 343 err = -EINVAL; 344 for (i = 0; i < sizeof("rscd="); i++) { 345 switch (*str) { 346 case 'r': 347 flags.w = 0; 348 break; 349 case 's': 350 flags.s = 1; 351 break; 352 case 'd': 353 ubd_dev->no_cow = 1; 354 break; 355 case 'c': 356 ubd_dev->shared = 1; 357 break; 358 case '=': 359 str++; 360 goto break_loop; 361 default: 362 *error_out = "Expected '=' or flag letter " 363 "(r, s, c, or d)"; 364 goto out; 365 } 366 str++; 367 } 368 369 if (*str == '=') 370 *error_out = "Too many flags specified"; 371 else 372 *error_out = "Missing '='"; 373 goto out; 374 375break_loop: 376 backing_file = strchr(str, ','); 377 378 if (backing_file == NULL) 379 backing_file = strchr(str, ':'); 380 381 if(backing_file != NULL){ 382 if(ubd_dev->no_cow){ 383 *error_out = "Can't specify both 'd' and a cow file"; 384 goto out; 385 } 386 else { 387 *backing_file = '\0'; 388 backing_file++; 389 } 390 } 391 err = 0; 392 ubd_dev->file = str; 393 ubd_dev->cow.file = backing_file; 394 ubd_dev->boot_openflags = flags; 395out: 396 mutex_unlock(&ubd_lock); 397 return err; 398} 399 400static int ubd_setup(char *str) 401{ 402 char *error; 403 int err; 404 405 err = ubd_setup_common(str, NULL, &error); 406 if(err) 407 printk(KERN_ERR "Failed to initialize device with \"%s\" : " 408 "%s\n", str, error); 409 return 1; 410} 411 412__setup("ubd", ubd_setup); 413__uml_help(ubd_setup, 414"ubd<n><flags>=<filename>[(:|,)<filename2>]\n" 415" This is used to associate a device with a file in the underlying\n" 416" filesystem. When specifying two filenames, the first one is the\n" 417" COW name and the second is the backing file name. As separator you can\n" 418" use either a ':' or a ',': the first one allows writing things like;\n" 419" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n" 420" while with a ',' the shell would not expand the 2nd '~'.\n" 421" When using only one filename, UML will detect whether to treat it like\n" 422" a COW file or a backing file. To override this detection, add the 'd'\n" 423" flag:\n" 424" ubd0d=BackingFile\n" 425" Usually, there is a filesystem in the file, but \n" 426" that's not required. Swap devices containing swap files can be\n" 427" specified like this. Also, a file which doesn't contain a\n" 428" filesystem can have its contents read in the virtual \n" 429" machine by running 'dd' on the device. <n> must be in the range\n" 430" 0 to 7. Appending an 'r' to the number will cause that device\n" 431" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n" 432" an 's' will cause data to be written to disk on the host immediately.\n" 433" 'c' will cause the device to be treated as being shared between multiple\n" 434" UMLs and file locking will be turned off - this is appropriate for a\n" 435" cluster filesystem and inappropriate at almost all other times.\n\n" 436); 437 438static int udb_setup(char *str) 439{ 440 printk("udb%s specified on command line is almost certainly a ubd -> " 441 "udb TYPO\n", str); 442 return 1; 443} 444 445__setup("udb", udb_setup); 446__uml_help(udb_setup, 447"udb\n" 448" This option is here solely to catch ubd -> udb typos, which can be\n" 449" to impossible to catch visually unless you specifically look for\n" 450" them. The only result of any option starting with 'udb' is an error\n" 451" in the boot output.\n\n" 452); 453 454static void do_ubd_request(struct request_queue * q); 455 456/* Only changed by ubd_init, which is an initcall. */ 457static int thread_fd = -1; 458static LIST_HEAD(restart); 459 460/* Called without dev->lock held, and only in interrupt context. */ 461static void ubd_handler(void) 462{ 463 struct io_thread_req *req; 464 struct ubd *ubd; 465 struct list_head *list, *next_ele; 466 unsigned long flags; 467 int n; 468 469 while(1){ 470 n = os_read_file(thread_fd, &req, 471 sizeof(struct io_thread_req *)); 472 if(n != sizeof(req)){ 473 if(n == -EAGAIN) 474 break; 475 printk(KERN_ERR "spurious interrupt in ubd_handler, " 476 "err = %d\n", -n); 477 return; 478 } 479 480 blk_end_request(req->req, 0, req->length); 481 kfree(req); 482 } 483 reactivate_fd(thread_fd, UBD_IRQ); 484 485 list_for_each_safe(list, next_ele, &restart){ 486 ubd = container_of(list, struct ubd, restart); 487 list_del_init(&ubd->restart); 488 spin_lock_irqsave(&ubd->lock, flags); 489 do_ubd_request(ubd->queue); 490 spin_unlock_irqrestore(&ubd->lock, flags); 491 } 492} 493 494static irqreturn_t ubd_intr(int irq, void *dev) 495{ 496 ubd_handler(); 497 return IRQ_HANDLED; 498} 499 500/* Only changed by ubd_init, which is an initcall. */ 501static int io_pid = -1; 502 503static void kill_io_thread(void) 504{ 505 if(io_pid != -1) 506 os_kill_process(io_pid, 1); 507} 508 509__uml_exitcall(kill_io_thread); 510 511static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out) 512{ 513 char *file; 514 515 file = ubd_dev->cow.file ? ubd_dev->cow.file : ubd_dev->file; 516 return os_file_size(file, size_out); 517} 518 519static int read_cow_bitmap(int fd, void *buf, int offset, int len) 520{ 521 int err; 522 523 err = os_seek_file(fd, offset); 524 if (err < 0) 525 return err; 526 527 err = os_read_file(fd, buf, len); 528 if (err < 0) 529 return err; 530 531 return 0; 532} 533 534static int backing_file_mismatch(char *file, __u64 size, time_t mtime) 535{ 536 unsigned long modtime; 537 unsigned long long actual; 538 int err; 539 540 err = os_file_modtime(file, &modtime); 541 if (err < 0) { 542 printk(KERN_ERR "Failed to get modification time of backing " 543 "file \"%s\", err = %d\n", file, -err); 544 return err; 545 } 546 547 err = os_file_size(file, &actual); 548 if (err < 0) { 549 printk(KERN_ERR "Failed to get size of backing file \"%s\", " 550 "err = %d\n", file, -err); 551 return err; 552 } 553 554 if (actual != size) { 555 /*__u64 can be a long on AMD64 and with %lu GCC complains; so 556 * the typecast.*/ 557 printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header " 558 "vs backing file\n", (unsigned long long) size, actual); 559 return -EINVAL; 560 } 561 if (modtime != mtime) { 562 printk(KERN_ERR "mtime mismatch (%ld vs %ld) of COW header vs " 563 "backing file\n", mtime, modtime); 564 return -EINVAL; 565 } 566 return 0; 567} 568 569static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow) 570{ 571 struct uml_stat buf1, buf2; 572 int err; 573 574 if (from_cmdline == NULL) 575 return 0; 576 if (!strcmp(from_cmdline, from_cow)) 577 return 0; 578 579 err = os_stat_file(from_cmdline, &buf1); 580 if (err < 0) { 581 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline, 582 -err); 583 return 0; 584 } 585 err = os_stat_file(from_cow, &buf2); 586 if (err < 0) { 587 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow, 588 -err); 589 return 1; 590 } 591 if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino)) 592 return 0; 593 594 printk(KERN_ERR "Backing file mismatch - \"%s\" requested, " 595 "\"%s\" specified in COW header of \"%s\"\n", 596 from_cmdline, from_cow, cow); 597 return 1; 598} 599 600static int open_ubd_file(char *file, struct openflags *openflags, int shared, 601 char **backing_file_out, int *bitmap_offset_out, 602 unsigned long *bitmap_len_out, int *data_offset_out, 603 int *create_cow_out) 604{ 605 time_t mtime; 606 unsigned long long size; 607 __u32 version, align; 608 char *backing_file; 609 int fd, err, sectorsize, asked_switch, mode = 0644; 610 611 fd = os_open_file(file, *openflags, mode); 612 if (fd < 0) { 613 if ((fd == -ENOENT) && (create_cow_out != NULL)) 614 *create_cow_out = 1; 615 if (!openflags->w || 616 ((fd != -EROFS) && (fd != -EACCES))) 617 return fd; 618 openflags->w = 0; 619 fd = os_open_file(file, *openflags, mode); 620 if (fd < 0) 621 return fd; 622 } 623 624 if (shared) 625 printk(KERN_INFO "Not locking \"%s\" on the host\n", file); 626 else { 627 err = os_lock_file(fd, openflags->w); 628 if (err < 0) { 629 printk(KERN_ERR "Failed to lock '%s', err = %d\n", 630 file, -err); 631 goto out_close; 632 } 633 } 634 635 /* Successful return case! */ 636 if (backing_file_out == NULL) 637 return fd; 638 639 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime, 640 &size, §orsize, &align, bitmap_offset_out); 641 if (err && (*backing_file_out != NULL)) { 642 printk(KERN_ERR "Failed to read COW header from COW file " 643 "\"%s\", errno = %d\n", file, -err); 644 goto out_close; 645 } 646 if (err) 647 return fd; 648 649 asked_switch = path_requires_switch(*backing_file_out, backing_file, 650 file); 651 652 /* Allow switching only if no mismatch. */ 653 if (asked_switch && !backing_file_mismatch(*backing_file_out, size, 654 mtime)) { 655 printk(KERN_ERR "Switching backing file to '%s'\n", 656 *backing_file_out); 657 err = write_cow_header(file, fd, *backing_file_out, 658 sectorsize, align, &size); 659 if (err) { 660 printk(KERN_ERR "Switch failed, errno = %d\n", -err); 661 goto out_close; 662 } 663 } else { 664 *backing_file_out = backing_file; 665 err = backing_file_mismatch(*backing_file_out, size, mtime); 666 if (err) 667 goto out_close; 668 } 669 670 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out, 671 bitmap_len_out, data_offset_out); 672 673 return fd; 674 out_close: 675 os_close_file(fd); 676 return err; 677} 678 679static int create_cow_file(char *cow_file, char *backing_file, 680 struct openflags flags, 681 int sectorsize, int alignment, int *bitmap_offset_out, 682 unsigned long *bitmap_len_out, int *data_offset_out) 683{ 684 int err, fd; 685 686 flags.c = 1; 687 fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL); 688 if (fd < 0) { 689 err = fd; 690 printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n", 691 cow_file, -err); 692 goto out; 693 } 694 695 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment, 696 bitmap_offset_out, bitmap_len_out, 697 data_offset_out); 698 if (!err) 699 return fd; 700 os_close_file(fd); 701 out: 702 return err; 703} 704 705static void ubd_close_dev(struct ubd *ubd_dev) 706{ 707 os_close_file(ubd_dev->fd); 708 if(ubd_dev->cow.file == NULL) 709 return; 710 711 os_close_file(ubd_dev->cow.fd); 712 vfree(ubd_dev->cow.bitmap); 713 ubd_dev->cow.bitmap = NULL; 714} 715 716static int ubd_open_dev(struct ubd *ubd_dev) 717{ 718 struct openflags flags; 719 char **back_ptr; 720 int err, create_cow, *create_ptr; 721 int fd; 722 723 ubd_dev->openflags = ubd_dev->boot_openflags; 724 create_cow = 0; 725 create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL; 726 back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file; 727 728 fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared, 729 back_ptr, &ubd_dev->cow.bitmap_offset, 730 &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset, 731 create_ptr); 732 733 if((fd == -ENOENT) && create_cow){ 734 fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file, 735 ubd_dev->openflags, 1 << 9, PAGE_SIZE, 736 &ubd_dev->cow.bitmap_offset, 737 &ubd_dev->cow.bitmap_len, 738 &ubd_dev->cow.data_offset); 739 if(fd >= 0){ 740 printk(KERN_INFO "Creating \"%s\" as COW file for " 741 "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file); 742 } 743 } 744 745 if(fd < 0){ 746 printk("Failed to open '%s', errno = %d\n", ubd_dev->file, 747 -fd); 748 return fd; 749 } 750 ubd_dev->fd = fd; 751 752 if(ubd_dev->cow.file != NULL){ 753 blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long)); 754 755 err = -ENOMEM; 756 ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len); 757 if(ubd_dev->cow.bitmap == NULL){ 758 printk(KERN_ERR "Failed to vmalloc COW bitmap\n"); 759 goto error; 760 } 761 flush_tlb_kernel_vm(); 762 763 err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap, 764 ubd_dev->cow.bitmap_offset, 765 ubd_dev->cow.bitmap_len); 766 if(err < 0) 767 goto error; 768 769 flags = ubd_dev->openflags; 770 flags.w = 0; 771 err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL, 772 NULL, NULL, NULL, NULL); 773 if(err < 0) goto error; 774 ubd_dev->cow.fd = err; 775 } 776 return 0; 777 error: 778 os_close_file(ubd_dev->fd); 779 return err; 780} 781 782static void ubd_device_release(struct device *dev) 783{ 784 struct ubd *ubd_dev = dev_get_drvdata(dev); 785 786 blk_cleanup_queue(ubd_dev->queue); 787 *ubd_dev = ((struct ubd) DEFAULT_UBD); 788} 789 790static int ubd_disk_register(int major, u64 size, int unit, 791 struct gendisk **disk_out) 792{ 793 struct gendisk *disk; 794 795 disk = alloc_disk(1 << UBD_SHIFT); 796 if(disk == NULL) 797 return -ENOMEM; 798 799 disk->major = major; 800 disk->first_minor = unit << UBD_SHIFT; 801 disk->fops = &ubd_blops; 802 set_capacity(disk, size / 512); 803 if (major == UBD_MAJOR) 804 sprintf(disk->disk_name, "ubd%c", 'a' + unit); 805 else 806 sprintf(disk->disk_name, "ubd_fake%d", unit); 807 808 /* sysfs register (not for ide fake devices) */ 809 if (major == UBD_MAJOR) { 810 ubd_devs[unit].pdev.id = unit; 811 ubd_devs[unit].pdev.name = DRIVER_NAME; 812 ubd_devs[unit].pdev.dev.release = ubd_device_release; 813 dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]); 814 platform_device_register(&ubd_devs[unit].pdev); 815 disk->driverfs_dev = &ubd_devs[unit].pdev.dev; 816 } 817 818 disk->private_data = &ubd_devs[unit]; 819 disk->queue = ubd_devs[unit].queue; 820 add_disk(disk); 821 822 *disk_out = disk; 823 return 0; 824} 825 826#define ROUND_BLOCK(n) ((n + ((1 << 9) - 1)) & (-1 << 9)) 827 828static int ubd_add(int n, char **error_out) 829{ 830 struct ubd *ubd_dev = &ubd_devs[n]; 831 int err = 0; 832 833 if(ubd_dev->file == NULL) 834 goto out; 835 836 err = ubd_file_size(ubd_dev, &ubd_dev->size); 837 if(err < 0){ 838 *error_out = "Couldn't determine size of device's file"; 839 goto out; 840 } 841 842 ubd_dev->size = ROUND_BLOCK(ubd_dev->size); 843 844 INIT_LIST_HEAD(&ubd_dev->restart); 845 sg_init_table(ubd_dev->sg, MAX_SG); 846 847 err = -ENOMEM; 848 ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock); 849 if (ubd_dev->queue == NULL) { 850 *error_out = "Failed to initialize device queue"; 851 goto out; 852 } 853 ubd_dev->queue->queuedata = ubd_dev; 854 855 blk_queue_max_segments(ubd_dev->queue, MAX_SG); 856 err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]); 857 if(err){ 858 *error_out = "Failed to register device"; 859 goto out_cleanup; 860 } 861 862 if (fake_major != UBD_MAJOR) 863 ubd_disk_register(fake_major, ubd_dev->size, n, 864 &fake_gendisk[n]); 865 866 /* 867 * Perhaps this should also be under the "if (fake_major)" above 868 * using the fake_disk->disk_name 869 */ 870 if (fake_ide) 871 make_ide_entries(ubd_gendisk[n]->disk_name); 872 873 err = 0; 874out: 875 return err; 876 877out_cleanup: 878 blk_cleanup_queue(ubd_dev->queue); 879 goto out; 880} 881 882static int ubd_config(char *str, char **error_out) 883{ 884 int n, ret; 885 886 /* This string is possibly broken up and stored, so it's only 887 * freed if ubd_setup_common fails, or if only general options 888 * were set. 889 */ 890 str = kstrdup(str, GFP_KERNEL); 891 if (str == NULL) { 892 *error_out = "Failed to allocate memory"; 893 return -ENOMEM; 894 } 895 896 ret = ubd_setup_common(str, &n, error_out); 897 if (ret) 898 goto err_free; 899 900 if (n == -1) { 901 ret = 0; 902 goto err_free; 903 } 904 905 mutex_lock(&ubd_lock); 906 ret = ubd_add(n, error_out); 907 if (ret) 908 ubd_devs[n].file = NULL; 909 mutex_unlock(&ubd_lock); 910 911out: 912 return ret; 913 914err_free: 915 kfree(str); 916 goto out; 917} 918 919static int ubd_get_config(char *name, char *str, int size, char **error_out) 920{ 921 struct ubd *ubd_dev; 922 int n, len = 0; 923 924 n = parse_unit(&name); 925 if((n >= MAX_DEV) || (n < 0)){ 926 *error_out = "ubd_get_config : device number out of range"; 927 return -1; 928 } 929 930 ubd_dev = &ubd_devs[n]; 931 mutex_lock(&ubd_lock); 932 933 if(ubd_dev->file == NULL){ 934 CONFIG_CHUNK(str, size, len, "", 1); 935 goto out; 936 } 937 938 CONFIG_CHUNK(str, size, len, ubd_dev->file, 0); 939 940 if(ubd_dev->cow.file != NULL){ 941 CONFIG_CHUNK(str, size, len, ",", 0); 942 CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1); 943 } 944 else CONFIG_CHUNK(str, size, len, "", 1); 945 946 out: 947 mutex_unlock(&ubd_lock); 948 return len; 949} 950 951static int ubd_id(char **str, int *start_out, int *end_out) 952{ 953 int n; 954 955 n = parse_unit(str); 956 *start_out = 0; 957 *end_out = MAX_DEV - 1; 958 return n; 959} 960 961static int ubd_remove(int n, char **error_out) 962{ 963 struct gendisk *disk = ubd_gendisk[n]; 964 struct ubd *ubd_dev; 965 int err = -ENODEV; 966 967 mutex_lock(&ubd_lock); 968 969 ubd_dev = &ubd_devs[n]; 970 971 if(ubd_dev->file == NULL) 972 goto out; 973 974 /* you cannot remove a open disk */ 975 err = -EBUSY; 976 if(ubd_dev->count > 0) 977 goto out; 978 979 ubd_gendisk[n] = NULL; 980 if(disk != NULL){ 981 del_gendisk(disk); 982 put_disk(disk); 983 } 984 985 if(fake_gendisk[n] != NULL){ 986 del_gendisk(fake_gendisk[n]); 987 put_disk(fake_gendisk[n]); 988 fake_gendisk[n] = NULL; 989 } 990 991 err = 0; 992 platform_device_unregister(&ubd_dev->pdev); 993out: 994 mutex_unlock(&ubd_lock); 995 return err; 996} 997 998/* All these are called by mconsole in process context and without 999 * ubd-specific locks. The structure itself is const except for .list. 1000 */ 1001static struct mc_device ubd_mc = { 1002 .list = LIST_HEAD_INIT(ubd_mc.list), 1003 .name = "ubd", 1004 .config = ubd_config, 1005 .get_config = ubd_get_config, 1006 .id = ubd_id, 1007 .remove = ubd_remove, 1008}; 1009 1010static int __init ubd_mc_init(void) 1011{ 1012 mconsole_register_dev(&ubd_mc); 1013 return 0; 1014} 1015 1016__initcall(ubd_mc_init); 1017 1018static int __init ubd0_init(void) 1019{ 1020 struct ubd *ubd_dev = &ubd_devs[0]; 1021 1022 mutex_lock(&ubd_lock); 1023 if(ubd_dev->file == NULL) 1024 ubd_dev->file = "root_fs"; 1025 mutex_unlock(&ubd_lock); 1026 1027 return 0; 1028} 1029 1030__initcall(ubd0_init); 1031 1032/* Used in ubd_init, which is an initcall */ 1033static struct platform_driver ubd_driver = { 1034 .driver = { 1035 .name = DRIVER_NAME, 1036 }, 1037}; 1038 1039static int __init ubd_init(void) 1040{ 1041 char *error; 1042 int i, err; 1043 1044 if (register_blkdev(UBD_MAJOR, "ubd")) 1045 return -1; 1046 1047 if (fake_major != UBD_MAJOR) { 1048 char name[sizeof("ubd_nnn\0")]; 1049 1050 snprintf(name, sizeof(name), "ubd_%d", fake_major); 1051 if (register_blkdev(fake_major, "ubd")) 1052 return -1; 1053 } 1054 platform_driver_register(&ubd_driver); 1055 mutex_lock(&ubd_lock); 1056 for (i = 0; i < MAX_DEV; i++){ 1057 err = ubd_add(i, &error); 1058 if(err) 1059 printk(KERN_ERR "Failed to initialize ubd device %d :" 1060 "%s\n", i, error); 1061 } 1062 mutex_unlock(&ubd_lock); 1063 return 0; 1064} 1065 1066late_initcall(ubd_init); 1067 1068static int __init ubd_driver_init(void){ 1069 unsigned long stack; 1070 int err; 1071 1072 /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/ 1073 if(global_openflags.s){ 1074 printk(KERN_INFO "ubd: Synchronous mode\n"); 1075 /* Letting ubd=sync be like using ubd#s= instead of ubd#= is 1076 * enough. So use anyway the io thread. */ 1077 } 1078 stack = alloc_stack(0, 0); 1079 io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *), 1080 &thread_fd); 1081 if(io_pid < 0){ 1082 printk(KERN_ERR 1083 "ubd : Failed to start I/O thread (errno = %d) - " 1084 "falling back to synchronous I/O\n", -io_pid); 1085 io_pid = -1; 1086 return 0; 1087 } 1088 err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr, 1089 IRQF_DISABLED, "ubd", ubd_devs); 1090 if(err != 0) 1091 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err); 1092 return 0; 1093} 1094 1095device_initcall(ubd_driver_init); 1096 1097static int ubd_open(struct block_device *bdev, fmode_t mode) 1098{ 1099 struct gendisk *disk = bdev->bd_disk; 1100 struct ubd *ubd_dev = disk->private_data; 1101 int err = 0; 1102 1103 lock_kernel(); 1104 if(ubd_dev->count == 0){ 1105 err = ubd_open_dev(ubd_dev); 1106 if(err){ 1107 printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n", 1108 disk->disk_name, ubd_dev->file, -err); 1109 goto out; 1110 } 1111 } 1112 ubd_dev->count++; 1113 set_disk_ro(disk, !ubd_dev->openflags.w); 1114 1115 /* This should no more be needed. And it didn't work anyway to exclude 1116 * read-write remounting of filesystems.*/ 1117 /*if((mode & FMODE_WRITE) && !ubd_dev->openflags.w){ 1118 if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev); 1119 err = -EROFS; 1120 }*/ 1121out: 1122 unlock_kernel(); 1123 return err; 1124} 1125 1126static int ubd_release(struct gendisk *disk, fmode_t mode) 1127{ 1128 struct ubd *ubd_dev = disk->private_data; 1129 1130 lock_kernel(); 1131 if(--ubd_dev->count == 0) 1132 ubd_close_dev(ubd_dev); 1133 unlock_kernel(); 1134 return 0; 1135} 1136 1137static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask, 1138 __u64 *cow_offset, unsigned long *bitmap, 1139 __u64 bitmap_offset, unsigned long *bitmap_words, 1140 __u64 bitmap_len) 1141{ 1142 __u64 sector = io_offset >> 9; 1143 int i, update_bitmap = 0; 1144 1145 for(i = 0; i < length >> 9; i++){ 1146 if(cow_mask != NULL) 1147 ubd_set_bit(i, (unsigned char *) cow_mask); 1148 if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) 1149 continue; 1150 1151 update_bitmap = 1; 1152 ubd_set_bit(sector + i, (unsigned char *) bitmap); 1153 } 1154 1155 if(!update_bitmap) 1156 return; 1157 1158 *cow_offset = sector / (sizeof(unsigned long) * 8); 1159 1160 /* This takes care of the case where we're exactly at the end of the 1161 * device, and *cow_offset + 1 is off the end. So, just back it up 1162 * by one word. Thanks to Lynn Kerby for the fix and James McMechan 1163 * for the original diagnosis. 1164 */ 1165 if (*cow_offset == (DIV_ROUND_UP(bitmap_len, 1166 sizeof(unsigned long)) - 1)) 1167 (*cow_offset)--; 1168 1169 bitmap_words[0] = bitmap[*cow_offset]; 1170 bitmap_words[1] = bitmap[*cow_offset + 1]; 1171 1172 *cow_offset *= sizeof(unsigned long); 1173 *cow_offset += bitmap_offset; 1174} 1175 1176static void cowify_req(struct io_thread_req *req, unsigned long *bitmap, 1177 __u64 bitmap_offset, __u64 bitmap_len) 1178{ 1179 __u64 sector = req->offset >> 9; 1180 int i; 1181 1182 if(req->length > (sizeof(req->sector_mask) * 8) << 9) 1183 panic("Operation too long"); 1184 1185 if(req->op == UBD_READ) { 1186 for(i = 0; i < req->length >> 9; i++){ 1187 if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) 1188 ubd_set_bit(i, (unsigned char *) 1189 &req->sector_mask); 1190 } 1191 } 1192 else cowify_bitmap(req->offset, req->length, &req->sector_mask, 1193 &req->cow_offset, bitmap, bitmap_offset, 1194 req->bitmap_words, bitmap_len); 1195} 1196 1197/* Called with dev->lock held */ 1198static void prepare_request(struct request *req, struct io_thread_req *io_req, 1199 unsigned long long offset, int page_offset, 1200 int len, struct page *page) 1201{ 1202 struct gendisk *disk = req->rq_disk; 1203 struct ubd *ubd_dev = disk->private_data; 1204 1205 io_req->req = req; 1206 io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd : 1207 ubd_dev->fd; 1208 io_req->fds[1] = ubd_dev->fd; 1209 io_req->cow_offset = -1; 1210 io_req->offset = offset; 1211 io_req->length = len; 1212 io_req->error = 0; 1213 io_req->sector_mask = 0; 1214 1215 io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE; 1216 io_req->offsets[0] = 0; 1217 io_req->offsets[1] = ubd_dev->cow.data_offset; 1218 io_req->buffer = page_address(page) + page_offset; 1219 io_req->sectorsize = 1 << 9; 1220 1221 if(ubd_dev->cow.file != NULL) 1222 cowify_req(io_req, ubd_dev->cow.bitmap, 1223 ubd_dev->cow.bitmap_offset, ubd_dev->cow.bitmap_len); 1224 1225} 1226 1227/* Called with dev->lock held */ 1228static void do_ubd_request(struct request_queue *q) 1229{ 1230 struct io_thread_req *io_req; 1231 struct request *req; 1232 int n; 1233 1234 while(1){ 1235 struct ubd *dev = q->queuedata; 1236 if(dev->end_sg == 0){ 1237 struct request *req = blk_fetch_request(q); 1238 if(req == NULL) 1239 return; 1240 1241 dev->request = req; 1242 dev->rq_pos = blk_rq_pos(req); 1243 dev->start_sg = 0; 1244 dev->end_sg = blk_rq_map_sg(q, req, dev->sg); 1245 } 1246 1247 req = dev->request; 1248 while(dev->start_sg < dev->end_sg){ 1249 struct scatterlist *sg = &dev->sg[dev->start_sg]; 1250 1251 io_req = kmalloc(sizeof(struct io_thread_req), 1252 GFP_ATOMIC); 1253 if(io_req == NULL){ 1254 if(list_empty(&dev->restart)) 1255 list_add(&dev->restart, &restart); 1256 return; 1257 } 1258 prepare_request(req, io_req, 1259 (unsigned long long)dev->rq_pos << 9, 1260 sg->offset, sg->length, sg_page(sg)); 1261 1262 n = os_write_file(thread_fd, &io_req, 1263 sizeof(struct io_thread_req *)); 1264 if(n != sizeof(struct io_thread_req *)){ 1265 if(n != -EAGAIN) 1266 printk("write to io thread failed, " 1267 "errno = %d\n", -n); 1268 else if(list_empty(&dev->restart)) 1269 list_add(&dev->restart, &restart); 1270 kfree(io_req); 1271 return; 1272 } 1273 1274 dev->rq_pos += sg->length >> 9; 1275 dev->start_sg++; 1276 } 1277 dev->end_sg = 0; 1278 dev->request = NULL; 1279 } 1280} 1281 1282static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo) 1283{ 1284 struct ubd *ubd_dev = bdev->bd_disk->private_data; 1285 1286 geo->heads = 128; 1287 geo->sectors = 32; 1288 geo->cylinders = ubd_dev->size / (128 * 32 * 512); 1289 return 0; 1290} 1291 1292static int ubd_ioctl(struct block_device *bdev, fmode_t mode, 1293 unsigned int cmd, unsigned long arg) 1294{ 1295 struct ubd *ubd_dev = bdev->bd_disk->private_data; 1296 u16 ubd_id[ATA_ID_WORDS]; 1297 1298 switch (cmd) { 1299 struct cdrom_volctrl volume; 1300 case HDIO_GET_IDENTITY: 1301 memset(&ubd_id, 0, ATA_ID_WORDS * 2); 1302 ubd_id[ATA_ID_CYLS] = ubd_dev->size / (128 * 32 * 512); 1303 ubd_id[ATA_ID_HEADS] = 128; 1304 ubd_id[ATA_ID_SECTORS] = 32; 1305 if(copy_to_user((char __user *) arg, (char *) &ubd_id, 1306 sizeof(ubd_id))) 1307 return -EFAULT; 1308 return 0; 1309 1310 case CDROMVOLREAD: 1311 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume))) 1312 return -EFAULT; 1313 volume.channel0 = 255; 1314 volume.channel1 = 255; 1315 volume.channel2 = 255; 1316 volume.channel3 = 255; 1317 if(copy_to_user((char __user *) arg, &volume, sizeof(volume))) 1318 return -EFAULT; 1319 return 0; 1320 } 1321 return -EINVAL; 1322} 1323 1324static int update_bitmap(struct io_thread_req *req) 1325{ 1326 int n; 1327 1328 if(req->cow_offset == -1) 1329 return 0; 1330 1331 n = os_seek_file(req->fds[1], req->cow_offset); 1332 if(n < 0){ 1333 printk("do_io - bitmap lseek failed : err = %d\n", -n); 1334 return 1; 1335 } 1336 1337 n = os_write_file(req->fds[1], &req->bitmap_words, 1338 sizeof(req->bitmap_words)); 1339 if(n != sizeof(req->bitmap_words)){ 1340 printk("do_io - bitmap update failed, err = %d fd = %d\n", -n, 1341 req->fds[1]); 1342 return 1; 1343 } 1344 1345 return 0; 1346} 1347 1348static void do_io(struct io_thread_req *req) 1349{ 1350 char *buf; 1351 unsigned long len; 1352 int n, nsectors, start, end, bit; 1353 int err; 1354 __u64 off; 1355 1356 nsectors = req->length / req->sectorsize; 1357 start = 0; 1358 do { 1359 bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask); 1360 end = start; 1361 while((end < nsectors) && 1362 (ubd_test_bit(end, (unsigned char *) 1363 &req->sector_mask) == bit)) 1364 end++; 1365 1366 off = req->offset + req->offsets[bit] + 1367 start * req->sectorsize; 1368 len = (end - start) * req->sectorsize; 1369 buf = &req->buffer[start * req->sectorsize]; 1370 1371 err = os_seek_file(req->fds[bit], off); 1372 if(err < 0){ 1373 printk("do_io - lseek failed : err = %d\n", -err); 1374 req->error = 1; 1375 return; 1376 } 1377 if(req->op == UBD_READ){ 1378 n = 0; 1379 do { 1380 buf = &buf[n]; 1381 len -= n; 1382 n = os_read_file(req->fds[bit], buf, len); 1383 if (n < 0) { 1384 printk("do_io - read failed, err = %d " 1385 "fd = %d\n", -n, req->fds[bit]); 1386 req->error = 1; 1387 return; 1388 } 1389 } while((n < len) && (n != 0)); 1390 if (n < len) memset(&buf[n], 0, len - n); 1391 } else { 1392 n = os_write_file(req->fds[bit], buf, len); 1393 if(n != len){ 1394 printk("do_io - write failed err = %d " 1395 "fd = %d\n", -n, req->fds[bit]); 1396 req->error = 1; 1397 return; 1398 } 1399 } 1400 1401 start = end; 1402 } while(start < nsectors); 1403 1404 req->error = update_bitmap(req); 1405} 1406 1407/* Changed in start_io_thread, which is serialized by being called only 1408 * from ubd_init, which is an initcall. 1409 */ 1410int kernel_fd = -1; 1411 1412static int io_count = 0; 1413 1414int io_thread(void *arg) 1415{ 1416 struct io_thread_req *req; 1417 int n; 1418 1419 ignore_sigwinch_sig(); 1420 while(1){ 1421 n = os_read_file(kernel_fd, &req, 1422 sizeof(struct io_thread_req *)); 1423 if(n != sizeof(struct io_thread_req *)){ 1424 if(n < 0) 1425 printk("io_thread - read failed, fd = %d, " 1426 "err = %d\n", kernel_fd, -n); 1427 else { 1428 printk("io_thread - short read, fd = %d, " 1429 "length = %d\n", kernel_fd, n); 1430 } 1431 continue; 1432 } 1433 io_count++; 1434 do_io(req); 1435 n = os_write_file(kernel_fd, &req, 1436 sizeof(struct io_thread_req *)); 1437 if(n != sizeof(struct io_thread_req *)) 1438 printk("io_thread - write failed, fd = %d, err = %d\n", 1439 kernel_fd, -n); 1440 } 1441 1442 return 0; 1443} 1444