• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /asuswrt-rt-n18u-9.0.0.4.380.2695/release/src-rt-6.x.4708/linux/linux-2.6.36/arch/um/drivers/
1/*
2 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
3 * Licensed under the GPL
4 */
5
6/* 2001-09-28...2002-04-17
7 * Partition stuff by James_McMechan@hotmail.com
8 * old style ubd by setting UBD_SHIFT to 0
9 * 2002-09-27...2002-10-18 massive tinkering for 2.5
10 * partitions have changed in 2.5
11 * 2003-01-29 more tinkering for 2.5.59-1
12 * This should now address the sysfs problems and has
13 * the symlink for devfs to allow for booting with
14 * the common /dev/ubd/discX/... names rather than
15 * only /dev/ubdN/discN this version also has lots of
16 * clean ups preparing for ubd-many.
17 * James McMechan
18 */
19
20#define UBD_SHIFT 4
21
22#include "linux/kernel.h"
23#include "linux/module.h"
24#include "linux/blkdev.h"
25#include "linux/ata.h"
26#include "linux/hdreg.h"
27#include "linux/init.h"
28#include "linux/cdrom.h"
29#include "linux/proc_fs.h"
30#include "linux/seq_file.h"
31#include "linux/ctype.h"
32#include "linux/capability.h"
33#include "linux/mm.h"
34#include "linux/slab.h"
35#include "linux/vmalloc.h"
36#include "linux/smp_lock.h"
37#include "linux/blkpg.h"
38#include "linux/genhd.h"
39#include "linux/spinlock.h"
40#include "linux/platform_device.h"
41#include "linux/scatterlist.h"
42#include "asm/segment.h"
43#include "asm/uaccess.h"
44#include "asm/irq.h"
45#include "asm/types.h"
46#include "asm/tlbflush.h"
47#include "mem_user.h"
48#include "kern_util.h"
49#include "kern.h"
50#include "mconsole_kern.h"
51#include "init.h"
52#include "irq_user.h"
53#include "irq_kern.h"
54#include "ubd_user.h"
55#include "os.h"
56#include "mem.h"
57#include "mem_kern.h"
58#include "cow.h"
59
/* Direction of a single transfer handed to the I/O thread. */
enum ubd_req {
	UBD_READ,
	UBD_WRITE
};

/*
 * One unit of work for the I/O thread.  A pointer to this structure is
 * written to thread_fd by do_ubd_request() and read back by ubd_handler()
 * once the transfer has been processed.
 */
struct io_thread_req {
	/* Block-layer request this chunk belongs to. */
	struct request *req;
	/* UBD_READ or UBD_WRITE, derived from rq_data_dir(). */
	enum ubd_req op;
	/* fds[0]: backing file fd when COW is in use, else the device fd;
	 * fds[1]: the device (COW or plain) fd. */
	int fds[2];
	/* offsets[0] is always 0; offsets[1] is the COW data offset. */
	unsigned long offsets[2];
	/* Byte offset of the transfer within the device. */
	unsigned long long offset;
	/* Transfer length in bytes. */
	unsigned long length;
	/* Kernel virtual address of the data. */
	char *buffer;
	/* Always 512 (1 << 9) as set up by prepare_request(). */
	int sectorsize;
	/* One bit per sector of this transfer, filled in by cowify_req(). */
	unsigned long sector_mask;
	/* File offset of the bitmap words to write back, or -1 if none. */
	unsigned long long cow_offset;
	/* Snapshot of the two bitmap words located at cow_offset. */
	unsigned long bitmap_words[2];
	/* Cleared by prepare_request(); presumably set by the I/O thread
	 * on failure - confirm against the I/O thread code. */
	int error;
};
76
/*
 * Return 1 if bit number @bit is set in the byte array @data, 0 otherwise.
 * Bits are numbered from the least significant bit of data[0].
 */
static inline int ubd_test_bit(__u64 bit, unsigned char *data)
{
	const int width = sizeof(data[0]) * 8;
	__u64 index = bit / width;
	int shift = bit % width;

	return (data[index] & (1 << shift)) != 0;
}
87
/*
 * Set bit number @bit in the byte array @data.  Bits are numbered from the
 * least significant bit of data[0].
 */
static inline void ubd_set_bit(__u64 bit, unsigned char *data)
{
	const int width = sizeof(data[0]) * 8;
	__u64 index = bit / width;
	int shift = bit % width;

	data[index] |= (1 << shift);
}
98/*End stuff from ubd_user.h*/
99
100#define DRIVER_NAME "uml-blkdev"
101
102static DEFINE_MUTEX(ubd_lock);
103
104static int ubd_open(struct block_device *bdev, fmode_t mode);
105static int ubd_release(struct gendisk *disk, fmode_t mode);
106static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
107		     unsigned int cmd, unsigned long arg);
108static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
109
110#define MAX_DEV (16)
111
112static const struct block_device_operations ubd_blops = {
113        .owner		= THIS_MODULE,
114        .open		= ubd_open,
115        .release	= ubd_release,
116        .ioctl		= ubd_ioctl,
117	.getgeo		= ubd_getgeo,
118};
119
120/* Protected by ubd_lock */
121static int fake_major = UBD_MAJOR;
122static struct gendisk *ubd_gendisk[MAX_DEV];
123static struct gendisk *fake_gendisk[MAX_DEV];
124
125#ifdef CONFIG_BLK_DEV_UBD_SYNC
126#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
127					 .cl = 1 })
128#else
129#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
130					 .cl = 1 })
131#endif
132static struct openflags global_openflags = OPEN_FLAGS;
133
/* Copy-on-write bookkeeping embedded in each struct ubd. */
struct cow {
	char *file;			/* backing file name */
	int fd;				/* backing file fd */
	unsigned long *bitmap;		/* vmalloc'ed copy of the COW bitmap */
	unsigned long bitmap_len;	/* length of the bitmap, in bytes */
	int bitmap_offset;		/* offset of the bitmap in the COW file */
	int data_offset;		/* offset of the data in the COW file */
};
144
145#define MAX_SG 64
146
147struct ubd {
148	struct list_head restart;
149	/* name (and fd, below) of the file opened for writing, either the
150	 * backing or the cow file. */
151	char *file;
152	int count;
153	int fd;
154	__u64 size;
155	struct openflags boot_openflags;
156	struct openflags openflags;
157	unsigned shared:1;
158	unsigned no_cow:1;
159	struct cow cow;
160	struct platform_device pdev;
161	struct request_queue *queue;
162	spinlock_t lock;
163	struct scatterlist sg[MAX_SG];
164	struct request *request;
165	int start_sg, end_sg;
166	sector_t rq_pos;
167};
168
/* Initializer for an unused struct cow: no backing file, no bitmap. */
#define DEFAULT_COW { \
	.file =			NULL, \
	.fd =			-1, \
	.bitmap =		NULL, \
	.bitmap_len =		0, \
	.bitmap_offset =	0, \
	.data_offset =		0, \
}

/*
 * Initializer for an unconfigured struct ubd.  Members that are not named
 * here are zero-initialized.
 */
#define DEFAULT_UBD { \
	.file =			NULL, \
	.count =		0, \
	.fd =			-1, \
	.size =			-1, \
	.boot_openflags =	OPEN_FLAGS, \
	.openflags =		OPEN_FLAGS, \
	.shared =		0, \
	.no_cow =		0, \
	.cow =			DEFAULT_COW, \
	.lock =			SPIN_LOCK_UNLOCKED, \
	.request =		NULL, \
	.start_sg =		0, \
	.end_sg =		0, \
	.rq_pos =		0, \
}
193
194/* Protected by ubd_lock */
195static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };
196
197/* Only changed by fake_ide_setup which is a setup */
198static int fake_ide = 0;
199static struct proc_dir_entry *proc_ide_root = NULL;
200static struct proc_dir_entry *proc_ide = NULL;
201
202static void make_proc_ide(void)
203{
204	proc_ide_root = proc_mkdir("ide", NULL);
205	proc_ide = proc_mkdir("ide0", proc_ide_root);
206}
207
208static int fake_ide_media_proc_show(struct seq_file *m, void *v)
209{
210	seq_puts(m, "disk\n");
211	return 0;
212}
213
214static int fake_ide_media_proc_open(struct inode *inode, struct file *file)
215{
216	return single_open(file, fake_ide_media_proc_show, NULL);
217}
218
219static const struct file_operations fake_ide_media_proc_fops = {
220	.owner		= THIS_MODULE,
221	.open		= fake_ide_media_proc_open,
222	.read		= seq_read,
223	.llseek		= seq_lseek,
224	.release	= single_release,
225};
226
227static void make_ide_entries(const char *dev_name)
228{
229	struct proc_dir_entry *dir, *ent;
230	char name[64];
231
232	if(proc_ide_root == NULL) make_proc_ide();
233
234	dir = proc_mkdir(dev_name, proc_ide);
235	if(!dir) return;
236
237	ent = proc_create("media", S_IRUGO, dir, &fake_ide_media_proc_fops);
238	if(!ent) return;
239	snprintf(name, sizeof(name), "ide0/%s", dev_name);
240	proc_symlink(dev_name, proc_ide_root, name);
241}
242
243static int fake_ide_setup(char *str)
244{
245	fake_ide = 1;
246	return 1;
247}
248
249__setup("fake_ide", fake_ide_setup);
250
251__uml_help(fake_ide_setup,
252"fake_ide\n"
253"    Create ide0 entries that map onto ubd devices.\n\n"
254);
255
/*
 * Parse a unit designator at *ptr: either a number (any base understood by
 * simple_strtoul with base 0) or a single lower-case letter, 'a' being
 * unit 0.  On success *ptr is advanced past the designator and the unit
 * number is returned; otherwise -1 is returned and *ptr is left alone.
 */
static int parse_unit(char **ptr)
{
	char *cur = *ptr;
	char *end;
	int unit;

	if (isdigit(*cur)) {
		unit = simple_strtoul(cur, &end, 0);
		if (end == cur)
			return -1;
		*ptr = end;
		return unit;
	}

	if ((*cur >= 'a') && (*cur <= 'z')) {
		*ptr = cur + 1;
		return *cur - 'a';
	}

	return -1;
}
274
275/* If *index_out == -1 at exit, the passed option was a general one;
276 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
277 * should not be freed on exit.
278 */
279static int ubd_setup_common(char *str, int *index_out, char **error_out)
280{
281	struct ubd *ubd_dev;
282	struct openflags flags = global_openflags;
283	char *backing_file;
284	int n, err = 0, i;
285
286	if(index_out) *index_out = -1;
287	n = *str;
288	if(n == '='){
289		char *end;
290		int major;
291
292		str++;
293		if(!strcmp(str, "sync")){
294			global_openflags = of_sync(global_openflags);
295			goto out1;
296		}
297
298		err = -EINVAL;
299		major = simple_strtoul(str, &end, 0);
300		if((*end != '\0') || (end == str)){
301			*error_out = "Didn't parse major number";
302			goto out1;
303		}
304
305		mutex_lock(&ubd_lock);
306		if (fake_major != UBD_MAJOR) {
307			*error_out = "Can't assign a fake major twice";
308			goto out1;
309		}
310
311		fake_major = major;
312
313		printk(KERN_INFO "Setting extra ubd major number to %d\n",
314		       major);
315		err = 0;
316	out1:
317		mutex_unlock(&ubd_lock);
318		return err;
319	}
320
321	n = parse_unit(&str);
322	if(n < 0){
323		*error_out = "Couldn't parse device number";
324		return -EINVAL;
325	}
326	if(n >= MAX_DEV){
327		*error_out = "Device number out of range";
328		return 1;
329	}
330
331	err = -EBUSY;
332	mutex_lock(&ubd_lock);
333
334	ubd_dev = &ubd_devs[n];
335	if(ubd_dev->file != NULL){
336		*error_out = "Device is already configured";
337		goto out;
338	}
339
340	if (index_out)
341		*index_out = n;
342
343	err = -EINVAL;
344	for (i = 0; i < sizeof("rscd="); i++) {
345		switch (*str) {
346		case 'r':
347			flags.w = 0;
348			break;
349		case 's':
350			flags.s = 1;
351			break;
352		case 'd':
353			ubd_dev->no_cow = 1;
354			break;
355		case 'c':
356			ubd_dev->shared = 1;
357			break;
358		case '=':
359			str++;
360			goto break_loop;
361		default:
362			*error_out = "Expected '=' or flag letter "
363				"(r, s, c, or d)";
364			goto out;
365		}
366		str++;
367	}
368
369	if (*str == '=')
370		*error_out = "Too many flags specified";
371	else
372		*error_out = "Missing '='";
373	goto out;
374
375break_loop:
376	backing_file = strchr(str, ',');
377
378	if (backing_file == NULL)
379		backing_file = strchr(str, ':');
380
381	if(backing_file != NULL){
382		if(ubd_dev->no_cow){
383			*error_out = "Can't specify both 'd' and a cow file";
384			goto out;
385		}
386		else {
387			*backing_file = '\0';
388			backing_file++;
389		}
390	}
391	err = 0;
392	ubd_dev->file = str;
393	ubd_dev->cow.file = backing_file;
394	ubd_dev->boot_openflags = flags;
395out:
396	mutex_unlock(&ubd_lock);
397	return err;
398}
399
400static int ubd_setup(char *str)
401{
402	char *error;
403	int err;
404
405	err = ubd_setup_common(str, NULL, &error);
406	if(err)
407		printk(KERN_ERR "Failed to initialize device with \"%s\" : "
408		       "%s\n", str, error);
409	return 1;
410}
411
412__setup("ubd", ubd_setup);
413__uml_help(ubd_setup,
414"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
415"    This is used to associate a device with a file in the underlying\n"
416"    filesystem. When specifying two filenames, the first one is the\n"
417"    COW name and the second is the backing file name. As separator you can\n"
418"    use either a ':' or a ',': the first one allows writing things like;\n"
419"	ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
420"    while with a ',' the shell would not expand the 2nd '~'.\n"
421"    When using only one filename, UML will detect whether to treat it like\n"
422"    a COW file or a backing file. To override this detection, add the 'd'\n"
423"    flag:\n"
424"	ubd0d=BackingFile\n"
425"    Usually, there is a filesystem in the file, but \n"
426"    that's not required. Swap devices containing swap files can be\n"
427"    specified like this. Also, a file which doesn't contain a\n"
428"    filesystem can have its contents read in the virtual \n"
429"    machine by running 'dd' on the device. <n> must be in the range\n"
430"    0 to 7. Appending an 'r' to the number will cause that device\n"
431"    to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
432"    an 's' will cause data to be written to disk on the host immediately.\n"
433"    'c' will cause the device to be treated as being shared between multiple\n"
434"    UMLs and file locking will be turned off - this is appropriate for a\n"
435"    cluster filesystem and inappropriate at almost all other times.\n\n"
436);
437
/* Catches the common "udb" misspelling of "ubd" on the command line. */
static int udb_setup(char *str)
{
	printk("udb%s specified on command line is almost certainly "
	       "a ubd -> udb TYPO\n",
	       str);

	return 1;
}
444
445__setup("udb", udb_setup);
446__uml_help(udb_setup,
447"udb\n"
448"    This option is here solely to catch ubd -> udb typos, which can be\n"
449"    to impossible to catch visually unless you specifically look for\n"
450"    them.  The only result of any option starting with 'udb' is an error\n"
451"    in the boot output.\n\n"
452);
453
454static void do_ubd_request(struct request_queue * q);
455
456/* Only changed by ubd_init, which is an initcall. */
457static int thread_fd = -1;
458static LIST_HEAD(restart);
459
460/* Called without dev->lock held, and only in interrupt context. */
461static void ubd_handler(void)
462{
463	struct io_thread_req *req;
464	struct ubd *ubd;
465	struct list_head *list, *next_ele;
466	unsigned long flags;
467	int n;
468
469	while(1){
470		n = os_read_file(thread_fd, &req,
471				 sizeof(struct io_thread_req *));
472		if(n != sizeof(req)){
473			if(n == -EAGAIN)
474				break;
475			printk(KERN_ERR "spurious interrupt in ubd_handler, "
476			       "err = %d\n", -n);
477			return;
478		}
479
480		blk_end_request(req->req, 0, req->length);
481		kfree(req);
482	}
483	reactivate_fd(thread_fd, UBD_IRQ);
484
485	list_for_each_safe(list, next_ele, &restart){
486		ubd = container_of(list, struct ubd, restart);
487		list_del_init(&ubd->restart);
488		spin_lock_irqsave(&ubd->lock, flags);
489		do_ubd_request(ubd->queue);
490		spin_unlock_irqrestore(&ubd->lock, flags);
491	}
492}
493
494static irqreturn_t ubd_intr(int irq, void *dev)
495{
496	ubd_handler();
497	return IRQ_HANDLED;
498}
499
500/* Only changed by ubd_init, which is an initcall. */
501static int io_pid = -1;
502
503static void kill_io_thread(void)
504{
505	if(io_pid != -1)
506		os_kill_process(io_pid, 1);
507}
508
509__uml_exitcall(kill_io_thread);
510
511static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
512{
513	char *file;
514
515	file = ubd_dev->cow.file ? ubd_dev->cow.file : ubd_dev->file;
516	return os_file_size(file, size_out);
517}
518
/*
 * Read @len bytes at @offset of @fd into @buf.
 * Returns 0 on success or the negative error from the seek or the read.
 */
static int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int ret = os_seek_file(fd, offset);

	if (ret >= 0)
		ret = os_read_file(fd, buf, len);

	return (ret < 0) ? ret : 0;
}
533
534static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
535{
536	unsigned long modtime;
537	unsigned long long actual;
538	int err;
539
540	err = os_file_modtime(file, &modtime);
541	if (err < 0) {
542		printk(KERN_ERR "Failed to get modification time of backing "
543		       "file \"%s\", err = %d\n", file, -err);
544		return err;
545	}
546
547	err = os_file_size(file, &actual);
548	if (err < 0) {
549		printk(KERN_ERR "Failed to get size of backing file \"%s\", "
550		       "err = %d\n", file, -err);
551		return err;
552	}
553
554	if (actual != size) {
555		/*__u64 can be a long on AMD64 and with %lu GCC complains; so
556		 * the typecast.*/
557		printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
558		       "vs backing file\n", (unsigned long long) size, actual);
559		return -EINVAL;
560	}
561	if (modtime != mtime) {
562		printk(KERN_ERR "mtime mismatch (%ld vs %ld) of COW header vs "
563		       "backing file\n", mtime, modtime);
564		return -EINVAL;
565	}
566	return 0;
567}
568
569static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
570{
571	struct uml_stat buf1, buf2;
572	int err;
573
574	if (from_cmdline == NULL)
575		return 0;
576	if (!strcmp(from_cmdline, from_cow))
577		return 0;
578
579	err = os_stat_file(from_cmdline, &buf1);
580	if (err < 0) {
581		printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
582		       -err);
583		return 0;
584	}
585	err = os_stat_file(from_cow, &buf2);
586	if (err < 0) {
587		printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
588		       -err);
589		return 1;
590	}
591	if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
592		return 0;
593
594	printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
595	       "\"%s\" specified in COW header of \"%s\"\n",
596	       from_cmdline, from_cow, cow);
597	return 1;
598}
599
600static int open_ubd_file(char *file, struct openflags *openflags, int shared,
601		  char **backing_file_out, int *bitmap_offset_out,
602		  unsigned long *bitmap_len_out, int *data_offset_out,
603		  int *create_cow_out)
604{
605	time_t mtime;
606	unsigned long long size;
607	__u32 version, align;
608	char *backing_file;
609	int fd, err, sectorsize, asked_switch, mode = 0644;
610
611	fd = os_open_file(file, *openflags, mode);
612	if (fd < 0) {
613		if ((fd == -ENOENT) && (create_cow_out != NULL))
614			*create_cow_out = 1;
615		if (!openflags->w ||
616		    ((fd != -EROFS) && (fd != -EACCES)))
617			return fd;
618		openflags->w = 0;
619		fd = os_open_file(file, *openflags, mode);
620		if (fd < 0)
621			return fd;
622	}
623
624	if (shared)
625		printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
626	else {
627		err = os_lock_file(fd, openflags->w);
628		if (err < 0) {
629			printk(KERN_ERR "Failed to lock '%s', err = %d\n",
630			       file, -err);
631			goto out_close;
632		}
633	}
634
635	/* Successful return case! */
636	if (backing_file_out == NULL)
637		return fd;
638
639	err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
640			      &size, &sectorsize, &align, bitmap_offset_out);
641	if (err && (*backing_file_out != NULL)) {
642		printk(KERN_ERR "Failed to read COW header from COW file "
643		       "\"%s\", errno = %d\n", file, -err);
644		goto out_close;
645	}
646	if (err)
647		return fd;
648
649	asked_switch = path_requires_switch(*backing_file_out, backing_file,
650					    file);
651
652	/* Allow switching only if no mismatch. */
653	if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
654						   mtime)) {
655		printk(KERN_ERR "Switching backing file to '%s'\n",
656		       *backing_file_out);
657		err = write_cow_header(file, fd, *backing_file_out,
658				       sectorsize, align, &size);
659		if (err) {
660			printk(KERN_ERR "Switch failed, errno = %d\n", -err);
661			goto out_close;
662		}
663	} else {
664		*backing_file_out = backing_file;
665		err = backing_file_mismatch(*backing_file_out, size, mtime);
666		if (err)
667			goto out_close;
668	}
669
670	cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
671		  bitmap_len_out, data_offset_out);
672
673	return fd;
674 out_close:
675	os_close_file(fd);
676	return err;
677}
678
679static int create_cow_file(char *cow_file, char *backing_file,
680		    struct openflags flags,
681		    int sectorsize, int alignment, int *bitmap_offset_out,
682		    unsigned long *bitmap_len_out, int *data_offset_out)
683{
684	int err, fd;
685
686	flags.c = 1;
687	fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
688	if (fd < 0) {
689		err = fd;
690		printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
691		       cow_file, -err);
692		goto out;
693	}
694
695	err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
696			    bitmap_offset_out, bitmap_len_out,
697			    data_offset_out);
698	if (!err)
699		return fd;
700	os_close_file(fd);
701 out:
702	return err;
703}
704
705static void ubd_close_dev(struct ubd *ubd_dev)
706{
707	os_close_file(ubd_dev->fd);
708	if(ubd_dev->cow.file == NULL)
709		return;
710
711	os_close_file(ubd_dev->cow.fd);
712	vfree(ubd_dev->cow.bitmap);
713	ubd_dev->cow.bitmap = NULL;
714}
715
716static int ubd_open_dev(struct ubd *ubd_dev)
717{
718	struct openflags flags;
719	char **back_ptr;
720	int err, create_cow, *create_ptr;
721	int fd;
722
723	ubd_dev->openflags = ubd_dev->boot_openflags;
724	create_cow = 0;
725	create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
726	back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
727
728	fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
729				back_ptr, &ubd_dev->cow.bitmap_offset,
730				&ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
731				create_ptr);
732
733	if((fd == -ENOENT) && create_cow){
734		fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
735					  ubd_dev->openflags, 1 << 9, PAGE_SIZE,
736					  &ubd_dev->cow.bitmap_offset,
737					  &ubd_dev->cow.bitmap_len,
738					  &ubd_dev->cow.data_offset);
739		if(fd >= 0){
740			printk(KERN_INFO "Creating \"%s\" as COW file for "
741			       "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
742		}
743	}
744
745	if(fd < 0){
746		printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
747		       -fd);
748		return fd;
749	}
750	ubd_dev->fd = fd;
751
752	if(ubd_dev->cow.file != NULL){
753		blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long));
754
755		err = -ENOMEM;
756		ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
757		if(ubd_dev->cow.bitmap == NULL){
758			printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
759			goto error;
760		}
761		flush_tlb_kernel_vm();
762
763		err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
764				      ubd_dev->cow.bitmap_offset,
765				      ubd_dev->cow.bitmap_len);
766		if(err < 0)
767			goto error;
768
769		flags = ubd_dev->openflags;
770		flags.w = 0;
771		err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
772				    NULL, NULL, NULL, NULL);
773		if(err < 0) goto error;
774		ubd_dev->cow.fd = err;
775	}
776	return 0;
777 error:
778	os_close_file(ubd_dev->fd);
779	return err;
780}
781
782static void ubd_device_release(struct device *dev)
783{
784	struct ubd *ubd_dev = dev_get_drvdata(dev);
785
786	blk_cleanup_queue(ubd_dev->queue);
787	*ubd_dev = ((struct ubd) DEFAULT_UBD);
788}
789
790static int ubd_disk_register(int major, u64 size, int unit,
791			     struct gendisk **disk_out)
792{
793	struct gendisk *disk;
794
795	disk = alloc_disk(1 << UBD_SHIFT);
796	if(disk == NULL)
797		return -ENOMEM;
798
799	disk->major = major;
800	disk->first_minor = unit << UBD_SHIFT;
801	disk->fops = &ubd_blops;
802	set_capacity(disk, size / 512);
803	if (major == UBD_MAJOR)
804		sprintf(disk->disk_name, "ubd%c", 'a' + unit);
805	else
806		sprintf(disk->disk_name, "ubd_fake%d", unit);
807
808	/* sysfs register (not for ide fake devices) */
809	if (major == UBD_MAJOR) {
810		ubd_devs[unit].pdev.id   = unit;
811		ubd_devs[unit].pdev.name = DRIVER_NAME;
812		ubd_devs[unit].pdev.dev.release = ubd_device_release;
813		dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]);
814		platform_device_register(&ubd_devs[unit].pdev);
815		disk->driverfs_dev = &ubd_devs[unit].pdev.dev;
816	}
817
818	disk->private_data = &ubd_devs[unit];
819	disk->queue = ubd_devs[unit].queue;
820	add_disk(disk);
821
822	*disk_out = disk;
823	return 0;
824}
825
/*
 * Round n up to the next multiple of 512 (1 << 9).  The argument is
 * parenthesized so that expressions can be passed safely, and the mask is
 * built from an unsigned constant instead of left-shifting -1.
 */
#define ROUND_BLOCK(n) (((n) + ((1ULL << 9) - 1)) & ~((1ULL << 9) - 1))
827
828static int ubd_add(int n, char **error_out)
829{
830	struct ubd *ubd_dev = &ubd_devs[n];
831	int err = 0;
832
833	if(ubd_dev->file == NULL)
834		goto out;
835
836	err = ubd_file_size(ubd_dev, &ubd_dev->size);
837	if(err < 0){
838		*error_out = "Couldn't determine size of device's file";
839		goto out;
840	}
841
842	ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
843
844	INIT_LIST_HEAD(&ubd_dev->restart);
845	sg_init_table(ubd_dev->sg, MAX_SG);
846
847	err = -ENOMEM;
848	ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock);
849	if (ubd_dev->queue == NULL) {
850		*error_out = "Failed to initialize device queue";
851		goto out;
852	}
853	ubd_dev->queue->queuedata = ubd_dev;
854
855	blk_queue_max_segments(ubd_dev->queue, MAX_SG);
856	err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]);
857	if(err){
858		*error_out = "Failed to register device";
859		goto out_cleanup;
860	}
861
862	if (fake_major != UBD_MAJOR)
863		ubd_disk_register(fake_major, ubd_dev->size, n,
864				  &fake_gendisk[n]);
865
866	/*
867	 * Perhaps this should also be under the "if (fake_major)" above
868	 * using the fake_disk->disk_name
869	 */
870	if (fake_ide)
871		make_ide_entries(ubd_gendisk[n]->disk_name);
872
873	err = 0;
874out:
875	return err;
876
877out_cleanup:
878	blk_cleanup_queue(ubd_dev->queue);
879	goto out;
880}
881
882static int ubd_config(char *str, char **error_out)
883{
884	int n, ret;
885
886	/* This string is possibly broken up and stored, so it's only
887	 * freed if ubd_setup_common fails, or if only general options
888	 * were set.
889	 */
890	str = kstrdup(str, GFP_KERNEL);
891	if (str == NULL) {
892		*error_out = "Failed to allocate memory";
893		return -ENOMEM;
894	}
895
896	ret = ubd_setup_common(str, &n, error_out);
897	if (ret)
898		goto err_free;
899
900	if (n == -1) {
901		ret = 0;
902		goto err_free;
903	}
904
905	mutex_lock(&ubd_lock);
906	ret = ubd_add(n, error_out);
907	if (ret)
908		ubd_devs[n].file = NULL;
909	mutex_unlock(&ubd_lock);
910
911out:
912	return ret;
913
914err_free:
915	kfree(str);
916	goto out;
917}
918
919static int ubd_get_config(char *name, char *str, int size, char **error_out)
920{
921	struct ubd *ubd_dev;
922	int n, len = 0;
923
924	n = parse_unit(&name);
925	if((n >= MAX_DEV) || (n < 0)){
926		*error_out = "ubd_get_config : device number out of range";
927		return -1;
928	}
929
930	ubd_dev = &ubd_devs[n];
931	mutex_lock(&ubd_lock);
932
933	if(ubd_dev->file == NULL){
934		CONFIG_CHUNK(str, size, len, "", 1);
935		goto out;
936	}
937
938	CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
939
940	if(ubd_dev->cow.file != NULL){
941		CONFIG_CHUNK(str, size, len, ",", 0);
942		CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
943	}
944	else CONFIG_CHUNK(str, size, len, "", 1);
945
946 out:
947	mutex_unlock(&ubd_lock);
948	return len;
949}
950
951static int ubd_id(char **str, int *start_out, int *end_out)
952{
953	int n;
954
955	n = parse_unit(str);
956	*start_out = 0;
957	*end_out = MAX_DEV - 1;
958	return n;
959}
960
961static int ubd_remove(int n, char **error_out)
962{
963	struct gendisk *disk = ubd_gendisk[n];
964	struct ubd *ubd_dev;
965	int err = -ENODEV;
966
967	mutex_lock(&ubd_lock);
968
969	ubd_dev = &ubd_devs[n];
970
971	if(ubd_dev->file == NULL)
972		goto out;
973
974	/* you cannot remove a open disk */
975	err = -EBUSY;
976	if(ubd_dev->count > 0)
977		goto out;
978
979	ubd_gendisk[n] = NULL;
980	if(disk != NULL){
981		del_gendisk(disk);
982		put_disk(disk);
983	}
984
985	if(fake_gendisk[n] != NULL){
986		del_gendisk(fake_gendisk[n]);
987		put_disk(fake_gendisk[n]);
988		fake_gendisk[n] = NULL;
989	}
990
991	err = 0;
992	platform_device_unregister(&ubd_dev->pdev);
993out:
994	mutex_unlock(&ubd_lock);
995	return err;
996}
997
998/* All these are called by mconsole in process context and without
999 * ubd-specific locks.  The structure itself is const except for .list.
1000 */
1001static struct mc_device ubd_mc = {
1002	.list		= LIST_HEAD_INIT(ubd_mc.list),
1003	.name		= "ubd",
1004	.config		= ubd_config,
1005	.get_config	= ubd_get_config,
1006	.id		= ubd_id,
1007	.remove		= ubd_remove,
1008};
1009
1010static int __init ubd_mc_init(void)
1011{
1012	mconsole_register_dev(&ubd_mc);
1013	return 0;
1014}
1015
1016__initcall(ubd_mc_init);
1017
1018static int __init ubd0_init(void)
1019{
1020	struct ubd *ubd_dev = &ubd_devs[0];
1021
1022	mutex_lock(&ubd_lock);
1023	if(ubd_dev->file == NULL)
1024		ubd_dev->file = "root_fs";
1025	mutex_unlock(&ubd_lock);
1026
1027	return 0;
1028}
1029
1030__initcall(ubd0_init);
1031
1032/* Used in ubd_init, which is an initcall */
1033static struct platform_driver ubd_driver = {
1034	.driver = {
1035		.name  = DRIVER_NAME,
1036	},
1037};
1038
1039static int __init ubd_init(void)
1040{
1041	char *error;
1042	int i, err;
1043
1044	if (register_blkdev(UBD_MAJOR, "ubd"))
1045		return -1;
1046
1047	if (fake_major != UBD_MAJOR) {
1048		char name[sizeof("ubd_nnn\0")];
1049
1050		snprintf(name, sizeof(name), "ubd_%d", fake_major);
1051		if (register_blkdev(fake_major, "ubd"))
1052			return -1;
1053	}
1054	platform_driver_register(&ubd_driver);
1055	mutex_lock(&ubd_lock);
1056	for (i = 0; i < MAX_DEV; i++){
1057		err = ubd_add(i, &error);
1058		if(err)
1059			printk(KERN_ERR "Failed to initialize ubd device %d :"
1060			       "%s\n", i, error);
1061	}
1062	mutex_unlock(&ubd_lock);
1063	return 0;
1064}
1065
1066late_initcall(ubd_init);
1067
1068static int __init ubd_driver_init(void){
1069	unsigned long stack;
1070	int err;
1071
1072	/* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
1073	if(global_openflags.s){
1074		printk(KERN_INFO "ubd: Synchronous mode\n");
1075		/* Letting ubd=sync be like using ubd#s= instead of ubd#= is
1076		 * enough. So use anyway the io thread. */
1077	}
1078	stack = alloc_stack(0, 0);
1079	io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
1080				 &thread_fd);
1081	if(io_pid < 0){
1082		printk(KERN_ERR
1083		       "ubd : Failed to start I/O thread (errno = %d) - "
1084		       "falling back to synchronous I/O\n", -io_pid);
1085		io_pid = -1;
1086		return 0;
1087	}
1088	err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
1089			     IRQF_DISABLED, "ubd", ubd_devs);
1090	if(err != 0)
1091		printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
1092	return 0;
1093}
1094
1095device_initcall(ubd_driver_init);
1096
1097static int ubd_open(struct block_device *bdev, fmode_t mode)
1098{
1099	struct gendisk *disk = bdev->bd_disk;
1100	struct ubd *ubd_dev = disk->private_data;
1101	int err = 0;
1102
1103	lock_kernel();
1104	if(ubd_dev->count == 0){
1105		err = ubd_open_dev(ubd_dev);
1106		if(err){
1107			printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
1108			       disk->disk_name, ubd_dev->file, -err);
1109			goto out;
1110		}
1111	}
1112	ubd_dev->count++;
1113	set_disk_ro(disk, !ubd_dev->openflags.w);
1114
1115	/* This should no more be needed. And it didn't work anyway to exclude
1116	 * read-write remounting of filesystems.*/
1117	/*if((mode & FMODE_WRITE) && !ubd_dev->openflags.w){
1118	        if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev);
1119	        err = -EROFS;
1120	}*/
1121out:
1122	unlock_kernel();
1123	return err;
1124}
1125
1126static int ubd_release(struct gendisk *disk, fmode_t mode)
1127{
1128	struct ubd *ubd_dev = disk->private_data;
1129
1130	lock_kernel();
1131	if(--ubd_dev->count == 0)
1132		ubd_close_dev(ubd_dev);
1133	unlock_kernel();
1134	return 0;
1135}
1136
1137static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
1138			  __u64 *cow_offset, unsigned long *bitmap,
1139			  __u64 bitmap_offset, unsigned long *bitmap_words,
1140			  __u64 bitmap_len)
1141{
1142	__u64 sector = io_offset >> 9;
1143	int i, update_bitmap = 0;
1144
1145	for(i = 0; i < length >> 9; i++){
1146		if(cow_mask != NULL)
1147			ubd_set_bit(i, (unsigned char *) cow_mask);
1148		if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1149			continue;
1150
1151		update_bitmap = 1;
1152		ubd_set_bit(sector + i, (unsigned char *) bitmap);
1153	}
1154
1155	if(!update_bitmap)
1156		return;
1157
1158	*cow_offset = sector / (sizeof(unsigned long) * 8);
1159
1160	/* This takes care of the case where we're exactly at the end of the
1161	 * device, and *cow_offset + 1 is off the end.  So, just back it up
1162	 * by one word.  Thanks to Lynn Kerby for the fix and James McMechan
1163	 * for the original diagnosis.
1164	 */
1165	if (*cow_offset == (DIV_ROUND_UP(bitmap_len,
1166					 sizeof(unsigned long)) - 1))
1167		(*cow_offset)--;
1168
1169	bitmap_words[0] = bitmap[*cow_offset];
1170	bitmap_words[1] = bitmap[*cow_offset + 1];
1171
1172	*cow_offset *= sizeof(unsigned long);
1173	*cow_offset += bitmap_offset;
1174}
1175
1176static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
1177		       __u64 bitmap_offset, __u64 bitmap_len)
1178{
1179	__u64 sector = req->offset >> 9;
1180	int i;
1181
1182	if(req->length > (sizeof(req->sector_mask) * 8) << 9)
1183		panic("Operation too long");
1184
1185	if(req->op == UBD_READ) {
1186		for(i = 0; i < req->length >> 9; i++){
1187			if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1188				ubd_set_bit(i, (unsigned char *)
1189					    &req->sector_mask);
1190		}
1191	}
1192	else cowify_bitmap(req->offset, req->length, &req->sector_mask,
1193			   &req->cow_offset, bitmap, bitmap_offset,
1194			   req->bitmap_words, bitmap_len);
1195}
1196
1197/* Called with dev->lock held */
1198static void prepare_request(struct request *req, struct io_thread_req *io_req,
1199			    unsigned long long offset, int page_offset,
1200			    int len, struct page *page)
1201{
1202	struct gendisk *disk = req->rq_disk;
1203	struct ubd *ubd_dev = disk->private_data;
1204
1205	io_req->req = req;
1206	io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
1207		ubd_dev->fd;
1208	io_req->fds[1] = ubd_dev->fd;
1209	io_req->cow_offset = -1;
1210	io_req->offset = offset;
1211	io_req->length = len;
1212	io_req->error = 0;
1213	io_req->sector_mask = 0;
1214
1215	io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
1216	io_req->offsets[0] = 0;
1217	io_req->offsets[1] = ubd_dev->cow.data_offset;
1218	io_req->buffer = page_address(page) + page_offset;
1219	io_req->sectorsize = 1 << 9;
1220
1221	if(ubd_dev->cow.file != NULL)
1222		cowify_req(io_req, ubd_dev->cow.bitmap,
1223			   ubd_dev->cow.bitmap_offset, ubd_dev->cow.bitmap_len);
1224
1225}
1226
1227/* Called with dev->lock held */
1228static void do_ubd_request(struct request_queue *q)
1229{
1230	struct io_thread_req *io_req;
1231	struct request *req;
1232	int n;
1233
1234	while(1){
1235		struct ubd *dev = q->queuedata;
1236		if(dev->end_sg == 0){
1237			struct request *req = blk_fetch_request(q);
1238			if(req == NULL)
1239				return;
1240
1241			dev->request = req;
1242			dev->rq_pos = blk_rq_pos(req);
1243			dev->start_sg = 0;
1244			dev->end_sg = blk_rq_map_sg(q, req, dev->sg);
1245		}
1246
1247		req = dev->request;
1248		while(dev->start_sg < dev->end_sg){
1249			struct scatterlist *sg = &dev->sg[dev->start_sg];
1250
1251			io_req = kmalloc(sizeof(struct io_thread_req),
1252					 GFP_ATOMIC);
1253			if(io_req == NULL){
1254				if(list_empty(&dev->restart))
1255					list_add(&dev->restart, &restart);
1256				return;
1257			}
1258			prepare_request(req, io_req,
1259					(unsigned long long)dev->rq_pos << 9,
1260					sg->offset, sg->length, sg_page(sg));
1261
1262			n = os_write_file(thread_fd, &io_req,
1263					  sizeof(struct io_thread_req *));
1264			if(n != sizeof(struct io_thread_req *)){
1265				if(n != -EAGAIN)
1266					printk("write to io thread failed, "
1267					       "errno = %d\n", -n);
1268				else if(list_empty(&dev->restart))
1269					list_add(&dev->restart, &restart);
1270				kfree(io_req);
1271				return;
1272			}
1273
1274			dev->rq_pos += sg->length >> 9;
1275			dev->start_sg++;
1276		}
1277		dev->end_sg = 0;
1278		dev->request = NULL;
1279	}
1280}
1281
1282static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1283{
1284	struct ubd *ubd_dev = bdev->bd_disk->private_data;
1285
1286	geo->heads = 128;
1287	geo->sectors = 32;
1288	geo->cylinders = ubd_dev->size / (128 * 32 * 512);
1289	return 0;
1290}
1291
1292static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
1293		     unsigned int cmd, unsigned long arg)
1294{
1295	struct ubd *ubd_dev = bdev->bd_disk->private_data;
1296	u16 ubd_id[ATA_ID_WORDS];
1297
1298	switch (cmd) {
1299		struct cdrom_volctrl volume;
1300	case HDIO_GET_IDENTITY:
1301		memset(&ubd_id, 0, ATA_ID_WORDS * 2);
1302		ubd_id[ATA_ID_CYLS]	= ubd_dev->size / (128 * 32 * 512);
1303		ubd_id[ATA_ID_HEADS]	= 128;
1304		ubd_id[ATA_ID_SECTORS]	= 32;
1305		if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1306				 sizeof(ubd_id)))
1307			return -EFAULT;
1308		return 0;
1309
1310	case CDROMVOLREAD:
1311		if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1312			return -EFAULT;
1313		volume.channel0 = 255;
1314		volume.channel1 = 255;
1315		volume.channel2 = 255;
1316		volume.channel3 = 255;
1317		if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1318			return -EFAULT;
1319		return 0;
1320	}
1321	return -EINVAL;
1322}
1323
1324static int update_bitmap(struct io_thread_req *req)
1325{
1326	int n;
1327
1328	if(req->cow_offset == -1)
1329		return 0;
1330
1331	n = os_seek_file(req->fds[1], req->cow_offset);
1332	if(n < 0){
1333		printk("do_io - bitmap lseek failed : err = %d\n", -n);
1334		return 1;
1335	}
1336
1337	n = os_write_file(req->fds[1], &req->bitmap_words,
1338			  sizeof(req->bitmap_words));
1339	if(n != sizeof(req->bitmap_words)){
1340		printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
1341		       req->fds[1]);
1342		return 1;
1343	}
1344
1345	return 0;
1346}
1347
1348static void do_io(struct io_thread_req *req)
1349{
1350	char *buf;
1351	unsigned long len;
1352	int n, nsectors, start, end, bit;
1353	int err;
1354	__u64 off;
1355
1356	nsectors = req->length / req->sectorsize;
1357	start = 0;
1358	do {
1359		bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
1360		end = start;
1361		while((end < nsectors) &&
1362		      (ubd_test_bit(end, (unsigned char *)
1363				    &req->sector_mask) == bit))
1364			end++;
1365
1366		off = req->offset + req->offsets[bit] +
1367			start * req->sectorsize;
1368		len = (end - start) * req->sectorsize;
1369		buf = &req->buffer[start * req->sectorsize];
1370
1371		err = os_seek_file(req->fds[bit], off);
1372		if(err < 0){
1373			printk("do_io - lseek failed : err = %d\n", -err);
1374			req->error = 1;
1375			return;
1376		}
1377		if(req->op == UBD_READ){
1378			n = 0;
1379			do {
1380				buf = &buf[n];
1381				len -= n;
1382				n = os_read_file(req->fds[bit], buf, len);
1383				if (n < 0) {
1384					printk("do_io - read failed, err = %d "
1385					       "fd = %d\n", -n, req->fds[bit]);
1386					req->error = 1;
1387					return;
1388				}
1389			} while((n < len) && (n != 0));
1390			if (n < len) memset(&buf[n], 0, len - n);
1391		} else {
1392			n = os_write_file(req->fds[bit], buf, len);
1393			if(n != len){
1394				printk("do_io - write failed err = %d "
1395				       "fd = %d\n", -n, req->fds[bit]);
1396				req->error = 1;
1397				return;
1398			}
1399		}
1400
1401		start = end;
1402	} while(start < nsectors);
1403
1404	req->error = update_bitmap(req);
1405}
1406
/* File descriptor io_thread() reads request pointers from and writes
 * them back to.
 *
 * Changed in start_io_thread, which is serialized by being called only
 * from ubd_init, which is an initcall.
 */
int kernel_fd = -1;

/* Incremented by io_thread() for every request pointer it reads. */
static int io_count;
1413
1414int io_thread(void *arg)
1415{
1416	struct io_thread_req *req;
1417	int n;
1418
1419	ignore_sigwinch_sig();
1420	while(1){
1421		n = os_read_file(kernel_fd, &req,
1422				 sizeof(struct io_thread_req *));
1423		if(n != sizeof(struct io_thread_req *)){
1424			if(n < 0)
1425				printk("io_thread - read failed, fd = %d, "
1426				       "err = %d\n", kernel_fd, -n);
1427			else {
1428				printk("io_thread - short read, fd = %d, "
1429				       "length = %d\n", kernel_fd, n);
1430			}
1431			continue;
1432		}
1433		io_count++;
1434		do_io(req);
1435		n = os_write_file(kernel_fd, &req,
1436				  sizeof(struct io_thread_req *));
1437		if(n != sizeof(struct io_thread_req *))
1438			printk("io_thread - write failed, fd = %d, err = %d\n",
1439			       kernel_fd, -n);
1440	}
1441
1442	return 0;
1443}
1444