1/*
2 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
3 * Licensed under the GPL
4 */
5
6/* 2001-09-28...2002-04-17
7 * Partition stuff by James_McMechan@hotmail.com
8 * old style ubd by setting UBD_SHIFT to 0
9 * 2002-09-27...2002-10-18 massive tinkering for 2.5
10 * partitions have changed in 2.5
11 * 2003-01-29 more tinkering for 2.5.59-1
12 * This should now address the sysfs problems and has
13 * the symlink for devfs to allow for booting with
14 * the common /dev/ubd/discX/... names rather than
15 * only /dev/ubdN/discN this version also has lots of
16 * clean ups preparing for ubd-many.
17 * James McMechan
18 */
19
20#define MAJOR_NR UBD_MAJOR
21#define UBD_SHIFT 4
22
23#include "linux/kernel.h"
24#include "linux/module.h"
25#include "linux/blkdev.h"
26#include "linux/hdreg.h"
27#include "linux/init.h"
28#include "linux/cdrom.h"
29#include "linux/proc_fs.h"
30#include "linux/ctype.h"
31#include "linux/capability.h"
32#include "linux/mm.h"
33#include "linux/vmalloc.h"
34#include "linux/blkpg.h"
35#include "linux/genhd.h"
36#include "linux/spinlock.h"
37#include "linux/platform_device.h"
38#include "asm/segment.h"
39#include "asm/uaccess.h"
40#include "asm/irq.h"
41#include "asm/types.h"
42#include "asm/tlbflush.h"
43#include "mem_user.h"
44#include "kern_util.h"
45#include "kern.h"
46#include "mconsole_kern.h"
47#include "init.h"
48#include "irq_user.h"
49#include "irq_kern.h"
50#include "ubd_user.h"
51#include "os.h"
52#include "mem.h"
53#include "mem_kern.h"
54#include "cow.h"
55
/* Direction of a request handed to the I/O thread. */
enum ubd_req { UBD_READ, UBD_WRITE };
57
/* One unit of work passed by pointer between the driver and the I/O thread.
 * The fields are filled in by prepare_request() and, for COW devices,
 * by cowify_req(). */
struct io_thread_req {
	/* block-layer request this piece of I/O belongs to */
	struct request *req;
	/* UBD_READ or UBD_WRITE */
	enum ubd_req op;
	/* [0]: the backing file fd when a COW file is in use, otherwise the
	 * device fd; [1]: the device fd */
	int fds[2];
	/* data start offsets matching fds[]: [0] is 0, [1] is the COW
	 * data_offset */
	unsigned long offsets[2];
	/* byte offset of this I/O within the device */
	unsigned long long offset;
	/* byte length of this I/O */
	unsigned long length;
	/* kernel address of the data buffer */
	char *buffer;
	/* set to 512 (1 << 9) by prepare_request() */
	int sectorsize;
	/* one bit per sector of this I/O, set by cowify_req() */
	unsigned long sector_mask;
	/* byte offset in the file of the bitmap words below; left at -1 when
	 * the bitmap did not change (presumably meaning "no bitmap write
	 * needed" to the I/O thread - confirm against do_io()) */
	unsigned long long cow_offset;
	/* the two bitmap words found at cow_offset */
	unsigned long bitmap_words[2];
	/* cleared by prepare_request(); presumably set by the I/O thread on
	 * failure - confirm against do_io() */
	int error;
};
72
/* Helpers for opening device files and handling COW metadata.
 * open_ubd_file() and read_cow_bitmap() are defined later in this file;
 * create_cow_file() and do_io() are defined outside the visible part. */
extern int open_ubd_file(char *file, struct openflags *openflags, int shared,
			 char **backing_file_out, int *bitmap_offset_out,
			 unsigned long *bitmap_len_out, int *data_offset_out,
			 int *create_cow_out);
extern int create_cow_file(char *cow_file, char *backing_file,
			   struct openflags flags, int sectorsize,
			   int alignment, int *bitmap_offset_out,
			   unsigned long *bitmap_len_out,
			   int *data_offset_out);
extern int read_cow_bitmap(int fd, void *buf, int offset, int len);
extern void do_io(struct io_thread_req *req);
84
/* Return 1 if bit number 'bit' is set in the byte array 'data', else 0.
 * Bits are numbered from the least significant bit of data[0] upwards. */
static inline int ubd_test_bit(__u64 bit, unsigned char *data)
{
	const int bits_per_cell = sizeof(data[0]) * 8;
	__u64 cell = bit / bits_per_cell;
	int shift = bit % bits_per_cell;

	return ((data[cell] >> shift) & 1) ? 1 : 0;
}
95
/* Set bit number 'bit' in the byte array 'data'.  Bits are numbered from
 * the least significant bit of data[0] upwards. */
static inline void ubd_set_bit(__u64 bit, unsigned char *data)
{
	const int bits_per_cell = sizeof(data[0]) * 8;
	__u64 cell = bit / bits_per_cell;
	int shift = bit % bits_per_cell;

	data[cell] = data[cell] | (unsigned char) (1 << shift);
}
106/*End stuff from ubd_user.h*/
107
/* Name shared by the platform driver and the per-unit platform devices. */
#define DRIVER_NAME "uml-blkdev"

/* Guards the driver-wide state marked "Protected by ubd_lock" below. */
static DEFINE_MUTEX(ubd_lock);

/* Block device entry points, defined later in this file. */
static int ubd_open(struct inode * inode, struct file * filp);
static int ubd_release(struct inode * inode, struct file * file);
static int ubd_ioctl(struct inode * inode, struct file * file,
		     unsigned int cmd, unsigned long arg);
static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);

/* Number of slots in ubd_devs[], ubd_gendisk[] and fake_gendisk[]. */
#define MAX_DEV (16)

/* Operations table installed on every gendisk by ubd_disk_register(). */
static struct block_device_operations ubd_blops = {
        .owner		= THIS_MODULE,
        .open		= ubd_open,
        .release	= ubd_release,
        .ioctl		= ubd_ioctl,
	.getgeo		= ubd_getgeo,
};
127
/* Protected by ubd_lock */
/* fake_major stays equal to MAJOR_NR until an extra major is configured
 * with "ubd=<major>"; the gendisk tables are indexed by unit number. */
static int fake_major = MAJOR_NR;
static struct gendisk *ubd_gendisk[MAX_DEV];
static struct gendisk *fake_gendisk[MAX_DEV];

/* Default open flags for device files.  The two variants differ only in
 * the .s flag, which is set when CONFIG_BLK_DEV_UBD_SYNC is enabled. */
#ifdef CONFIG_BLK_DEV_UBD_SYNC
#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
					 .cl = 1 })
#else
#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
					 .cl = 1 })
#endif
/* Starting flags for each parsed device option; "ubd=sync" updates it
 * through of_sync(). */
static struct openflags global_openflags = OPEN_FLAGS;
141
/* Copy-on-write state of a device; only meaningful when 'file' is set. */
struct cow {
	/* backing file name */
	char *file;
	/* backing file fd */
	int fd;
	/* in-memory copy of the COW bitmap, vmalloc'd by ubd_open_dev() and
	 * freed by ubd_close_dev() */
	unsigned long *bitmap;
	/* length passed to vmalloc() and read_cow_bitmap() for the bitmap */
	unsigned long bitmap_len;
	/* offset of the bitmap within the COW file */
	int bitmap_offset;
	/* offset of the data within the COW file (used as offsets[1] of an
	 * io_thread_req) */
	int data_offset;
};
152
/* Size of the per-device scatterlist; also given to the block layer as the
 * maximum number of hardware segments per request. */
#define MAX_SG 64

/* Per-unit driver state; one entry of ubd_devs[]. */
struct ubd {
	/* linkage on the global 'restart' list while request submission for
	 * this device has to be retried */
	struct list_head restart;
	/* name (and fd, below) of the file opened for writing, either the
	 * backing or the cow file. */
	char *file;
	/* number of opens; the files are opened on the first open and closed
	 * on the last release */
	int count;
	int fd;
	/* device size in bytes, rounded up to a 512-byte multiple by ubd_add() */
	__u64 size;
	/* flags parsed from the configuration string */
	struct openflags boot_openflags;
	/* flags in effect for the current open; reset from boot_openflags in
	 * ubd_open_dev() */
	struct openflags openflags;
	/* set by the 'c' option flag; passed to open_ubd_file() */
	unsigned shared:1;
	/* set by the 'd' option flag; suppresses COW detection on open */
	unsigned no_cow:1;
	struct cow cow;
	/* platform device registered for the real (non-fake) disk */
	struct platform_device pdev;
	struct request_queue *queue;
	/* queue lock handed to blk_init_queue() */
	spinlock_t lock;
	/* scatterlist of the request currently being submitted */
	struct scatterlist sg[MAX_SG];
	/* request currently being submitted, NULL when there is none */
	struct request *request;
	/* next sg entry to submit and number of mapped entries; end_sg == 0
	 * means no request is in progress */
	int start_sg, end_sg;
};
175
/* Initializer for an unconfigured struct cow: no file, no fd, no bitmap. */
#define DEFAULT_COW { \
	.file =			NULL, \
	.fd =			-1,	\
	.bitmap =		NULL, \
	.bitmap_offset =	0, \
	.data_offset =		0, \
}

/* Initializer for an unconfigured struct ubd.  Fields not listed here
 * (restart, pdev, queue, sg) are zero-initialized; the restart list head
 * is set up later by ubd_add(). */
#define DEFAULT_UBD { \
	.file = 		NULL, \
	.count =		0, \
	.fd =			-1, \
	.size =			-1, \
	.boot_openflags =	OPEN_FLAGS, \
	.openflags =		OPEN_FLAGS, \
	.no_cow =               0, \
	.shared =		0, \
	.cow =			DEFAULT_COW, \
	.lock =			SPIN_LOCK_UNLOCKED,	\
	.request =		NULL, \
	.start_sg =		0, \
	.end_sg =		0, \
}

/* Protected by ubd_lock */
struct ubd ubd_devs[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_UBD };
202
/* Only changed by fake_ide_setup which is a setup */
static int fake_ide = 0;
/* /proc/ide and /proc/ide/ide0, created on demand by make_proc_ide(). */
static struct proc_dir_entry *proc_ide_root = NULL;
static struct proc_dir_entry *proc_ide = NULL;
207
208static void make_proc_ide(void)
209{
210	proc_ide_root = proc_mkdir("ide", NULL);
211	proc_ide = proc_mkdir("ide0", proc_ide_root);
212}
213
/* read_proc handler for the fake "media" entry: always reports "disk\n".
 *
 * Writes the whole string into 'page', then returns how many bytes are
 * available starting at 'off', capped at 'count'.  *eof is set when fewer
 * than 'count' bytes remain; *start is only set when something is returned
 * past the "nothing left" check.
 */
static int proc_ide_read_media(char *page, char **start, off_t off, int count,
			       int *eof, void *data)
{
	int remaining;

	strcpy(page, "disk\n");
	remaining = strlen("disk\n");
	remaining -= off;

	if (remaining >= count) {
		remaining = count;
	} else {
		*eof = 1;
		if (remaining <= 0)
			return 0;
	}

	*start = page + off;
	return remaining;
}
230
231static void make_ide_entries(char *dev_name)
232{
233	struct proc_dir_entry *dir, *ent;
234	char name[64];
235
236	if(proc_ide_root == NULL) make_proc_ide();
237
238	dir = proc_mkdir(dev_name, proc_ide);
239	if(!dir) return;
240
241	ent = create_proc_entry("media", S_IFREG|S_IRUGO, dir);
242	if(!ent) return;
243	ent->data = NULL;
244	ent->read_proc = proc_ide_read_media;
245	ent->write_proc = NULL;
246	sprintf(name,"ide0/%s", dev_name);
247	proc_symlink(dev_name, proc_ide_root, name);
248}
249
/* Handler for the "fake_ide" boot option: records that the fake IDE proc
 * entries should be created when devices are added (see ubd_add()).
 * The argument is ignored. */
static int fake_ide_setup(char *str)
{
	fake_ide = 1;
	return 1;
}

__setup("fake_ide", fake_ide_setup);

__uml_help(fake_ide_setup,
"fake_ide\n"
"    Create ide0 entries that map onto ubd devices.\n\n"
);
262
/* Parse a unit designator at *ptr: either a number (any base accepted by
 * simple_strtoul() with base 0) or a single lowercase letter, where 'a'
 * is unit 0.
 *
 * On success returns the unit and advances *ptr past it; otherwise
 * returns -1 and leaves *ptr unchanged.  No range check is done here.
 */
static int parse_unit(char **ptr)
{
	char *cur = *ptr;
	char *end;
	int unit;

	if (isdigit(*cur)) {
		unit = simple_strtoul(cur, &end, 0);
		if (end == cur)
			return -1;
		*ptr = end;
		return unit;
	}

	if ((*cur < 'a') || (*cur > 'z'))
		return -1;

	unit = *cur - 'a';
	*ptr = cur + 1;
	return unit;
}
281
282/* If *index_out == -1 at exit, the passed option was a general one;
283 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
284 * should not be freed on exit.
285 */
286static int ubd_setup_common(char *str, int *index_out, char **error_out)
287{
288	struct ubd *ubd_dev;
289	struct openflags flags = global_openflags;
290	char *backing_file;
291	int n, err = 0, i;
292
293	if(index_out) *index_out = -1;
294	n = *str;
295	if(n == '='){
296		char *end;
297		int major;
298
299		str++;
300		if(!strcmp(str, "sync")){
301			global_openflags = of_sync(global_openflags);
302			goto out1;
303		}
304
305		err = -EINVAL;
306		major = simple_strtoul(str, &end, 0);
307		if((*end != '\0') || (end == str)){
308			*error_out = "Didn't parse major number";
309			goto out1;
310		}
311
312		mutex_lock(&ubd_lock);
313		if(fake_major != MAJOR_NR){
314			*error_out = "Can't assign a fake major twice";
315			goto out1;
316		}
317
318		fake_major = major;
319
320		printk(KERN_INFO "Setting extra ubd major number to %d\n",
321		       major);
322		err = 0;
323	out1:
324		mutex_unlock(&ubd_lock);
325		return err;
326	}
327
328	n = parse_unit(&str);
329	if(n < 0){
330		*error_out = "Couldn't parse device number";
331		return -EINVAL;
332	}
333	if(n >= MAX_DEV){
334		*error_out = "Device number out of range";
335		return 1;
336	}
337
338	err = -EBUSY;
339	mutex_lock(&ubd_lock);
340
341	ubd_dev = &ubd_devs[n];
342	if(ubd_dev->file != NULL){
343		*error_out = "Device is already configured";
344		goto out;
345	}
346
347	if (index_out)
348		*index_out = n;
349
350	err = -EINVAL;
351	for (i = 0; i < sizeof("rscd="); i++) {
352		switch (*str) {
353		case 'r':
354			flags.w = 0;
355			break;
356		case 's':
357			flags.s = 1;
358			break;
359		case 'd':
360			ubd_dev->no_cow = 1;
361			break;
362		case 'c':
363			ubd_dev->shared = 1;
364			break;
365		case '=':
366			str++;
367			goto break_loop;
368		default:
369			*error_out = "Expected '=' or flag letter "
370				"(r, s, c, or d)";
371			goto out;
372		}
373		str++;
374	}
375
376	if (*str == '=')
377		*error_out = "Too many flags specified";
378	else
379		*error_out = "Missing '='";
380	goto out;
381
382break_loop:
383	backing_file = strchr(str, ',');
384
385	if (backing_file == NULL)
386		backing_file = strchr(str, ':');
387
388	if(backing_file != NULL){
389		if(ubd_dev->no_cow){
390			*error_out = "Can't specify both 'd' and a cow file";
391			goto out;
392		}
393		else {
394			*backing_file = '\0';
395			backing_file++;
396		}
397	}
398	err = 0;
399	ubd_dev->file = str;
400	ubd_dev->cow.file = backing_file;
401	ubd_dev->boot_openflags = flags;
402out:
403	mutex_unlock(&ubd_lock);
404	return err;
405}
406
407static int ubd_setup(char *str)
408{
409	char *error;
410	int err;
411
412	err = ubd_setup_common(str, NULL, &error);
413	if(err)
414		printk(KERN_ERR "Failed to initialize device with \"%s\" : "
415		       "%s\n", str, error);
416	return 1;
417}
418
419__setup("ubd", ubd_setup);
420__uml_help(ubd_setup,
421"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
422"    This is used to associate a device with a file in the underlying\n"
423"    filesystem. When specifying two filenames, the first one is the\n"
424"    COW name and the second is the backing file name. As separator you can\n"
425"    use either a ':' or a ',': the first one allows writing things like;\n"
426"	ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
427"    while with a ',' the shell would not expand the 2nd '~'.\n"
428"    When using only one filename, UML will detect whether to treat it like\n"
429"    a COW file or a backing file. To override this detection, add the 'd'\n"
430"    flag:\n"
431"	ubd0d=BackingFile\n"
432"    Usually, there is a filesystem in the file, but \n"
433"    that's not required. Swap devices containing swap files can be\n"
434"    specified like this. Also, a file which doesn't contain a\n"
435"    filesystem can have its contents read in the virtual \n"
436"    machine by running 'dd' on the device. <n> must be in the range\n"
437"    0 to 7. Appending an 'r' to the number will cause that device\n"
438"    to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
439"    an 's' will cause data to be written to disk on the host immediately.\n\n"
440);
441
442static int udb_setup(char *str)
443{
444	printk("udb%s specified on command line is almost certainly a ubd -> "
445	       "udb TYPO\n", str);
446	return 1;
447}
448
449__setup("udb", udb_setup);
450__uml_help(udb_setup,
451"udb\n"
452"    This option is here solely to catch ubd -> udb typos, which can be\n"
453"    to impossible to catch visually unless you specifically look for\n"
454"    them.  The only result of any option starting with 'udb' is an error\n"
455"    in the boot output.\n\n"
456);
457
458static int fakehd_set = 0;
459static int fakehd(char *str)
460{
461	printk(KERN_INFO "fakehd : Changing ubd name to \"hd\".\n");
462	fakehd_set = 1;
463	return 1;
464}
465
466__setup("fakehd", fakehd);
467__uml_help(fakehd,
468"fakehd\n"
469"    Change the ubd device name to \"hd\".\n\n"
470);
471
/* Request function, defined further down; needed here by ubd_handler(). */
static void do_ubd_request(request_queue_t * q);

/* Only changed by ubd_init, which is an initcall. */
/* Descriptor used to exchange io_thread_req pointers with the I/O thread:
 * requests are written to it and completions are read back from it. */
int thread_fd = -1;
476
477static void ubd_end_request(struct request *req, int bytes, int uptodate)
478{
479	if (!end_that_request_first(req, uptodate, bytes >> 9)) {
480		struct ubd *dev = req->rq_disk->private_data;
481		unsigned long flags;
482
483		add_disk_randomness(req->rq_disk);
484		spin_lock_irqsave(&dev->lock, flags);
485		end_that_request_last(req, uptodate);
486		spin_unlock_irqrestore(&dev->lock, flags);
487	}
488}
489
/* Callable only from interrupt context - otherwise you need to do
 * spin_lock_irq()/spin_lock_irqsave().
 *
 * A negative byte count marks the request as failed; anything else
 * completes that many bytes successfully. */
static inline void ubd_finish(struct request *req, int bytes)
{
	if (bytes >= 0)
		ubd_end_request(req, bytes, 1);
	else
		ubd_end_request(req, 0, 0);
}
500
/* Devices whose request submission stalled in do_ubd_request() and must be
 * restarted from ubd_handler(). */
static LIST_HEAD(restart);
502
503/* Called without dev->lock held, and only in interrupt context. */
504static void ubd_handler(void)
505{
506	struct io_thread_req *req;
507	struct request *rq;
508	struct ubd *ubd;
509	struct list_head *list, *next_ele;
510	unsigned long flags;
511	int n;
512
513	while(1){
514		n = os_read_file(thread_fd, &req,
515				 sizeof(struct io_thread_req *));
516		if(n != sizeof(req)){
517			if(n == -EAGAIN)
518				break;
519			printk(KERN_ERR "spurious interrupt in ubd_handler, "
520			       "err = %d\n", -n);
521			return;
522		}
523
524		rq = req->req;
525		rq->nr_sectors -= req->length >> 9;
526		if(rq->nr_sectors == 0)
527			ubd_finish(rq, rq->hard_nr_sectors << 9);
528		kfree(req);
529	}
530	reactivate_fd(thread_fd, UBD_IRQ);
531
532	list_for_each_safe(list, next_ele, &restart){
533		ubd = container_of(list, struct ubd, restart);
534		list_del_init(&ubd->restart);
535		spin_lock_irqsave(&ubd->lock, flags);
536		do_ubd_request(ubd->queue);
537		spin_unlock_irqrestore(&ubd->lock, flags);
538	}
539}
540
/* Interrupt handler registered for UBD_IRQ in ubd_driver_init(); all the
 * work is done by ubd_handler().  Both arguments are unused. */
static irqreturn_t ubd_intr(int irq, void *dev)
{
	ubd_handler();
	return IRQ_HANDLED;
}
546
/* Only changed by ubd_init, which is an initcall. */
/* Pid of the I/O thread, or -1 when none is running. */
static int io_pid = -1;

/* Exit hook: kill the I/O thread if one was started. */
void kill_io_thread(void)
{
	if (io_pid == -1)
		return;

	os_kill_process(io_pid, 1);
}

__uml_exitcall(kill_io_thread);
557
558static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
559{
560	char *file;
561
562	file = ubd_dev->cow.file ? ubd_dev->cow.file : ubd_dev->file;
563	return os_file_size(file, size_out);
564}
565
566static void ubd_close_dev(struct ubd *ubd_dev)
567{
568	os_close_file(ubd_dev->fd);
569	if(ubd_dev->cow.file == NULL)
570		return;
571
572	os_close_file(ubd_dev->cow.fd);
573	vfree(ubd_dev->cow.bitmap);
574	ubd_dev->cow.bitmap = NULL;
575}
576
577static int ubd_open_dev(struct ubd *ubd_dev)
578{
579	struct openflags flags;
580	char **back_ptr;
581	int err, create_cow, *create_ptr;
582	int fd;
583
584	ubd_dev->openflags = ubd_dev->boot_openflags;
585	create_cow = 0;
586	create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
587	back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
588
589	fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
590				back_ptr, &ubd_dev->cow.bitmap_offset,
591				&ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
592				create_ptr);
593
594	if((fd == -ENOENT) && create_cow){
595		fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
596					  ubd_dev->openflags, 1 << 9, PAGE_SIZE,
597					  &ubd_dev->cow.bitmap_offset,
598					  &ubd_dev->cow.bitmap_len,
599					  &ubd_dev->cow.data_offset);
600		if(fd >= 0){
601			printk(KERN_INFO "Creating \"%s\" as COW file for "
602			       "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
603		}
604	}
605
606	if(fd < 0){
607		printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
608		       -fd);
609		return fd;
610	}
611	ubd_dev->fd = fd;
612
613	if(ubd_dev->cow.file != NULL){
614		err = -ENOMEM;
615		ubd_dev->cow.bitmap = (void *) vmalloc(ubd_dev->cow.bitmap_len);
616		if(ubd_dev->cow.bitmap == NULL){
617			printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
618			goto error;
619		}
620		flush_tlb_kernel_vm();
621
622		err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
623				      ubd_dev->cow.bitmap_offset,
624				      ubd_dev->cow.bitmap_len);
625		if(err < 0)
626			goto error;
627
628		flags = ubd_dev->openflags;
629		flags.w = 0;
630		err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
631				    NULL, NULL, NULL, NULL);
632		if(err < 0) goto error;
633		ubd_dev->cow.fd = err;
634	}
635	return 0;
636 error:
637	os_close_file(ubd_dev->fd);
638	return err;
639}
640
641static void ubd_device_release(struct device *dev)
642{
643	struct ubd *ubd_dev = dev->driver_data;
644
645	blk_cleanup_queue(ubd_dev->queue);
646	*ubd_dev = ((struct ubd) DEFAULT_UBD);
647}
648
649static int ubd_disk_register(int major, u64 size, int unit,
650			     struct gendisk **disk_out)
651{
652	struct gendisk *disk;
653
654	disk = alloc_disk(1 << UBD_SHIFT);
655	if(disk == NULL)
656		return -ENOMEM;
657
658	disk->major = major;
659	disk->first_minor = unit << UBD_SHIFT;
660	disk->fops = &ubd_blops;
661	set_capacity(disk, size / 512);
662	if(major == MAJOR_NR)
663		sprintf(disk->disk_name, "ubd%c", 'a' + unit);
664	else
665		sprintf(disk->disk_name, "ubd_fake%d", unit);
666
667	/* sysfs register (not for ide fake devices) */
668	if (major == MAJOR_NR) {
669		ubd_devs[unit].pdev.id   = unit;
670		ubd_devs[unit].pdev.name = DRIVER_NAME;
671		ubd_devs[unit].pdev.dev.release = ubd_device_release;
672		ubd_devs[unit].pdev.dev.driver_data = &ubd_devs[unit];
673		platform_device_register(&ubd_devs[unit].pdev);
674		disk->driverfs_dev = &ubd_devs[unit].pdev.dev;
675	}
676
677	disk->private_data = &ubd_devs[unit];
678	disk->queue = ubd_devs[unit].queue;
679	add_disk(disk);
680
681	*disk_out = disk;
682	return 0;
683}
684
/* Round n up to the next multiple of the 512-byte sector size.  The argument
 * is parenthesized so that expressions can be passed safely, and the mask is
 * built without left-shifting a negative value. */
#define ROUND_BLOCK(n) (((n) + ((1 << 9) - 1)) & ~((1 << 9) - 1))
686
687static int ubd_add(int n, char **error_out)
688{
689	struct ubd *ubd_dev = &ubd_devs[n];
690	int err = 0;
691
692	if(ubd_dev->file == NULL)
693		goto out;
694
695	err = ubd_file_size(ubd_dev, &ubd_dev->size);
696	if(err < 0){
697		*error_out = "Couldn't determine size of device's file";
698		goto out;
699	}
700
701	ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
702
703	INIT_LIST_HEAD(&ubd_dev->restart);
704
705	err = -ENOMEM;
706	ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock);
707	if (ubd_dev->queue == NULL) {
708		*error_out = "Failed to initialize device queue";
709		goto out;
710	}
711	ubd_dev->queue->queuedata = ubd_dev;
712
713	blk_queue_max_hw_segments(ubd_dev->queue, MAX_SG);
714	err = ubd_disk_register(MAJOR_NR, ubd_dev->size, n, &ubd_gendisk[n]);
715	if(err){
716		*error_out = "Failed to register device";
717		goto out_cleanup;
718	}
719
720	if(fake_major != MAJOR_NR)
721		ubd_disk_register(fake_major, ubd_dev->size, n,
722				  &fake_gendisk[n]);
723
724	/* perhaps this should also be under the "if (fake_major)" above */
725	/* using the fake_disk->disk_name and also the fakehd_set name */
726	if (fake_ide)
727		make_ide_entries(ubd_gendisk[n]->disk_name);
728
729	err = 0;
730out:
731	return err;
732
733out_cleanup:
734	blk_cleanup_queue(ubd_dev->queue);
735	goto out;
736}
737
738static int ubd_config(char *str, char **error_out)
739{
740	int n, ret;
741
742	/* This string is possibly broken up and stored, so it's only
743	 * freed if ubd_setup_common fails, or if only general options
744	 * were set.
745	 */
746	str = kstrdup(str, GFP_KERNEL);
747	if (str == NULL) {
748		*error_out = "Failed to allocate memory";
749		return -ENOMEM;
750	}
751
752	ret = ubd_setup_common(str, &n, error_out);
753	if (ret)
754		goto err_free;
755
756	if (n == -1) {
757		ret = 0;
758		goto err_free;
759	}
760
761	mutex_lock(&ubd_lock);
762	ret = ubd_add(n, error_out);
763	if (ret)
764		ubd_devs[n].file = NULL;
765	mutex_unlock(&ubd_lock);
766
767out:
768	return ret;
769
770err_free:
771	kfree(str);
772	goto out;
773}
774
775static int ubd_get_config(char *name, char *str, int size, char **error_out)
776{
777	struct ubd *ubd_dev;
778	int n, len = 0;
779
780	n = parse_unit(&name);
781	if((n >= MAX_DEV) || (n < 0)){
782		*error_out = "ubd_get_config : device number out of range";
783		return -1;
784	}
785
786	ubd_dev = &ubd_devs[n];
787	mutex_lock(&ubd_lock);
788
789	if(ubd_dev->file == NULL){
790		CONFIG_CHUNK(str, size, len, "", 1);
791		goto out;
792	}
793
794	CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
795
796	if(ubd_dev->cow.file != NULL){
797		CONFIG_CHUNK(str, size, len, ",", 0);
798		CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
799	}
800	else CONFIG_CHUNK(str, size, len, "", 1);
801
802 out:
803	mutex_unlock(&ubd_lock);
804	return len;
805}
806
807static int ubd_id(char **str, int *start_out, int *end_out)
808{
809	int n;
810
811	n = parse_unit(str);
812	*start_out = 0;
813	*end_out = MAX_DEV - 1;
814	return n;
815}
816
817static int ubd_remove(int n, char **error_out)
818{
819	struct gendisk *disk = ubd_gendisk[n];
820	struct ubd *ubd_dev;
821	int err = -ENODEV;
822
823	mutex_lock(&ubd_lock);
824
825	ubd_dev = &ubd_devs[n];
826
827	if(ubd_dev->file == NULL)
828		goto out;
829
830	/* you cannot remove a open disk */
831	err = -EBUSY;
832	if(ubd_dev->count > 0)
833		goto out;
834
835	ubd_gendisk[n] = NULL;
836	if(disk != NULL){
837		del_gendisk(disk);
838		put_disk(disk);
839	}
840
841	if(fake_gendisk[n] != NULL){
842		del_gendisk(fake_gendisk[n]);
843		put_disk(fake_gendisk[n]);
844		fake_gendisk[n] = NULL;
845	}
846
847	err = 0;
848	platform_device_unregister(&ubd_dev->pdev);
849out:
850	mutex_unlock(&ubd_lock);
851	return err;
852}
853
/* All these are called by mconsole in process context and without
 * ubd-specific locks.  The structure itself is const except for .list.
 */
/* Registered with mconsole by ubd_mc_init() under the name "ubd". */
static struct mc_device ubd_mc = {
	.list		= LIST_HEAD_INIT(ubd_mc.list),
	.name		= "ubd",
	.config		= ubd_config,
	.get_config	= ubd_get_config,
	.id		= ubd_id,
	.remove		= ubd_remove,
};
865
/* Initcall: hook the ubd device type into mconsole.  Always returns 0. */
static int __init ubd_mc_init(void)
{
	mconsole_register_dev(&ubd_mc);
	return 0;
}

__initcall(ubd_mc_init);
873
874static int __init ubd0_init(void)
875{
876	struct ubd *ubd_dev = &ubd_devs[0];
877
878	mutex_lock(&ubd_lock);
879	if(ubd_dev->file == NULL)
880		ubd_dev->file = "root_fs";
881	mutex_unlock(&ubd_lock);
882
883	return 0;
884}
885
886__initcall(ubd0_init);
887
/* Used in ubd_init, which is an initcall */
/* Platform driver whose name matches the per-unit platform devices
 * registered in ubd_disk_register(). */
static struct platform_driver ubd_driver = {
	.driver = {
		.name  = DRIVER_NAME,
	},
};
894
895static int __init ubd_init(void)
896{
897	char *error;
898	int i, err;
899
900	if (register_blkdev(MAJOR_NR, "ubd"))
901		return -1;
902
903	if (fake_major != MAJOR_NR) {
904		char name[sizeof("ubd_nnn\0")];
905
906		snprintf(name, sizeof(name), "ubd_%d", fake_major);
907		if (register_blkdev(fake_major, "ubd"))
908			return -1;
909	}
910	platform_driver_register(&ubd_driver);
911	mutex_lock(&ubd_lock);
912	for (i = 0; i < MAX_DEV; i++){
913		err = ubd_add(i, &error);
914		if(err)
915			printk(KERN_ERR "Failed to initialize ubd device %d :"
916			       "%s\n", i, error);
917	}
918	mutex_unlock(&ubd_lock);
919	return 0;
920}
921
922late_initcall(ubd_init);
923
924static int __init ubd_driver_init(void){
925	unsigned long stack;
926	int err;
927
928	/* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
929	if(global_openflags.s){
930		printk(KERN_INFO "ubd: Synchronous mode\n");
931		/* Letting ubd=sync be like using ubd#s= instead of ubd#= is
932		 * enough. So use anyway the io thread. */
933	}
934	stack = alloc_stack(0, 0);
935	io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
936				 &thread_fd);
937	if(io_pid < 0){
938		printk(KERN_ERR
939		       "ubd : Failed to start I/O thread (errno = %d) - "
940		       "falling back to synchronous I/O\n", -io_pid);
941		io_pid = -1;
942		return 0;
943	}
944	err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
945			     IRQF_DISABLED, "ubd", ubd_devs);
946	if(err != 0)
947		printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
948	return 0;
949}
950
951device_initcall(ubd_driver_init);
952
953static int ubd_open(struct inode *inode, struct file *filp)
954{
955	struct gendisk *disk = inode->i_bdev->bd_disk;
956	struct ubd *ubd_dev = disk->private_data;
957	int err = 0;
958
959	if(ubd_dev->count == 0){
960		err = ubd_open_dev(ubd_dev);
961		if(err){
962			printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
963			       disk->disk_name, ubd_dev->file, -err);
964			goto out;
965		}
966	}
967	ubd_dev->count++;
968	set_disk_ro(disk, !ubd_dev->openflags.w);
969
970	/* This should no more be needed. And it didn't work anyway to exclude
971	 * read-write remounting of filesystems.*/
972	/*if((filp->f_mode & FMODE_WRITE) && !ubd_dev->openflags.w){
973	        if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev);
974	        err = -EROFS;
975	}*/
976 out:
977	return err;
978}
979
980static int ubd_release(struct inode * inode, struct file * file)
981{
982	struct gendisk *disk = inode->i_bdev->bd_disk;
983	struct ubd *ubd_dev = disk->private_data;
984
985	if(--ubd_dev->count == 0)
986		ubd_close_dev(ubd_dev);
987	return 0;
988}
989
/* Mark the sectors covered by a write in the in-memory COW bitmap.
 *
 * io_offset, length: byte offset and byte length of the write.
 * cow_mask:     if non-NULL, gets one bit set per sector of the write.
 * cow_offset:   out; set only when the bitmap changed, to the byte offset
 *               (bitmap_offset included) of the bitmap words copied below.
 * bitmap:       in-memory COW bitmap, one bit per 512-byte sector.
 * bitmap_words: out; set only when the bitmap changed, to two consecutive
 *               bitmap words starting at the word reported in *cow_offset.
 * bitmap_len:   bitmap length in bytes, used to find the last word.
 *
 * When every sector was already marked, *cow_offset and bitmap_words are
 * left untouched.
 */
static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
			  __u64 *cow_offset, unsigned long *bitmap,
			  __u64 bitmap_offset, unsigned long *bitmap_words,
			  __u64 bitmap_len)
{
	__u64 sector = io_offset >> 9;
	int i, update_bitmap = 0;

	/* Walk the write one 512-byte sector at a time. */
	for(i = 0; i < length >> 9; i++){
		if(cow_mask != NULL)
			ubd_set_bit(i, (unsigned char *) cow_mask);
		/* Already marked: nothing to record for this sector. */
		if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
			continue;

		update_bitmap = 1;
		ubd_set_bit(sector + i, (unsigned char *) bitmap);
	}

	if(!update_bitmap)
		return;

	/* Index, in unsigned long words, of the word holding the bit of the
	 * first sector of the write. */
	*cow_offset = sector / (sizeof(unsigned long) * 8);

	/* This takes care of the case where we're exactly at the end of the
	 * device, and *cow_offset + 1 is off the end.  So, just back it up
	 * by one word.  Thanks to Lynn Kerby for the fix and James McMechan
	 * for the original diagnosis.
	 */
	if(*cow_offset == ((bitmap_len + sizeof(unsigned long) - 1) /
			   sizeof(unsigned long) - 1))
		(*cow_offset)--;

	/* Two words are always captured, starting at that index. */
	bitmap_words[0] = bitmap[*cow_offset];
	bitmap_words[1] = bitmap[*cow_offset + 1];

	/* Convert the word index into a byte offset past bitmap_offset. */
	*cow_offset *= sizeof(unsigned long);
	*cow_offset += bitmap_offset;
}
1028
1029static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
1030		       __u64 bitmap_offset, __u64 bitmap_len)
1031{
1032	__u64 sector = req->offset >> 9;
1033	int i;
1034
1035	if(req->length > (sizeof(req->sector_mask) * 8) << 9)
1036		panic("Operation too long");
1037
1038	if(req->op == UBD_READ) {
1039		for(i = 0; i < req->length >> 9; i++){
1040			if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1041				ubd_set_bit(i, (unsigned char *)
1042					    &req->sector_mask);
1043		}
1044	}
1045	else cowify_bitmap(req->offset, req->length, &req->sector_mask,
1046			   &req->cow_offset, bitmap, bitmap_offset,
1047			   req->bitmap_words, bitmap_len);
1048}
1049
1050/* Called with dev->lock held */
1051static void prepare_request(struct request *req, struct io_thread_req *io_req,
1052			    unsigned long long offset, int page_offset,
1053			    int len, struct page *page)
1054{
1055	struct gendisk *disk = req->rq_disk;
1056	struct ubd *ubd_dev = disk->private_data;
1057
1058	io_req->req = req;
1059	io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
1060		ubd_dev->fd;
1061	io_req->fds[1] = ubd_dev->fd;
1062	io_req->cow_offset = -1;
1063	io_req->offset = offset;
1064	io_req->length = len;
1065	io_req->error = 0;
1066	io_req->sector_mask = 0;
1067
1068	io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
1069	io_req->offsets[0] = 0;
1070	io_req->offsets[1] = ubd_dev->cow.data_offset;
1071	io_req->buffer = page_address(page) + page_offset;
1072	io_req->sectorsize = 1 << 9;
1073
1074	if(ubd_dev->cow.file != NULL)
1075		cowify_req(io_req, ubd_dev->cow.bitmap,
1076			   ubd_dev->cow.bitmap_offset, ubd_dev->cow.bitmap_len);
1077
1078}
1079
1080/* Called with dev->lock held */
1081static void do_ubd_request(request_queue_t *q)
1082{
1083	struct io_thread_req *io_req;
1084	struct request *req;
1085	int n;
1086
1087	while(1){
1088		struct ubd *dev = q->queuedata;
1089		if(dev->end_sg == 0){
1090			struct request *req = elv_next_request(q);
1091			if(req == NULL)
1092				return;
1093
1094			dev->request = req;
1095			blkdev_dequeue_request(req);
1096			dev->start_sg = 0;
1097			dev->end_sg = blk_rq_map_sg(q, req, dev->sg);
1098		}
1099
1100		req = dev->request;
1101		while(dev->start_sg < dev->end_sg){
1102			struct scatterlist *sg = &dev->sg[dev->start_sg];
1103
1104			io_req = kmalloc(sizeof(struct io_thread_req),
1105					 GFP_ATOMIC);
1106			if(io_req == NULL){
1107				if(list_empty(&dev->restart))
1108					list_add(&dev->restart, &restart);
1109				return;
1110			}
1111			prepare_request(req, io_req,
1112					(unsigned long long) req->sector << 9,
1113					sg->offset, sg->length, sg->page);
1114
1115			n = os_write_file(thread_fd, &io_req,
1116					  sizeof(struct io_thread_req *));
1117			if(n != sizeof(struct io_thread_req *)){
1118				if(n != -EAGAIN)
1119					printk("write to io thread failed, "
1120					       "errno = %d\n", -n);
1121				else if(list_empty(&dev->restart))
1122					list_add(&dev->restart, &restart);
1123				return;
1124			}
1125
1126			req->sector += sg->length >> 9;
1127			dev->start_sg++;
1128		}
1129		dev->end_sg = 0;
1130		dev->request = NULL;
1131	}
1132}
1133
1134static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1135{
1136	struct ubd *ubd_dev = bdev->bd_disk->private_data;
1137
1138	geo->heads = 128;
1139	geo->sectors = 32;
1140	geo->cylinders = ubd_dev->size / (128 * 32 * 512);
1141	return 0;
1142}
1143
1144static int ubd_ioctl(struct inode * inode, struct file * file,
1145		     unsigned int cmd, unsigned long arg)
1146{
1147	struct ubd *ubd_dev = inode->i_bdev->bd_disk->private_data;
1148	struct hd_driveid ubd_id = {
1149		.cyls		= 0,
1150		.heads		= 128,
1151		.sectors	= 32,
1152	};
1153
1154	switch (cmd) {
1155		struct cdrom_volctrl volume;
1156	case HDIO_GET_IDENTITY:
1157		ubd_id.cyls = ubd_dev->size / (128 * 32 * 512);
1158		if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1159				 sizeof(ubd_id)))
1160			return -EFAULT;
1161		return 0;
1162
1163	case CDROMVOLREAD:
1164		if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1165			return -EFAULT;
1166		volume.channel0 = 255;
1167		volume.channel1 = 255;
1168		volume.channel2 = 255;
1169		volume.channel3 = 255;
1170		if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1171			return -EFAULT;
1172		return 0;
1173	}
1174	return -EINVAL;
1175}
1176
1177static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
1178{
1179	struct uml_stat buf1, buf2;
1180	int err;
1181
1182	if(from_cmdline == NULL)
1183		return 0;
1184	if(!strcmp(from_cmdline, from_cow))
1185		return 0;
1186
1187	err = os_stat_file(from_cmdline, &buf1);
1188	if(err < 0){
1189		printk("Couldn't stat '%s', err = %d\n", from_cmdline, -err);
1190		return 0;
1191	}
1192	err = os_stat_file(from_cow, &buf2);
1193	if(err < 0){
1194		printk("Couldn't stat '%s', err = %d\n", from_cow, -err);
1195		return 1;
1196	}
1197	if((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
1198		return 0;
1199
1200	printk("Backing file mismatch - \"%s\" requested,\n"
1201	       "\"%s\" specified in COW header of \"%s\"\n",
1202	       from_cmdline, from_cow, cow);
1203	return 1;
1204}
1205
1206static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
1207{
1208	unsigned long modtime;
1209	unsigned long long actual;
1210	int err;
1211
1212	err = os_file_modtime(file, &modtime);
1213	if(err < 0){
1214		printk("Failed to get modification time of backing file "
1215		       "\"%s\", err = %d\n", file, -err);
1216		return err;
1217	}
1218
1219	err = os_file_size(file, &actual);
1220	if(err < 0){
1221		printk("Failed to get size of backing file \"%s\", "
1222		       "err = %d\n", file, -err);
1223		return err;
1224	}
1225
1226	if(actual != size){
1227		/*__u64 can be a long on AMD64 and with %lu GCC complains; so
1228		 * the typecast.*/
1229		printk("Size mismatch (%llu vs %llu) of COW header vs backing "
1230		       "file\n", (unsigned long long) size, actual);
1231		return -EINVAL;
1232	}
1233	if(modtime != mtime){
1234		printk("mtime mismatch (%ld vs %ld) of COW header vs backing "
1235		       "file\n", mtime, modtime);
1236		return -EINVAL;
1237	}
1238	return 0;
1239}
1240
/*
 * Seek fd to offset and read len bytes into buf.
 *
 * Returns 0 unless os_seek_file() or os_read_file() reports a negative
 * error, in which case that error is returned.  A read that returns fewer
 * than len bytes is not treated as a failure.
 */
int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int ret = os_seek_file(fd, offset);

	if (ret >= 0)
		ret = os_read_file(fd, buf, len);

	return (ret < 0) ? ret : 0;
}
1255
1256int open_ubd_file(char *file, struct openflags *openflags, int shared,
1257		  char **backing_file_out, int *bitmap_offset_out,
1258		  unsigned long *bitmap_len_out, int *data_offset_out,
1259		  int *create_cow_out)
1260{
1261	time_t mtime;
1262	unsigned long long size;
1263	__u32 version, align;
1264	char *backing_file;
1265	int fd, err, sectorsize, asked_switch, mode = 0644;
1266
1267	fd = os_open_file(file, *openflags, mode);
1268	if (fd < 0) {
1269		if ((fd == -ENOENT) && (create_cow_out != NULL))
1270			*create_cow_out = 1;
1271		if (!openflags->w ||
1272		    ((fd != -EROFS) && (fd != -EACCES)))
1273			return fd;
1274		openflags->w = 0;
1275		fd = os_open_file(file, *openflags, mode);
1276		if (fd < 0)
1277			return fd;
1278	}
1279
1280	if(shared)
1281		printk("Not locking \"%s\" on the host\n", file);
1282	else {
1283		err = os_lock_file(fd, openflags->w);
1284		if(err < 0){
1285			printk("Failed to lock '%s', err = %d\n", file, -err);
1286			goto out_close;
1287		}
1288	}
1289
1290	/* Successful return case! */
1291	if(backing_file_out == NULL)
1292		return fd;
1293
1294	err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
1295			      &size, &sectorsize, &align, bitmap_offset_out);
1296	if(err && (*backing_file_out != NULL)){
1297		printk("Failed to read COW header from COW file \"%s\", "
1298		       "errno = %d\n", file, -err);
1299		goto out_close;
1300	}
1301	if(err)
1302		return fd;
1303
1304	asked_switch = path_requires_switch(*backing_file_out, backing_file, file);
1305
1306	/* Allow switching only if no mismatch. */
1307	if (asked_switch && !backing_file_mismatch(*backing_file_out, size, mtime)) {
1308		printk("Switching backing file to '%s'\n", *backing_file_out);
1309		err = write_cow_header(file, fd, *backing_file_out,
1310				       sectorsize, align, &size);
1311		if (err) {
1312			printk("Switch failed, errno = %d\n", -err);
1313			goto out_close;
1314		}
1315	} else {
1316		*backing_file_out = backing_file;
1317		err = backing_file_mismatch(*backing_file_out, size, mtime);
1318		if (err)
1319			goto out_close;
1320	}
1321
1322	cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
1323		  bitmap_len_out, data_offset_out);
1324
1325	return fd;
1326 out_close:
1327	os_close_file(fd);
1328	return err;
1329}
1330
1331int create_cow_file(char *cow_file, char *backing_file, struct openflags flags,
1332		    int sectorsize, int alignment, int *bitmap_offset_out,
1333		    unsigned long *bitmap_len_out, int *data_offset_out)
1334{
1335	int err, fd;
1336
1337	flags.c = 1;
1338	fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
1339	if(fd < 0){
1340		err = fd;
1341		printk("Open of COW file '%s' failed, errno = %d\n", cow_file,
1342		       -err);
1343		goto out;
1344	}
1345
1346	err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
1347			    bitmap_offset_out, bitmap_len_out,
1348			    data_offset_out);
1349	if(!err)
1350		return fd;
1351	os_close_file(fd);
1352 out:
1353	return err;
1354}
1355
1356static int update_bitmap(struct io_thread_req *req)
1357{
1358	int n;
1359
1360	if(req->cow_offset == -1)
1361		return 0;
1362
1363	n = os_seek_file(req->fds[1], req->cow_offset);
1364	if(n < 0){
1365		printk("do_io - bitmap lseek failed : err = %d\n", -n);
1366		return 1;
1367	}
1368
1369	n = os_write_file(req->fds[1], &req->bitmap_words,
1370			  sizeof(req->bitmap_words));
1371	if(n != sizeof(req->bitmap_words)){
1372		printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
1373		       req->fds[1]);
1374		return 1;
1375	}
1376
1377	return 0;
1378}
1379
1380void do_io(struct io_thread_req *req)
1381{
1382	char *buf;
1383	unsigned long len;
1384	int n, nsectors, start, end, bit;
1385	int err;
1386	__u64 off;
1387
1388	nsectors = req->length / req->sectorsize;
1389	start = 0;
1390	do {
1391		bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
1392		end = start;
1393		while((end < nsectors) &&
1394		      (ubd_test_bit(end, (unsigned char *)
1395				    &req->sector_mask) == bit))
1396			end++;
1397
1398		off = req->offset + req->offsets[bit] +
1399			start * req->sectorsize;
1400		len = (end - start) * req->sectorsize;
1401		buf = &req->buffer[start * req->sectorsize];
1402
1403		err = os_seek_file(req->fds[bit], off);
1404		if(err < 0){
1405			printk("do_io - lseek failed : err = %d\n", -err);
1406			req->error = 1;
1407			return;
1408		}
1409		if(req->op == UBD_READ){
1410			n = 0;
1411			do {
1412				buf = &buf[n];
1413				len -= n;
1414				n = os_read_file(req->fds[bit], buf, len);
1415				if (n < 0) {
1416					printk("do_io - read failed, err = %d "
1417					       "fd = %d\n", -n, req->fds[bit]);
1418					req->error = 1;
1419					return;
1420				}
1421			} while((n < len) && (n != 0));
1422			if (n < len) memset(&buf[n], 0, len - n);
1423		} else {
1424			n = os_write_file(req->fds[bit], buf, len);
1425			if(n != len){
1426				printk("do_io - write failed err = %d "
1427				       "fd = %d\n", -n, req->fds[bit]);
1428				req->error = 1;
1429				return;
1430			}
1431		}
1432
1433		start = end;
1434	} while(start < nsectors);
1435
1436	req->error = update_bitmap(req);
1437}
1438
/*
 * Descriptor io_thread reads request pointers from and writes them back to.
 * It is changed in start_io_thread, which is serialized by being called
 * only from ubd_init, which is an initcall.
 */
int kernel_fd = -1;

/* Incremented by io_thread for every request it picks up. */
static int io_count;
1445
1446int io_thread(void *arg)
1447{
1448	struct io_thread_req *req;
1449	int n;
1450
1451	ignore_sigwinch_sig();
1452	while(1){
1453		n = os_read_file(kernel_fd, &req,
1454				 sizeof(struct io_thread_req *));
1455		if(n != sizeof(struct io_thread_req *)){
1456			if(n < 0)
1457				printk("io_thread - read failed, fd = %d, "
1458				       "err = %d\n", kernel_fd, -n);
1459			else {
1460				printk("io_thread - short read, fd = %d, "
1461				       "length = %d\n", kernel_fd, n);
1462			}
1463			continue;
1464		}
1465		io_count++;
1466		do_io(req);
1467		n = os_write_file(kernel_fd, &req,
1468				  sizeof(struct io_thread_req *));
1469		if(n != sizeof(struct io_thread_req *))
1470			printk("io_thread - write failed, fd = %d, err = %d\n",
1471			       kernel_fd, -n);
1472	}
1473
1474	return 0;
1475}
1476