1/*
2 *  gendisk handling
3 */
4
5#include <linux/module.h>
6#include <linux/fs.h>
7#include <linux/genhd.h>
8#include <linux/kdev_t.h>
9#include <linux/kernel.h>
10#include <linux/blkdev.h>
11#include <linux/init.h>
12#include <linux/spinlock.h>
13#include <linux/proc_fs.h>
14#include <linux/seq_file.h>
15#include <linux/slab.h>
16#include <linux/kmod.h>
17#include <linux/kobj_map.h>
18#include <linux/buffer_head.h>
19#include <linux/mutex.h>
20#include <linux/idr.h>
21
22#include "blk.h"
23
/*
 * Taken around every walk or update of major_names[] in this file and
 * also handed to kobj_map_init() when bdev_map is created.
 */
static DEFINE_MUTEX(block_class_lock);
#ifndef CONFIG_SYSFS_DEPRECATED
/* Top-level "block" kobject, created in genhd_device_init(). */
struct kobject *block_depr;
#endif

/* for extended dynamic devt allocation, currently only one major is used */
#define MAX_EXT_DEVT		(1 << MINORBITS)

/* For extended devt allocation.  ext_devt_mutex prevents look up
 * results from going away underneath its user.
 */
static DEFINE_MUTEX(ext_devt_mutex);
/* Maps an allocated extended-devt index to the partition it belongs to. */
static DEFINE_IDR(ext_devt_idr);

/* Forward declaration; the initialized definition appears later in the file. */
static struct device_type disk_type;
39
40/**
41 * disk_get_part - get partition
42 * @disk: disk to look partition from
43 * @partno: partition number
44 *
45 * Look for partition @partno from @disk.  If found, increment
46 * reference count and return it.
47 *
48 * CONTEXT:
49 * Don't care.
50 *
51 * RETURNS:
52 * Pointer to the found partition on success, NULL if not found.
53 */
54struct hd_struct *disk_get_part(struct gendisk *disk, int partno)
55{
56	struct hd_struct *part = NULL;
57	struct disk_part_tbl *ptbl;
58
59	if (unlikely(partno < 0))
60		return NULL;
61
62	rcu_read_lock();
63
64	ptbl = rcu_dereference(disk->part_tbl);
65	if (likely(partno < ptbl->len)) {
66		part = rcu_dereference(ptbl->part[partno]);
67		if (part)
68			get_device(part_to_dev(part));
69	}
70
71	rcu_read_unlock();
72
73	return part;
74}
75EXPORT_SYMBOL_GPL(disk_get_part);
76
77/**
78 * disk_part_iter_init - initialize partition iterator
79 * @piter: iterator to initialize
80 * @disk: disk to iterate over
81 * @flags: DISK_PITER_* flags
82 *
83 * Initialize @piter so that it iterates over partitions of @disk.
84 *
85 * CONTEXT:
86 * Don't care.
87 */
88void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk,
89			  unsigned int flags)
90{
91	struct disk_part_tbl *ptbl;
92
93	rcu_read_lock();
94	ptbl = rcu_dereference(disk->part_tbl);
95
96	piter->disk = disk;
97	piter->part = NULL;
98
99	if (flags & DISK_PITER_REVERSE)
100		piter->idx = ptbl->len - 1;
101	else if (flags & (DISK_PITER_INCL_PART0 | DISK_PITER_INCL_EMPTY_PART0))
102		piter->idx = 0;
103	else
104		piter->idx = 1;
105
106	piter->flags = flags;
107
108	rcu_read_unlock();
109}
110EXPORT_SYMBOL_GPL(disk_part_iter_init);
111
112/**
113 * disk_part_iter_next - proceed iterator to the next partition and return it
114 * @piter: iterator of interest
115 *
116 * Proceed @piter to the next partition and return it.
117 *
118 * CONTEXT:
119 * Don't care.
120 */
121struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter)
122{
123	struct disk_part_tbl *ptbl;
124	int inc, end;
125
126	/* put the last partition */
127	disk_put_part(piter->part);
128	piter->part = NULL;
129
130	/* get part_tbl */
131	rcu_read_lock();
132	ptbl = rcu_dereference(piter->disk->part_tbl);
133
134	/* determine iteration parameters */
135	if (piter->flags & DISK_PITER_REVERSE) {
136		inc = -1;
137		if (piter->flags & (DISK_PITER_INCL_PART0 |
138				    DISK_PITER_INCL_EMPTY_PART0))
139			end = -1;
140		else
141			end = 0;
142	} else {
143		inc = 1;
144		end = ptbl->len;
145	}
146
147	/* iterate to the next partition */
148	for (; piter->idx != end; piter->idx += inc) {
149		struct hd_struct *part;
150
151		part = rcu_dereference(ptbl->part[piter->idx]);
152		if (!part)
153			continue;
154		if (!part->nr_sects &&
155		    !(piter->flags & DISK_PITER_INCL_EMPTY) &&
156		    !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 &&
157		      piter->idx == 0))
158			continue;
159
160		get_device(part_to_dev(part));
161		piter->part = part;
162		piter->idx += inc;
163		break;
164	}
165
166	rcu_read_unlock();
167
168	return piter->part;
169}
170EXPORT_SYMBOL_GPL(disk_part_iter_next);
171
172/**
173 * disk_part_iter_exit - finish up partition iteration
174 * @piter: iter of interest
175 *
176 * Called when iteration is over.  Cleans up @piter.
177 *
178 * CONTEXT:
179 * Don't care.
180 */
181void disk_part_iter_exit(struct disk_part_iter *piter)
182{
183	disk_put_part(piter->part);
184	piter->part = NULL;
185}
186EXPORT_SYMBOL_GPL(disk_part_iter_exit);
187
188static inline int sector_in_part(struct hd_struct *part, sector_t sector)
189{
190	return part->start_sect <= sector &&
191		sector < part->start_sect + part->nr_sects;
192}
193
194/**
195 * disk_map_sector_rcu - map sector to partition
196 * @disk: gendisk of interest
197 * @sector: sector to map
198 *
199 * Find out which partition @sector maps to on @disk.  This is
200 * primarily used for stats accounting.
201 *
202 * CONTEXT:
203 * RCU read locked.  The returned partition pointer is valid only
204 * while preemption is disabled.
205 *
206 * RETURNS:
207 * Found partition on success, part0 is returned if no partition matches
208 */
209struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
210{
211	struct disk_part_tbl *ptbl;
212	struct hd_struct *part;
213	int i;
214
215	ptbl = rcu_dereference(disk->part_tbl);
216
217	part = rcu_dereference(ptbl->last_lookup);
218	if (part && sector_in_part(part, sector))
219		return part;
220
221	for (i = 1; i < ptbl->len; i++) {
222		part = rcu_dereference(ptbl->part[i]);
223
224		if (part && sector_in_part(part, sector)) {
225			rcu_assign_pointer(ptbl->last_lookup, part);
226			return part;
227		}
228	}
229	return &disk->part0;
230}
231EXPORT_SYMBOL_GPL(disk_map_sector_rcu);
232
/*
 * Registry of block major numbers and the names they were registered
 * under.  major_names[] is a hash of singly linked chains, indexed by
 * major_to_index(); entries are added by register_blkdev() and removed
 * by unregister_blkdev().
 *
 * Can be deleted altogether. Later.
 *
 */
static struct blk_major_name {
	struct blk_major_name *next;	/* next entry in the same hash chain */
	int major;			/* registered major number */
	char name[16];			/* registered name, truncated to fit */
} *major_names[BLKDEV_MAJOR_HASH_SIZE];
242
/*
 * Hash a major number to its chain in major_names[].
 *
 * index in the above - for now: assume no multimajor ranges
 */
static inline int major_to_index(int major)
{
	return major % BLKDEV_MAJOR_HASH_SIZE;
}
248
#ifdef CONFIG_PROC_FS
/*
 * Print every registered major on hash chain @offset of major_names[]
 * to @seqf, one "<major> <name>" line per entry.  Offsets at or beyond
 * the size of the hash produce no output.
 */
void blkdev_show(struct seq_file *seqf, off_t offset)
{
	struct blk_major_name *entry;

	if (offset >= BLKDEV_MAJOR_HASH_SIZE)
		return;

	mutex_lock(&block_class_lock);
	entry = major_names[offset];
	while (entry) {
		seq_printf(seqf, "%3d %s\n", entry->major, entry->name);
		entry = entry->next;
	}
	mutex_unlock(&block_class_lock);
}
#endif /* CONFIG_PROC_FS */
262
263/**
264 * register_blkdev - register a new block device
265 *
266 * @major: the requested major device number [1..255]. If @major=0, try to
267 *         allocate any unused major number.
268 * @name: the name of the new block device as a zero terminated string
269 *
270 * The @name must be unique within the system.
271 *
272 * The return value depends on the @major input parameter.
273 *  - if a major device number was requested in range [1..255] then the
274 *    function returns zero on success, or a negative error code
275 *  - if any unused major number was requested with @major=0 parameter
276 *    then the return value is the allocated major number in range
277 *    [1..255] or a negative error code otherwise
278 */
279int register_blkdev(unsigned int major, const char *name)
280{
281	struct blk_major_name **n, *p;
282	int index, ret = 0;
283
284	mutex_lock(&block_class_lock);
285
286	/* temporary */
287	if (major == 0) {
288		for (index = ARRAY_SIZE(major_names)-1; index > 0; index--) {
289			if (major_names[index] == NULL)
290				break;
291		}
292
293		if (index == 0) {
294			printk("register_blkdev: failed to get major for %s\n",
295			       name);
296			ret = -EBUSY;
297			goto out;
298		}
299		major = index;
300		ret = major;
301	}
302
303	p = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL);
304	if (p == NULL) {
305		ret = -ENOMEM;
306		goto out;
307	}
308
309	p->major = major;
310	strlcpy(p->name, name, sizeof(p->name));
311	p->next = NULL;
312	index = major_to_index(major);
313
314	for (n = &major_names[index]; *n; n = &(*n)->next) {
315		if ((*n)->major == major)
316			break;
317	}
318	if (!*n)
319		*n = p;
320	else
321		ret = -EBUSY;
322
323	if (ret < 0) {
324		printk("register_blkdev: cannot get major %d for %s\n",
325		       major, name);
326		kfree(p);
327	}
328out:
329	mutex_unlock(&block_class_lock);
330	return ret;
331}
332
333EXPORT_SYMBOL(register_blkdev);
334
335void unregister_blkdev(unsigned int major, const char *name)
336{
337	struct blk_major_name **n;
338	struct blk_major_name *p = NULL;
339	int index = major_to_index(major);
340
341	mutex_lock(&block_class_lock);
342	for (n = &major_names[index]; *n; n = &(*n)->next)
343		if ((*n)->major == major)
344			break;
345	if (!*n || strcmp((*n)->name, name)) {
346		WARN_ON(1);
347	} else {
348		p = *n;
349		*n = p->next;
350	}
351	mutex_unlock(&block_class_lock);
352	kfree(p);
353}
354
355EXPORT_SYMBOL(unregister_blkdev);
356
/*
 * Device-number to kobject map used by blk_register_region(),
 * blk_unregister_region() and get_gendisk(); created in
 * genhd_device_init().
 */
static struct kobj_map *bdev_map;
358
/**
 * blk_mangle_minor - scatter minor numbers apart
 * @minor: minor number to mangle
 *
 * When CONFIG_DEBUG_BLOCK_EXT_DEVT is enabled, mirror the low
 * MINORBITS bits of @minor (bit i trades places with bit
 * MINORBITS - 1 - i) so that consecutively allocated minors end up far
 * apart.  Mangling twice gives the original value.  Without the config
 * option @minor is returned unchanged.
 *
 * RETURNS:
 * Mangled value.
 *
 * CONTEXT:
 * Don't care.
 */
static int blk_mangle_minor(int minor)
{
#ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT
	int lo_pos;

	for (lo_pos = 0; lo_pos < MINORBITS / 2; lo_pos++) {
		int hi_pos = MINORBITS - 1 - lo_pos;
		int shift = hi_pos - lo_pos;
		int lo_bit = minor & (1 << lo_pos);
		int hi_bit = minor & (1 << hi_pos);

		/* drop both bits, then put each back at the other's position */
		minor ^= lo_bit | hi_bit;
		minor |= (lo_bit << shift) | (hi_bit >> shift);
	}
#endif
	return minor;
}
390
391/**
392 * blk_alloc_devt - allocate a dev_t for a partition
393 * @part: partition to allocate dev_t for
394 * @devt: out parameter for resulting dev_t
395 *
396 * Allocate a dev_t for block device.
397 *
398 * RETURNS:
399 * 0 on success, allocated dev_t is returned in *@devt.  -errno on
400 * failure.
401 *
402 * CONTEXT:
403 * Might sleep.
404 */
405int blk_alloc_devt(struct hd_struct *part, dev_t *devt)
406{
407	struct gendisk *disk = part_to_disk(part);
408	int idx, rc;
409
410	/* in consecutive minor range? */
411	if (part->partno < disk->minors) {
412		*devt = MKDEV(disk->major, disk->first_minor + part->partno);
413		return 0;
414	}
415
416	/* allocate ext devt */
417	do {
418		if (!idr_pre_get(&ext_devt_idr, GFP_KERNEL))
419			return -ENOMEM;
420		rc = idr_get_new(&ext_devt_idr, part, &idx);
421	} while (rc == -EAGAIN);
422
423	if (rc)
424		return rc;
425
426	if (idx > MAX_EXT_DEVT) {
427		idr_remove(&ext_devt_idr, idx);
428		return -EBUSY;
429	}
430
431	*devt = MKDEV(BLOCK_EXT_MAJOR, blk_mangle_minor(idx));
432	return 0;
433}
434
435/**
436 * blk_free_devt - free a dev_t
437 * @devt: dev_t to free
438 *
439 * Free @devt which was allocated using blk_alloc_devt().
440 *
441 * CONTEXT:
442 * Might sleep.
443 */
444void blk_free_devt(dev_t devt)
445{
446	might_sleep();
447
448	if (devt == MKDEV(0, 0))
449		return;
450
451	if (MAJOR(devt) == BLOCK_EXT_MAJOR) {
452		mutex_lock(&ext_devt_mutex);
453		idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
454		mutex_unlock(&ext_devt_mutex);
455	}
456}
457
458static char *bdevt_str(dev_t devt, char *buf)
459{
460	if (MAJOR(devt) <= 0xff && MINOR(devt) <= 0xff) {
461		char tbuf[BDEVT_SIZE];
462		snprintf(tbuf, BDEVT_SIZE, "%02x%02x", MAJOR(devt), MINOR(devt));
463		snprintf(buf, BDEVT_SIZE, "%-9s", tbuf);
464	} else
465		snprintf(buf, BDEVT_SIZE, "%03x:%05x", MAJOR(devt), MINOR(devt));
466
467	return buf;
468}
469
/*
 * Register device numbers dev..(dev+range-1)
 * range must be nonzero
 * The hash chain is sorted on range, so that subranges can override.
 *
 * All arguments are passed unchanged to kobj_map() on bdev_map; the
 * result of kobj_map() is not reported to the caller.
 */
void blk_register_region(dev_t devt, unsigned long range, struct module *module,
			 struct kobject *(*probe)(dev_t, int *, void *),
			 int (*lock)(dev_t, void *), void *data)
{
	kobj_map(bdev_map, devt, range, module, probe, lock, data);
}
481
482EXPORT_SYMBOL(blk_register_region);
483
/*
 * Remove the mapping for device numbers devt..(devt+range-1) from
 * bdev_map via kobj_unmap().
 */
void blk_unregister_region(dev_t devt, unsigned long range)
{
	kobj_unmap(bdev_map, devt, range);
}
488
489EXPORT_SYMBOL(blk_unregister_region);
490
491static struct kobject *exact_match(dev_t devt, int *partno, void *data)
492{
493	struct gendisk *p = data;
494
495	return &disk_to_dev(p)->kobj;
496}
497
498static int exact_lock(dev_t devt, void *data)
499{
500	struct gendisk *p = data;
501
502	if (!get_disk(p))
503		return -1;
504	return 0;
505}
506
507void add_disk(struct gendisk *disk)
508{
509	struct backing_dev_info *bdi;
510	dev_t devt;
511	int retval;
512
513	/* minors == 0 indicates to use ext devt from part0 and should
514	 * be accompanied with EXT_DEVT flag.  Make sure all
515	 * parameters make sense.
516	 */
517	WARN_ON(disk->minors && !(disk->major || disk->first_minor));
518	WARN_ON(!disk->minors && !(disk->flags & GENHD_FL_EXT_DEVT));
519
520	disk->flags |= GENHD_FL_UP;
521
522	retval = blk_alloc_devt(&disk->part0, &devt);
523	if (retval) {
524		WARN_ON(1);
525		return;
526	}
527	disk_to_dev(disk)->devt = devt;
528
529	/* ->major and ->first_minor aren't supposed to be
530	 * dereferenced from here on, but set them just in case.
531	 */
532	disk->major = MAJOR(devt);
533	disk->first_minor = MINOR(devt);
534
535	/* Register BDI before referencing it from bdev */
536	bdi = &disk->queue->backing_dev_info;
537	bdi_register_dev(bdi, disk_devt(disk));
538
539	blk_register_region(disk_devt(disk), disk->minors, NULL,
540			    exact_match, exact_lock, disk);
541	register_disk(disk);
542	blk_register_queue(disk);
543
544	retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj,
545				   "bdi");
546	WARN_ON(retval);
547}
548
549EXPORT_SYMBOL(add_disk);
550EXPORT_SYMBOL(del_gendisk);	/* in partitions/check.c */
551
/*
 * Undo the registrations made by add_disk(): remove the "bdi" sysfs
 * link, unregister the backing_dev_info and the queue, and drop the
 * disk's device-number region from bdev_map.
 */
void unlink_gendisk(struct gendisk *disk)
{
	sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
	bdi_unregister(&disk->queue->backing_dev_info);
	blk_unregister_queue(disk);
	blk_unregister_region(disk_devt(disk), disk->minors);
}
559
560/**
561 * get_gendisk - get partitioning information for a given device
562 * @devt: device to get partitioning information for
563 * @partno: returned partition index
564 *
565 * This function gets the structure containing partitioning
566 * information for the given device @devt.
567 */
568struct gendisk *get_gendisk(dev_t devt, int *partno)
569{
570	struct gendisk *disk = NULL;
571
572	if (MAJOR(devt) != BLOCK_EXT_MAJOR) {
573		struct kobject *kobj;
574
575		kobj = kobj_lookup(bdev_map, devt, partno);
576		if (kobj)
577			disk = dev_to_disk(kobj_to_dev(kobj));
578	} else {
579		struct hd_struct *part;
580
581		mutex_lock(&ext_devt_mutex);
582		part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
583		if (part && get_disk(part_to_disk(part))) {
584			*partno = part->partno;
585			disk = part_to_disk(part);
586		}
587		mutex_unlock(&ext_devt_mutex);
588	}
589
590	return disk;
591}
592EXPORT_SYMBOL(get_gendisk);
593
594/**
595 * bdget_disk - do bdget() by gendisk and partition number
596 * @disk: gendisk of interest
597 * @partno: partition number
598 *
599 * Find partition @partno from @disk, do bdget() on it.
600 *
601 * CONTEXT:
602 * Don't care.
603 *
604 * RETURNS:
605 * Resulting block_device on success, NULL on failure.
606 */
607struct block_device *bdget_disk(struct gendisk *disk, int partno)
608{
609	struct hd_struct *part;
610	struct block_device *bdev = NULL;
611
612	part = disk_get_part(disk, partno);
613	if (part)
614		bdev = bdget(part_devt(part));
615	disk_put_part(part);
616
617	return bdev;
618}
619EXPORT_SYMBOL(bdget_disk);
620
621/*
622 * print a full list of all partitions - intended for places where the root
623 * filesystem can't be mounted and thus to give the victim some idea of what
624 * went wrong
625 */
626void __init printk_all_partitions(void)
627{
628	struct class_dev_iter iter;
629	struct device *dev;
630
631	class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
632	while ((dev = class_dev_iter_next(&iter))) {
633		struct gendisk *disk = dev_to_disk(dev);
634		struct disk_part_iter piter;
635		struct hd_struct *part;
636		char name_buf[BDEVNAME_SIZE];
637		char devt_buf[BDEVT_SIZE];
638
639		/*
640		 * Don't show empty devices or things that have been
641		 * surpressed
642		 */
643		if (get_capacity(disk) == 0 ||
644		    (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO))
645			continue;
646
647		/*
648		 * Note, unlike /proc/partitions, I am showing the
649		 * numbers in hex - the same format as the root=
650		 * option takes.
651		 */
652		disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
653		while ((part = disk_part_iter_next(&piter))) {
654			bool is_part0 = part == &disk->part0;
655
656			printk("%s%s %10llu %s", is_part0 ? "" : "  ",
657			       bdevt_str(part_devt(part), devt_buf),
658			       (unsigned long long)part->nr_sects >> 1,
659			       disk_name(disk, part->partno, name_buf));
660			if (is_part0) {
661				if (disk->driverfs_dev != NULL &&
662				    disk->driverfs_dev->driver != NULL)
663					printk(" driver: %s\n",
664					      disk->driverfs_dev->driver->name);
665				else
666					printk(" (driver?)\n");
667			} else
668				printk("\n");
669		}
670		disk_part_iter_exit(&piter);
671	}
672	class_dev_iter_exit(&iter);
673}
674
675#ifdef CONFIG_PROC_FS
676/* iterator */
677static void *disk_seqf_start(struct seq_file *seqf, loff_t *pos)
678{
679	loff_t skip = *pos;
680	struct class_dev_iter *iter;
681	struct device *dev;
682
683	iter = kmalloc(sizeof(*iter), GFP_KERNEL);
684	if (!iter)
685		return ERR_PTR(-ENOMEM);
686
687	seqf->private = iter;
688	class_dev_iter_init(iter, &block_class, NULL, &disk_type);
689	do {
690		dev = class_dev_iter_next(iter);
691		if (!dev)
692			return NULL;
693	} while (skip--);
694
695	return dev_to_disk(dev);
696}
697
698static void *disk_seqf_next(struct seq_file *seqf, void *v, loff_t *pos)
699{
700	struct device *dev;
701
702	(*pos)++;
703	dev = class_dev_iter_next(seqf->private);
704	if (dev)
705		return dev_to_disk(dev);
706
707	return NULL;
708}
709
710static void disk_seqf_stop(struct seq_file *seqf, void *v)
711{
712	struct class_dev_iter *iter = seqf->private;
713
714	/* stop is called even after start failed :-( */
715	if (iter) {
716		class_dev_iter_exit(iter);
717		kfree(iter);
718	}
719}
720
721static void *show_partition_start(struct seq_file *seqf, loff_t *pos)
722{
723	static void *p;
724
725	p = disk_seqf_start(seqf, pos);
726	if (!IS_ERR(p) && p && !*pos)
727		seq_puts(seqf, "major minor  #blocks  name\n\n");
728	return p;
729}
730
731static int show_partition(struct seq_file *seqf, void *v)
732{
733	struct gendisk *sgp = v;
734	struct disk_part_iter piter;
735	struct hd_struct *part;
736	char buf[BDEVNAME_SIZE];
737
738	/* Don't show non-partitionable removeable devices or empty devices */
739	if (!get_capacity(sgp) || (!disk_partitionable(sgp) &&
740				   (sgp->flags & GENHD_FL_REMOVABLE)))
741		return 0;
742	if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
743		return 0;
744
745	/* show the full disk and all non-0 size partitions of it */
746	disk_part_iter_init(&piter, sgp, DISK_PITER_INCL_PART0);
747	while ((part = disk_part_iter_next(&piter)))
748		seq_printf(seqf, "%4d  %7d %10llu %s\n",
749			   MAJOR(part_devt(part)), MINOR(part_devt(part)),
750			   (unsigned long long)part->nr_sects >> 1,
751			   disk_name(sgp, part->partno, buf));
752	disk_part_iter_exit(&piter);
753
754	return 0;
755}
756
/*
 * seq_file operations behind /proc/partitions: the shared disk
 * iterator with a header-printing ->start and show_partition() as the
 * per-disk formatter.
 */
static const struct seq_operations partitions_op = {
	.start	= show_partition_start,
	.next	= disk_seqf_next,
	.stop	= disk_seqf_stop,
	.show	= show_partition
};
763
/* ->open for /proc/partitions: attach partitions_op to the file via seq_open(). */
static int partitions_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &partitions_op);
}
768
/* File operations for the "partitions" proc entry created in proc_genhd_init(). */
static const struct file_operations proc_partitions_operations = {
	.open		= partitions_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
775#endif
776
777
778static struct kobject *base_probe(dev_t devt, int *partno, void *data)
779{
780	if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0)
781		/* Make old-style 2.4 aliases work */
782		request_module("block-major-%d", MAJOR(devt));
783	return NULL;
784}
785
786static int __init genhd_device_init(void)
787{
788	int error;
789
790	block_class.dev_kobj = sysfs_dev_block_kobj;
791	error = class_register(&block_class);
792	if (unlikely(error))
793		return error;
794	bdev_map = kobj_map_init(base_probe, &block_class_lock);
795	blk_dev_init();
796
797	register_blkdev(BLOCK_EXT_MAJOR, "blkext");
798
799#ifndef CONFIG_SYSFS_DEPRECATED
800	/* create top-level block dir */
801	block_depr = kobject_create_and_add("block", NULL);
802#endif
803	return 0;
804}
805
806subsys_initcall(genhd_device_init);
807
808static ssize_t disk_range_show(struct device *dev,
809			       struct device_attribute *attr, char *buf)
810{
811	struct gendisk *disk = dev_to_disk(dev);
812
813	return sprintf(buf, "%d\n", disk->minors);
814}
815
816static ssize_t disk_ext_range_show(struct device *dev,
817				   struct device_attribute *attr, char *buf)
818{
819	struct gendisk *disk = dev_to_disk(dev);
820
821	return sprintf(buf, "%d\n", disk_max_parts(disk));
822}
823
824static ssize_t disk_removable_show(struct device *dev,
825				   struct device_attribute *attr, char *buf)
826{
827	struct gendisk *disk = dev_to_disk(dev);
828
829	return sprintf(buf, "%d\n",
830		       (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0));
831}
832
833static ssize_t disk_ro_show(struct device *dev,
834				   struct device_attribute *attr, char *buf)
835{
836	struct gendisk *disk = dev_to_disk(dev);
837
838	return sprintf(buf, "%d\n", get_disk_ro(disk) ? 1 : 0);
839}
840
841static ssize_t disk_capability_show(struct device *dev,
842				    struct device_attribute *attr, char *buf)
843{
844	struct gendisk *disk = dev_to_disk(dev);
845
846	return sprintf(buf, "%x\n", disk->flags);
847}
848
849static ssize_t disk_alignment_offset_show(struct device *dev,
850					  struct device_attribute *attr,
851					  char *buf)
852{
853	struct gendisk *disk = dev_to_disk(dev);
854
855	return sprintf(buf, "%d\n", queue_alignment_offset(disk->queue));
856}
857
858static ssize_t disk_discard_alignment_show(struct device *dev,
859					   struct device_attribute *attr,
860					   char *buf)
861{
862	struct gendisk *disk = dev_to_disk(dev);
863
864	return sprintf(buf, "%d\n", queue_discard_alignment(disk->queue));
865}
866
/*
 * Device attributes of a whole disk.  All are read-only (S_IRUGO, no
 * store callback) except the two fault-injection attributes below,
 * which are owner-writable and only built with their config options.
 */
static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL);
static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL);
static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL);
static DEVICE_ATTR(discard_alignment, S_IRUGO, disk_discard_alignment_show,
		   NULL);
static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
#ifdef CONFIG_FAIL_MAKE_REQUEST
/* spelled out with __ATTR because the attribute name contains dashes */
static struct device_attribute dev_attr_fail =
	__ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
#endif
#ifdef CONFIG_FAIL_IO_TIMEOUT
/* spelled out with __ATTR because the attribute name contains dashes */
static struct device_attribute dev_attr_fail_timeout =
	__ATTR(io-timeout-fail,  S_IRUGO|S_IWUSR, part_timeout_show,
		part_timeout_store);
#endif
887
/* NULL-terminated list of the disk attributes declared above. */
static struct attribute *disk_attrs[] = {
	&dev_attr_range.attr,
	&dev_attr_ext_range.attr,
	&dev_attr_removable.attr,
	&dev_attr_ro.attr,
	&dev_attr_size.attr,
	&dev_attr_alignment_offset.attr,
	&dev_attr_discard_alignment.attr,
	&dev_attr_capability.attr,
	&dev_attr_stat.attr,
	&dev_attr_inflight.attr,
#ifdef CONFIG_FAIL_MAKE_REQUEST
	&dev_attr_fail.attr,
#endif
#ifdef CONFIG_FAIL_IO_TIMEOUT
	&dev_attr_fail_timeout.attr,
#endif
	NULL
};
907
/* Single unnamed attribute group wrapping disk_attrs. */
static struct attribute_group disk_attr_group = {
	.attrs = disk_attrs,
};

/* NULL-terminated group list referenced from disk_type.groups. */
static const struct attribute_group *disk_attr_groups[] = {
	&disk_attr_group,
	NULL
};
916
917static void disk_free_ptbl_rcu_cb(struct rcu_head *head)
918{
919	struct disk_part_tbl *ptbl =
920		container_of(head, struct disk_part_tbl, rcu_head);
921
922	kfree(ptbl);
923}
924
925/**
926 * disk_replace_part_tbl - replace disk->part_tbl in RCU-safe way
927 * @disk: disk to replace part_tbl for
928 * @new_ptbl: new part_tbl to install
929 *
930 * Replace disk->part_tbl with @new_ptbl in RCU-safe way.  The
931 * original ptbl is freed using RCU callback.
932 *
933 * LOCKING:
934 * Matching bd_mutx locked.
935 */
936static void disk_replace_part_tbl(struct gendisk *disk,
937				  struct disk_part_tbl *new_ptbl)
938{
939	struct disk_part_tbl *old_ptbl = disk->part_tbl;
940
941	rcu_assign_pointer(disk->part_tbl, new_ptbl);
942
943	if (old_ptbl) {
944		rcu_assign_pointer(old_ptbl->last_lookup, NULL);
945		call_rcu(&old_ptbl->rcu_head, disk_free_ptbl_rcu_cb);
946	}
947}
948
949/**
950 * disk_expand_part_tbl - expand disk->part_tbl
951 * @disk: disk to expand part_tbl for
952 * @partno: expand such that this partno can fit in
953 *
954 * Expand disk->part_tbl such that @partno can fit in.  disk->part_tbl
955 * uses RCU to allow unlocked dereferencing for stats and other stuff.
956 *
957 * LOCKING:
958 * Matching bd_mutex locked, might sleep.
959 *
960 * RETURNS:
961 * 0 on success, -errno on failure.
962 */
963int disk_expand_part_tbl(struct gendisk *disk, int partno)
964{
965	struct disk_part_tbl *old_ptbl = disk->part_tbl;
966	struct disk_part_tbl *new_ptbl;
967	int len = old_ptbl ? old_ptbl->len : 0;
968	int target = partno + 1;
969	size_t size;
970	int i;
971
972	/* disk_max_parts() is zero during initialization, ignore if so */
973	if (disk_max_parts(disk) && target > disk_max_parts(disk))
974		return -EINVAL;
975
976	if (target <= len)
977		return 0;
978
979	size = sizeof(*new_ptbl) + target * sizeof(new_ptbl->part[0]);
980	new_ptbl = kzalloc_node(size, GFP_KERNEL, disk->node_id);
981	if (!new_ptbl)
982		return -ENOMEM;
983
984	new_ptbl->len = target;
985
986	for (i = 0; i < len; i++)
987		rcu_assign_pointer(new_ptbl->part[i], old_ptbl->part[i]);
988
989	disk_replace_part_tbl(disk, new_ptbl);
990	return 0;
991}
992
/*
 * Release callback of disk_type: free everything owned by the gendisk
 * embedding @dev - its ->random data, its partition table (replaced by
 * NULL so the old table is freed via RCU), the part0 statistics - and
 * finally the gendisk itself.
 */
static void disk_release(struct device *dev)
{
	struct gendisk *disk = dev_to_disk(dev);

	kfree(disk->random);
	disk_replace_part_tbl(disk, NULL);
	free_part_stats(&disk->part0);
	/* must come last: everything above still dereferences disk */
	kfree(disk);
}
/*
 * Device class "block".  Registered in genhd_device_init() and
 * assigned to every gendisk device in alloc_disk_node().
 */
struct class block_class = {
	.name		= "block",
};
1005
1006static char *block_devnode(struct device *dev, mode_t *mode)
1007{
1008	struct gendisk *disk = dev_to_disk(dev);
1009
1010	if (disk->devnode)
1011		return disk->devnode(disk, mode);
1012	return NULL;
1013}
1014
/*
 * Device type of whole disks: supplies the sysfs attribute groups, the
 * release callback and the devnode hook.  Also used in this file as
 * the filter when iterating block_class devices.
 */
static struct device_type disk_type = {
	.name		= "disk",
	.groups		= disk_attr_groups,
	.release	= disk_release,
	.devnode	= block_devnode,
};
1021
1022#ifdef CONFIG_PROC_FS
1023/*
1024 * aggregate disk stat collector.  Uses the same stats that the sysfs
1025 * entries do, above, but makes them available through one seq_file.
1026 *
1027 * The output looks suspiciously like /proc/partitions with a bunch of
1028 * extra fields.
1029 */
1030static int diskstats_show(struct seq_file *seqf, void *v)
1031{
1032	struct gendisk *gp = v;
1033	struct disk_part_iter piter;
1034	struct hd_struct *hd;
1035	char buf[BDEVNAME_SIZE];
1036	int cpu;
1037
1038	/*
1039	if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next)
1040		seq_puts(seqf,	"major minor name"
1041				"     rio rmerge rsect ruse wio wmerge "
1042				"wsect wuse running use aveq"
1043				"\n\n");
1044	*/
1045
1046	disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0);
1047	while ((hd = disk_part_iter_next(&piter))) {
1048		cpu = part_stat_lock();
1049		part_round_stats(cpu, hd);
1050		part_stat_unlock();
1051		seq_printf(seqf, "%4d %7d %s %lu %lu %llu "
1052			   "%u %lu %lu %llu %u %u %u %u\n",
1053			   MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
1054			   disk_name(gp, hd->partno, buf),
1055			   part_stat_read(hd, ios[0]),
1056			   part_stat_read(hd, merges[0]),
1057			   (unsigned long long)part_stat_read(hd, sectors[0]),
1058			   jiffies_to_msecs(part_stat_read(hd, ticks[0])),
1059			   part_stat_read(hd, ios[1]),
1060			   part_stat_read(hd, merges[1]),
1061			   (unsigned long long)part_stat_read(hd, sectors[1]),
1062			   jiffies_to_msecs(part_stat_read(hd, ticks[1])),
1063			   part_in_flight(hd),
1064			   jiffies_to_msecs(part_stat_read(hd, io_ticks)),
1065			   jiffies_to_msecs(part_stat_read(hd, time_in_queue))
1066			);
1067	}
1068	disk_part_iter_exit(&piter);
1069
1070	return 0;
1071}
1072
/*
 * seq_file operations behind /proc/diskstats: the shared disk iterator
 * with diskstats_show() as the per-disk formatter.
 */
static const struct seq_operations diskstats_op = {
	.start	= disk_seqf_start,
	.next	= disk_seqf_next,
	.stop	= disk_seqf_stop,
	.show	= diskstats_show
};
1079
/* ->open for /proc/diskstats: attach diskstats_op to the file via seq_open(). */
static int diskstats_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &diskstats_op);
}
1084
/* File operations for the "diskstats" proc entry created in proc_genhd_init(). */
static const struct file_operations proc_diskstats_operations = {
	.open		= diskstats_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
1091
/*
 * Create the "diskstats" and "partitions" entries at the top level of
 * procfs.  The results of proc_create() are not checked; always
 * returns 0.
 */
static int __init proc_genhd_init(void)
{
	proc_create("diskstats", 0, NULL, &proc_diskstats_operations);
	proc_create("partitions", 0, NULL, &proc_partitions_operations);
	return 0;
}
1098module_init(proc_genhd_init);
1099#endif /* CONFIG_PROC_FS */
1100
1101static void media_change_notify_thread(struct work_struct *work)
1102{
1103	struct gendisk *gd = container_of(work, struct gendisk, async_notify);
1104	char event[] = "MEDIA_CHANGE=1";
1105	char *envp[] = { event, NULL };
1106
1107	/*
1108	 * set enviroment vars to indicate which event this is for
1109	 * so that user space will know to go check the media status.
1110	 */
1111	kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp);
1112	put_device(gd->driverfs_dev);
1113}
1114
1115
1116dev_t blk_lookup_devt(const char *name, int partno)
1117{
1118	dev_t devt = MKDEV(0, 0);
1119	struct class_dev_iter iter;
1120	struct device *dev;
1121
1122	class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
1123	while ((dev = class_dev_iter_next(&iter))) {
1124		struct gendisk *disk = dev_to_disk(dev);
1125		struct hd_struct *part;
1126
1127		if (strcmp(dev_name(dev), name))
1128			continue;
1129
1130		if (partno < disk->minors) {
1131			/* We need to return the right devno, even
1132			 * if the partition doesn't exist yet.
1133			 */
1134			devt = MKDEV(MAJOR(dev->devt),
1135				     MINOR(dev->devt) + partno);
1136			break;
1137		}
1138		part = disk_get_part(disk, partno);
1139		if (part) {
1140			devt = part_devt(part);
1141			disk_put_part(part);
1142			break;
1143		}
1144		disk_put_part(part);
1145	}
1146	class_dev_iter_exit(&iter);
1147	return devt;
1148}
1149EXPORT_SYMBOL(blk_lookup_devt);
1150
/*
 * Allocate a gendisk with @minors minors; shorthand for
 * alloc_disk_node() with a node id of -1.  Returns NULL on failure.
 */
struct gendisk *alloc_disk(int minors)
{
	return alloc_disk_node(minors, -1);
}
1155EXPORT_SYMBOL(alloc_disk);
1156
1157struct gendisk *alloc_disk_node(int minors, int node_id)
1158{
1159	struct gendisk *disk;
1160
1161	disk = kmalloc_node(sizeof(struct gendisk),
1162				GFP_KERNEL | __GFP_ZERO, node_id);
1163	if (disk) {
1164		if (!init_part_stats(&disk->part0)) {
1165			kfree(disk);
1166			return NULL;
1167		}
1168		disk->node_id = node_id;
1169		if (disk_expand_part_tbl(disk, 0)) {
1170			free_part_stats(&disk->part0);
1171			kfree(disk);
1172			return NULL;
1173		}
1174		disk->part_tbl->part[0] = &disk->part0;
1175
1176		disk->minors = minors;
1177		rand_initialize_disk(disk);
1178		disk_to_dev(disk)->class = &block_class;
1179		disk_to_dev(disk)->type = &disk_type;
1180		device_initialize(disk_to_dev(disk));
1181		INIT_WORK(&disk->async_notify,
1182			media_change_notify_thread);
1183	}
1184	return disk;
1185}
1186EXPORT_SYMBOL(alloc_disk_node);
1187
1188struct kobject *get_disk(struct gendisk *disk)
1189{
1190	struct module *owner;
1191	struct kobject *kobj;
1192
1193	if (!disk->fops)
1194		return NULL;
1195	owner = disk->fops->owner;
1196	if (owner && !try_module_get(owner))
1197		return NULL;
1198	kobj = kobject_get(&disk_to_dev(disk)->kobj);
1199	if (kobj == NULL) {
1200		module_put(owner);
1201		return NULL;
1202	}
1203	return kobj;
1204
1205}
1206
1207EXPORT_SYMBOL(get_disk);
1208
1209void put_disk(struct gendisk *disk)
1210{
1211	if (disk)
1212		kobject_put(&disk_to_dev(disk)->kobj);
1213}
1214
1215EXPORT_SYMBOL(put_disk);
1216
1217static void set_disk_ro_uevent(struct gendisk *gd, int ro)
1218{
1219	char event[] = "DISK_RO=1";
1220	char *envp[] = { event, NULL };
1221
1222	if (!ro)
1223		event[8] = '0';
1224	kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp);
1225}
1226
/*
 * Store @flag as the policy of the partition behind @bdev; this is the
 * value bdev_read_only() reports.  @bdev->bd_part is dereferenced
 * without a check.
 */
void set_device_ro(struct block_device *bdev, int flag)
{
	bdev->bd_part->policy = flag;
}
1231
1232EXPORT_SYMBOL(set_device_ro);
1233
1234void set_disk_ro(struct gendisk *disk, int flag)
1235{
1236	struct disk_part_iter piter;
1237	struct hd_struct *part;
1238
1239	if (disk->part0.policy != flag) {
1240		set_disk_ro_uevent(disk, flag);
1241		disk->part0.policy = flag;
1242	}
1243
1244	disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
1245	while ((part = disk_part_iter_next(&piter)))
1246		part->policy = flag;
1247	disk_part_iter_exit(&piter);
1248}
1249
1250EXPORT_SYMBOL(set_disk_ro);
1251
1252int bdev_read_only(struct block_device *bdev)
1253{
1254	if (!bdev)
1255		return 0;
1256	return bdev->bd_part->policy;
1257}
1258
1259EXPORT_SYMBOL(bdev_read_only);
1260
/*
 * Sync and invalidate the block device of partition @partno on @disk.
 *
 * Returns the result of __invalidate_device(), or 0 when no block
 * device could be obtained for the partition.
 */
int invalidate_partition(struct gendisk *disk, int partno)
{
	struct block_device *bdev = bdget_disk(disk, partno);
	int res;

	if (!bdev)
		return 0;

	fsync_bdev(bdev);
	res = __invalidate_device(bdev);
	bdput(bdev);

	return res;
}
1272
1273EXPORT_SYMBOL(invalidate_partition);
1274