• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /asuswrt-rt-n18u-9.0.0.4.380.2695/release/src-rt-6.x.4708/linux/linux-2.6/drivers/md/
1/*
2 * Copyright (C) 2003 Sistina Software Limited.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This file is released under the GPL.
6 */
7
8#include <linux/device-mapper.h>
9
10#include "dm-path-selector.h"
11#include "dm-uevent.h"
12
13#include <linux/ctype.h>
14#include <linux/init.h>
15#include <linux/mempool.h>
16#include <linux/module.h>
17#include <linux/pagemap.h>
18#include <linux/slab.h>
19#include <linux/time.h>
20#include <linux/workqueue.h>
21#include <scsi/scsi_dh.h>
22#include <asm/atomic.h>
23
/* Message prefix for this target (presumably consumed by DMWARN()/DMERR()). */
#define DM_MSG_PREFIX "multipath"

/*
 * Expands to TWO comma-separated arguments: the string itself and its size
 * (sizeof, so the terminating NUL is counted).  Intended for length-taking
 * comparisons such as strnicmp(name, MESG_STR("literal")).
 */
#define MESG_STR(s) s, sizeof(s)
26
27/* Path properties */
28struct pgpath {
29	struct list_head list;
30
31	struct priority_group *pg;	/* Owning PG */
32	unsigned is_active;		/* Path status */
33	unsigned fail_count;		/* Cumulative failure count */
34
35	struct dm_path path;
36	struct work_struct activate_path;
37};
38
/* Map a pointer to the embedded struct dm_path back to its struct pgpath. */
#define path_to_pgpath(dmpath) container_of((dmpath), struct pgpath, path)
40
41/*
42 * Paths are grouped into Priority Groups and numbered from 1 upwards.
43 * Each has a path selector which controls which path gets used.
44 */
45struct priority_group {
46	struct list_head list;
47
48	struct multipath *m;		/* Owning multipath instance */
49	struct path_selector ps;
50
51	unsigned pg_num;		/* Reference number */
52	unsigned bypassed;		/* Temporarily bypass this PG? */
53
54	unsigned nr_pgpaths;		/* Number of paths in PG */
55	struct list_head pgpaths;
56};
57
58/* Multipath context */
59struct multipath {
60	struct list_head list;
61	struct dm_target *ti;
62
63	spinlock_t lock;
64
65	const char *hw_handler_name;
66	char *hw_handler_params;
67	unsigned nr_priority_groups;
68	struct list_head priority_groups;
69	unsigned pg_init_required;	/* pg_init needs calling? */
70	unsigned pg_init_in_progress;	/* Only one pg_init allowed at once */
71	wait_queue_head_t pg_init_wait;	/* Wait for pg_init completion */
72
73	unsigned nr_valid_paths;	/* Total number of usable paths */
74	struct pgpath *current_pgpath;
75	struct priority_group *current_pg;
76	struct priority_group *next_pg;	/* Switch to this PG if set */
77	unsigned repeat_count;		/* I/Os left before calling PS again */
78
79	unsigned queue_io;		/* Must we queue all I/O? */
80	unsigned queue_if_no_path;	/* Queue I/O if last path fails? */
81	unsigned saved_queue_if_no_path;/* Saved state during suspension */
82	unsigned pg_init_retries;	/* Number of times to retry pg_init */
83	unsigned pg_init_count;		/* Number of times pg_init called */
84
85	struct work_struct process_queued_ios;
86	struct list_head queued_ios;
87	unsigned queue_size;
88
89	struct work_struct trigger_event;
90
91	/*
92	 * We must use a mempool of dm_mpath_io structs so that we
93	 * can resubmit bios on error.
94	 */
95	mempool_t *mpio_pool;
96
97	struct mutex work_mutex;
98};
99
/*
 * Per-request context: the path the request was mapped to (NULL when it
 * was not mapped to one) and the request size passed to the path selector.
 */
struct dm_mpath_io {
	struct pgpath *pgpath;
	size_t nr_bytes;
};
107
/* Signature of a per-path action such as fail_path() or reinstate_path(). */
typedef int (*action_fn) (struct pgpath *pgpath);

#define MIN_IOS 256	/* Mempool size */

/* Slab cache backing every multipath's mpio_pool (see alloc_multipath()). */
static struct kmem_cache *_mpio_cache;

/* Workqueues: queued-I/O processing and path activation respectively. */
static struct workqueue_struct *kmultipathd;
static struct workqueue_struct *kmpath_handlerd;

/* Work handlers, defined further down in this file. */
static void activate_path(struct work_struct *work);
static void process_queued_ios(struct work_struct *work);
static void trigger_event(struct work_struct *work);
118
119
120/*-----------------------------------------------
121 * Allocation routines
122 *-----------------------------------------------*/
123
124static struct pgpath *alloc_pgpath(void)
125{
126	struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL);
127
128	if (pgpath) {
129		pgpath->is_active = 1;
130		INIT_WORK(&pgpath->activate_path, activate_path);
131	}
132
133	return pgpath;
134}
135
/* Release the memory of a pgpath obtained from alloc_pgpath(). */
static void free_pgpath(struct pgpath *pgpath)
{
	kfree(pgpath);
}
140
141static struct priority_group *alloc_priority_group(void)
142{
143	struct priority_group *pg;
144
145	pg = kzalloc(sizeof(*pg), GFP_KERNEL);
146
147	if (pg)
148		INIT_LIST_HEAD(&pg->pgpaths);
149
150	return pg;
151}
152
153static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti)
154{
155	struct pgpath *pgpath, *tmp;
156	struct multipath *m = ti->private;
157
158	list_for_each_entry_safe(pgpath, tmp, pgpaths, list) {
159		list_del(&pgpath->list);
160		if (m->hw_handler_name)
161			scsi_dh_detach(bdev_get_queue(pgpath->path.dev->bdev));
162		dm_put_device(ti, pgpath->path.dev);
163		free_pgpath(pgpath);
164	}
165}
166
167static void free_priority_group(struct priority_group *pg,
168				struct dm_target *ti)
169{
170	struct path_selector *ps = &pg->ps;
171
172	if (ps->type) {
173		ps->type->destroy(ps);
174		dm_put_path_selector(ps->type);
175	}
176
177	free_pgpaths(&pg->pgpaths, ti);
178	kfree(pg);
179}
180
181static struct multipath *alloc_multipath(struct dm_target *ti)
182{
183	struct multipath *m;
184
185	m = kzalloc(sizeof(*m), GFP_KERNEL);
186	if (m) {
187		INIT_LIST_HEAD(&m->priority_groups);
188		INIT_LIST_HEAD(&m->queued_ios);
189		spin_lock_init(&m->lock);
190		m->queue_io = 1;
191		INIT_WORK(&m->process_queued_ios, process_queued_ios);
192		INIT_WORK(&m->trigger_event, trigger_event);
193		init_waitqueue_head(&m->pg_init_wait);
194		mutex_init(&m->work_mutex);
195		m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
196		if (!m->mpio_pool) {
197			kfree(m);
198			return NULL;
199		}
200		m->ti = ti;
201		ti->private = m;
202	}
203
204	return m;
205}
206
207static void free_multipath(struct multipath *m)
208{
209	struct priority_group *pg, *tmp;
210
211	list_for_each_entry_safe(pg, tmp, &m->priority_groups, list) {
212		list_del(&pg->list);
213		free_priority_group(pg, m->ti);
214	}
215
216	kfree(m->hw_handler_name);
217	kfree(m->hw_handler_params);
218	mempool_destroy(m->mpio_pool);
219	kfree(m);
220}
221
222
223/*-----------------------------------------------
224 * Path selection
225 *-----------------------------------------------*/
226
227static void __pg_init_all_paths(struct multipath *m)
228{
229	struct pgpath *pgpath;
230
231	m->pg_init_count++;
232	m->pg_init_required = 0;
233	list_for_each_entry(pgpath, &m->current_pg->pgpaths, list) {
234		/* Skip failed paths */
235		if (!pgpath->is_active)
236			continue;
237		if (queue_work(kmpath_handlerd, &pgpath->activate_path))
238			m->pg_init_in_progress++;
239	}
240}
241
242static void __switch_pg(struct multipath *m, struct pgpath *pgpath)
243{
244	m->current_pg = pgpath->pg;
245
246	/* Must we initialise the PG first, and queue I/O till it's ready? */
247	if (m->hw_handler_name) {
248		m->pg_init_required = 1;
249		m->queue_io = 1;
250	} else {
251		m->pg_init_required = 0;
252		m->queue_io = 0;
253	}
254
255	m->pg_init_count = 0;
256}
257
258static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg,
259			       size_t nr_bytes)
260{
261	struct dm_path *path;
262
263	path = pg->ps.type->select_path(&pg->ps, &m->repeat_count, nr_bytes);
264	if (!path)
265		return -ENXIO;
266
267	m->current_pgpath = path_to_pgpath(path);
268
269	if (m->current_pg != pg)
270		__switch_pg(m, m->current_pgpath);
271
272	return 0;
273}
274
275static void __choose_pgpath(struct multipath *m, size_t nr_bytes)
276{
277	struct priority_group *pg;
278	unsigned bypassed = 1;
279
280	if (!m->nr_valid_paths)
281		goto failed;
282
283	/* Were we instructed to switch PG? */
284	if (m->next_pg) {
285		pg = m->next_pg;
286		m->next_pg = NULL;
287		if (!__choose_path_in_pg(m, pg, nr_bytes))
288			return;
289	}
290
291	/* Don't change PG until it has no remaining paths */
292	if (m->current_pg && !__choose_path_in_pg(m, m->current_pg, nr_bytes))
293		return;
294
295	/*
296	 * Loop through priority groups until we find a valid path.
297	 * First time we skip PGs marked 'bypassed'.
298	 * Second time we only try the ones we skipped.
299	 */
300	do {
301		list_for_each_entry(pg, &m->priority_groups, list) {
302			if (pg->bypassed == bypassed)
303				continue;
304			if (!__choose_path_in_pg(m, pg, nr_bytes))
305				return;
306		}
307	} while (bypassed--);
308
309failed:
310	m->current_pgpath = NULL;
311	m->current_pg = NULL;
312}
313
314/*
315 * Check whether bios must be queued in the device-mapper core rather
316 * than here in the target.
317 *
318 * m->lock must be held on entry.
319 *
320 * If m->queue_if_no_path and m->saved_queue_if_no_path hold the
321 * same value then we are not between multipath_presuspend()
322 * and multipath_resume() calls and we have no need to check
323 * for the DMF_NOFLUSH_SUSPENDING flag.
324 */
325static int __must_push_back(struct multipath *m)
326{
327	return (m->queue_if_no_path != m->saved_queue_if_no_path &&
328		dm_noflush_suspending(m->ti));
329}
330
331static int map_io(struct multipath *m, struct request *clone,
332		  struct dm_mpath_io *mpio, unsigned was_queued)
333{
334	int r = DM_MAPIO_REMAPPED;
335	size_t nr_bytes = blk_rq_bytes(clone);
336	unsigned long flags;
337	struct pgpath *pgpath;
338	struct block_device *bdev;
339
340	spin_lock_irqsave(&m->lock, flags);
341
342	/* Do we need to select a new pgpath? */
343	if (!m->current_pgpath ||
344	    (!m->queue_io && (m->repeat_count && --m->repeat_count == 0)))
345		__choose_pgpath(m, nr_bytes);
346
347	pgpath = m->current_pgpath;
348
349	if (was_queued)
350		m->queue_size--;
351
352	if ((pgpath && m->queue_io) ||
353	    (!pgpath && m->queue_if_no_path)) {
354		/* Queue for the daemon to resubmit */
355		list_add_tail(&clone->queuelist, &m->queued_ios);
356		m->queue_size++;
357		if ((m->pg_init_required && !m->pg_init_in_progress) ||
358		    !m->queue_io)
359			queue_work(kmultipathd, &m->process_queued_ios);
360		pgpath = NULL;
361		r = DM_MAPIO_SUBMITTED;
362	} else if (pgpath) {
363		bdev = pgpath->path.dev->bdev;
364		clone->q = bdev_get_queue(bdev);
365		clone->rq_disk = bdev->bd_disk;
366	} else if (__must_push_back(m))
367		r = DM_MAPIO_REQUEUE;
368	else
369		r = -EIO;	/* Failed */
370
371	mpio->pgpath = pgpath;
372	mpio->nr_bytes = nr_bytes;
373
374	if (r == DM_MAPIO_REMAPPED && pgpath->pg->ps.type->start_io)
375		pgpath->pg->ps.type->start_io(&pgpath->pg->ps, &pgpath->path,
376					      nr_bytes);
377
378	spin_unlock_irqrestore(&m->lock, flags);
379
380	return r;
381}
382
383/*
384 * If we run out of usable paths, should we queue I/O or error it?
385 */
386static int queue_if_no_path(struct multipath *m, unsigned queue_if_no_path,
387			    unsigned save_old_value)
388{
389	unsigned long flags;
390
391	spin_lock_irqsave(&m->lock, flags);
392
393	if (save_old_value)
394		m->saved_queue_if_no_path = m->queue_if_no_path;
395	else
396		m->saved_queue_if_no_path = queue_if_no_path;
397	m->queue_if_no_path = queue_if_no_path;
398	if (!m->queue_if_no_path && m->queue_size)
399		queue_work(kmultipathd, &m->process_queued_ios);
400
401	spin_unlock_irqrestore(&m->lock, flags);
402
403	return 0;
404}
405
406/*-----------------------------------------------------------------
407 * The multipath daemon is responsible for resubmitting queued ios.
408 *---------------------------------------------------------------*/
409
410static void dispatch_queued_ios(struct multipath *m)
411{
412	int r;
413	unsigned long flags;
414	struct dm_mpath_io *mpio;
415	union map_info *info;
416	struct request *clone, *n;
417	LIST_HEAD(cl);
418
419	spin_lock_irqsave(&m->lock, flags);
420	list_splice_init(&m->queued_ios, &cl);
421	spin_unlock_irqrestore(&m->lock, flags);
422
423	list_for_each_entry_safe(clone, n, &cl, queuelist) {
424		list_del_init(&clone->queuelist);
425
426		info = dm_get_rq_mapinfo(clone);
427		mpio = info->ptr;
428
429		r = map_io(m, clone, mpio, 1);
430		if (r < 0) {
431			mempool_free(mpio, m->mpio_pool);
432			dm_kill_unmapped_request(clone, r);
433		} else if (r == DM_MAPIO_REMAPPED)
434			dm_dispatch_request(clone);
435		else if (r == DM_MAPIO_REQUEUE) {
436			mempool_free(mpio, m->mpio_pool);
437			dm_requeue_unmapped_request(clone);
438		}
439	}
440}
441
442static void process_queued_ios(struct work_struct *work)
443{
444	struct multipath *m =
445		container_of(work, struct multipath, process_queued_ios);
446	struct pgpath *pgpath = NULL;
447	unsigned must_queue = 1;
448	unsigned long flags;
449
450	spin_lock_irqsave(&m->lock, flags);
451
452	if (!m->queue_size)
453		goto out;
454
455	if (!m->current_pgpath)
456		__choose_pgpath(m, 0);
457
458	pgpath = m->current_pgpath;
459
460	if ((pgpath && !m->queue_io) ||
461	    (!pgpath && !m->queue_if_no_path))
462		must_queue = 0;
463
464	if (m->pg_init_required && !m->pg_init_in_progress && pgpath)
465		__pg_init_all_paths(m);
466
467out:
468	spin_unlock_irqrestore(&m->lock, flags);
469	if (!must_queue)
470		dispatch_queued_ios(m);
471}
472
473/*
474 * An event is triggered whenever a path is taken out of use.
475 * Includes path failure and PG bypass.
476 */
477static void trigger_event(struct work_struct *work)
478{
479	struct multipath *m =
480		container_of(work, struct multipath, trigger_event);
481
482	dm_table_event(m->ti->table);
483}
484
485/*-----------------------------------------------------------------
486 * Constructor/argument parsing:
487 * <#multipath feature args> [<arg>]*
488 * <#hw_handler args> [hw_handler [<arg>]*]
489 * <#priority groups>
490 * <initial priority group>
491 *     [<selector> <#selector args> [<arg>]*
492 *      <#paths> <#per-path selector args>
493 *         [<path> [<arg>]* ]+ ]+
494 *---------------------------------------------------------------*/
/* Inclusive range accepted for one numeric argument, plus the error text. */
struct param {
	unsigned int min;	/* Smallest accepted value */
	unsigned int max;	/* Largest accepted value */
	char *error;		/* Message reported when validation fails */
};
500
501static int read_param(struct param *param, char *str, unsigned *v, char **error)
502{
503	if (!str ||
504	    (sscanf(str, "%u", v) != 1) ||
505	    (*v < param->min) ||
506	    (*v > param->max)) {
507		*error = param->error;
508		return -EINVAL;
509	}
510
511	return 0;
512}
513
/* Cursor over the not-yet-consumed constructor arguments. */
struct arg_set {
	unsigned int argc;	/* Arguments remaining */
	char **argv;		/* Next argument */
};
518
519static char *shift(struct arg_set *as)
520{
521	char *r;
522
523	if (as->argc) {
524		as->argc--;
525		r = *as->argv;
526		as->argv++;
527		return r;
528	}
529
530	return NULL;
531}
532
533static void consume(struct arg_set *as, unsigned n)
534{
535	BUG_ON (as->argc < n);
536	as->argc -= n;
537	as->argv += n;
538}
539
540static int parse_path_selector(struct arg_set *as, struct priority_group *pg,
541			       struct dm_target *ti)
542{
543	int r;
544	struct path_selector_type *pst;
545	unsigned ps_argc;
546
547	static struct param _params[] = {
548		{0, 1024, "invalid number of path selector args"},
549	};
550
551	pst = dm_get_path_selector(shift(as));
552	if (!pst) {
553		ti->error = "unknown path selector type";
554		return -EINVAL;
555	}
556
557	r = read_param(_params, shift(as), &ps_argc, &ti->error);
558	if (r) {
559		dm_put_path_selector(pst);
560		return -EINVAL;
561	}
562
563	if (ps_argc > as->argc) {
564		dm_put_path_selector(pst);
565		ti->error = "not enough arguments for path selector";
566		return -EINVAL;
567	}
568
569	r = pst->create(&pg->ps, ps_argc, as->argv);
570	if (r) {
571		dm_put_path_selector(pst);
572		ti->error = "path selector constructor failed";
573		return r;
574	}
575
576	pg->ps.type = pst;
577	consume(as, ps_argc);
578
579	return 0;
580}
581
582static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps,
583			       struct dm_target *ti)
584{
585	int r;
586	struct pgpath *p;
587	struct multipath *m = ti->private;
588
589	/* we need at least a path arg */
590	if (as->argc < 1) {
591		ti->error = "no device given";
592		return ERR_PTR(-EINVAL);
593	}
594
595	p = alloc_pgpath();
596	if (!p)
597		return ERR_PTR(-ENOMEM);
598
599	r = dm_get_device(ti, shift(as), dm_table_get_mode(ti->table),
600			  &p->path.dev);
601	if (r) {
602		ti->error = "error getting device";
603		goto bad;
604	}
605
606	if (m->hw_handler_name) {
607		struct request_queue *q = bdev_get_queue(p->path.dev->bdev);
608
609		r = scsi_dh_attach(q, m->hw_handler_name);
610		if (r == -EBUSY) {
611			/*
612			 * Already attached to different hw_handler,
613			 * try to reattach with correct one.
614			 */
615			scsi_dh_detach(q);
616			r = scsi_dh_attach(q, m->hw_handler_name);
617		}
618
619		if (r < 0) {
620			ti->error = "error attaching hardware handler";
621			dm_put_device(ti, p->path.dev);
622			goto bad;
623		}
624
625		if (m->hw_handler_params) {
626			r = scsi_dh_set_params(q, m->hw_handler_params);
627			if (r < 0) {
628				ti->error = "unable to set hardware "
629							"handler parameters";
630				scsi_dh_detach(q);
631				dm_put_device(ti, p->path.dev);
632				goto bad;
633			}
634		}
635	}
636
637	r = ps->type->add_path(ps, &p->path, as->argc, as->argv, &ti->error);
638	if (r) {
639		dm_put_device(ti, p->path.dev);
640		goto bad;
641	}
642
643	return p;
644
645 bad:
646	free_pgpath(p);
647	return ERR_PTR(r);
648}
649
650static struct priority_group *parse_priority_group(struct arg_set *as,
651						   struct multipath *m)
652{
653	static struct param _params[] = {
654		{1, 1024, "invalid number of paths"},
655		{0, 1024, "invalid number of selector args"}
656	};
657
658	int r;
659	unsigned i, nr_selector_args, nr_params;
660	struct priority_group *pg;
661	struct dm_target *ti = m->ti;
662
663	if (as->argc < 2) {
664		as->argc = 0;
665		ti->error = "not enough priority group arguments";
666		return ERR_PTR(-EINVAL);
667	}
668
669	pg = alloc_priority_group();
670	if (!pg) {
671		ti->error = "couldn't allocate priority group";
672		return ERR_PTR(-ENOMEM);
673	}
674	pg->m = m;
675
676	r = parse_path_selector(as, pg, ti);
677	if (r)
678		goto bad;
679
680	/*
681	 * read the paths
682	 */
683	r = read_param(_params, shift(as), &pg->nr_pgpaths, &ti->error);
684	if (r)
685		goto bad;
686
687	r = read_param(_params + 1, shift(as), &nr_selector_args, &ti->error);
688	if (r)
689		goto bad;
690
691	nr_params = 1 + nr_selector_args;
692	for (i = 0; i < pg->nr_pgpaths; i++) {
693		struct pgpath *pgpath;
694		struct arg_set path_args;
695
696		if (as->argc < nr_params) {
697			ti->error = "not enough path parameters";
698			r = -EINVAL;
699			goto bad;
700		}
701
702		path_args.argc = nr_params;
703		path_args.argv = as->argv;
704
705		pgpath = parse_path(&path_args, &pg->ps, ti);
706		if (IS_ERR(pgpath)) {
707			r = PTR_ERR(pgpath);
708			goto bad;
709		}
710
711		pgpath->pg = pg;
712		list_add_tail(&pgpath->list, &pg->pgpaths);
713		consume(as, nr_params);
714	}
715
716	return pg;
717
718 bad:
719	free_priority_group(pg, ti);
720	return ERR_PTR(r);
721}
722
723static int parse_hw_handler(struct arg_set *as, struct multipath *m)
724{
725	unsigned hw_argc;
726	int ret;
727	struct dm_target *ti = m->ti;
728
729	static struct param _params[] = {
730		{0, 1024, "invalid number of hardware handler args"},
731	};
732
733	if (read_param(_params, shift(as), &hw_argc, &ti->error))
734		return -EINVAL;
735
736	if (!hw_argc)
737		return 0;
738
739	if (hw_argc > as->argc) {
740		ti->error = "not enough arguments for hardware handler";
741		return -EINVAL;
742	}
743
744	m->hw_handler_name = kstrdup(shift(as), GFP_KERNEL);
745	request_module("scsi_dh_%s", m->hw_handler_name);
746	if (scsi_dh_handler_exist(m->hw_handler_name) == 0) {
747		ti->error = "unknown hardware handler type";
748		ret = -EINVAL;
749		goto fail;
750	}
751
752	if (hw_argc > 1) {
753		char *p;
754		int i, j, len = 4;
755
756		for (i = 0; i <= hw_argc - 2; i++)
757			len += strlen(as->argv[i]) + 1;
758		p = m->hw_handler_params = kzalloc(len, GFP_KERNEL);
759		if (!p) {
760			ti->error = "memory allocation failed";
761			ret = -ENOMEM;
762			goto fail;
763		}
764		j = sprintf(p, "%d", hw_argc - 1);
765		for (i = 0, p+=j+1; i <= hw_argc - 2; i++, p+=j+1)
766			j = sprintf(p, "%s", as->argv[i]);
767	}
768	consume(as, hw_argc - 1);
769
770	return 0;
771fail:
772	kfree(m->hw_handler_name);
773	m->hw_handler_name = NULL;
774	return ret;
775}
776
777static int parse_features(struct arg_set *as, struct multipath *m)
778{
779	int r;
780	unsigned argc;
781	struct dm_target *ti = m->ti;
782	const char *param_name;
783
784	static struct param _params[] = {
785		{0, 3, "invalid number of feature args"},
786		{1, 50, "pg_init_retries must be between 1 and 50"},
787	};
788
789	r = read_param(_params, shift(as), &argc, &ti->error);
790	if (r)
791		return -EINVAL;
792
793	if (!argc)
794		return 0;
795
796	do {
797		param_name = shift(as);
798		argc--;
799
800		if (!strnicmp(param_name, MESG_STR("queue_if_no_path"))) {
801			r = queue_if_no_path(m, 1, 0);
802			continue;
803		}
804
805		if (!strnicmp(param_name, MESG_STR("pg_init_retries")) &&
806		    (argc >= 1)) {
807			r = read_param(_params + 1, shift(as),
808				       &m->pg_init_retries, &ti->error);
809			argc--;
810			continue;
811		}
812
813		ti->error = "Unrecognised multipath feature request";
814		r = -EINVAL;
815	} while (argc && !r);
816
817	return r;
818}
819
820static int multipath_ctr(struct dm_target *ti, unsigned int argc,
821			 char **argv)
822{
823	/* target parameters */
824	static struct param _params[] = {
825		{1, 1024, "invalid number of priority groups"},
826		{1, 1024, "invalid initial priority group number"},
827	};
828
829	int r;
830	struct multipath *m;
831	struct arg_set as;
832	unsigned pg_count = 0;
833	unsigned next_pg_num;
834
835	as.argc = argc;
836	as.argv = argv;
837
838	m = alloc_multipath(ti);
839	if (!m) {
840		ti->error = "can't allocate multipath";
841		return -EINVAL;
842	}
843
844	r = parse_features(&as, m);
845	if (r)
846		goto bad;
847
848	r = parse_hw_handler(&as, m);
849	if (r)
850		goto bad;
851
852	r = read_param(_params, shift(&as), &m->nr_priority_groups, &ti->error);
853	if (r)
854		goto bad;
855
856	r = read_param(_params + 1, shift(&as), &next_pg_num, &ti->error);
857	if (r)
858		goto bad;
859
860	/* parse the priority groups */
861	while (as.argc) {
862		struct priority_group *pg;
863
864		pg = parse_priority_group(&as, m);
865		if (IS_ERR(pg)) {
866			r = PTR_ERR(pg);
867			goto bad;
868		}
869
870		m->nr_valid_paths += pg->nr_pgpaths;
871		list_add_tail(&pg->list, &m->priority_groups);
872		pg_count++;
873		pg->pg_num = pg_count;
874		if (!--next_pg_num)
875			m->next_pg = pg;
876	}
877
878	if (pg_count != m->nr_priority_groups) {
879		ti->error = "priority group count mismatch";
880		r = -EINVAL;
881		goto bad;
882	}
883
884	ti->num_flush_requests = 1;
885	ti->num_discard_requests = 1;
886
887	return 0;
888
889 bad:
890	free_multipath(m);
891	return r;
892}
893
894static void multipath_wait_for_pg_init_completion(struct multipath *m)
895{
896	DECLARE_WAITQUEUE(wait, current);
897	unsigned long flags;
898
899	add_wait_queue(&m->pg_init_wait, &wait);
900
901	while (1) {
902		set_current_state(TASK_UNINTERRUPTIBLE);
903
904		spin_lock_irqsave(&m->lock, flags);
905		if (!m->pg_init_in_progress) {
906			spin_unlock_irqrestore(&m->lock, flags);
907			break;
908		}
909		spin_unlock_irqrestore(&m->lock, flags);
910
911		io_schedule();
912	}
913	set_current_state(TASK_RUNNING);
914
915	remove_wait_queue(&m->pg_init_wait, &wait);
916}
917
918static void flush_multipath_work(struct multipath *m)
919{
920	flush_workqueue(kmpath_handlerd);
921	multipath_wait_for_pg_init_completion(m);
922	flush_workqueue(kmultipathd);
923	flush_scheduled_work();
924}
925
926static void multipath_dtr(struct dm_target *ti)
927{
928	struct multipath *m = ti->private;
929
930	flush_multipath_work(m);
931	free_multipath(m);
932}
933
934/*
935 * Map cloned requests
936 */
937static int multipath_map(struct dm_target *ti, struct request *clone,
938			 union map_info *map_context)
939{
940	int r;
941	struct dm_mpath_io *mpio;
942	struct multipath *m = (struct multipath *) ti->private;
943
944	mpio = mempool_alloc(m->mpio_pool, GFP_ATOMIC);
945	if (!mpio)
946		/* ENOMEM, requeue */
947		return DM_MAPIO_REQUEUE;
948	memset(mpio, 0, sizeof(*mpio));
949
950	map_context->ptr = mpio;
951	clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
952	r = map_io(m, clone, mpio, 0);
953	if (r < 0 || r == DM_MAPIO_REQUEUE)
954		mempool_free(mpio, m->mpio_pool);
955
956	return r;
957}
958
959/*
960 * Take a path out of use.
961 */
962static int fail_path(struct pgpath *pgpath)
963{
964	unsigned long flags;
965	struct multipath *m = pgpath->pg->m;
966
967	spin_lock_irqsave(&m->lock, flags);
968
969	if (!pgpath->is_active)
970		goto out;
971
972	DMWARN("Failing path %s.", pgpath->path.dev->name);
973
974	pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path);
975	pgpath->is_active = 0;
976	pgpath->fail_count++;
977
978	m->nr_valid_paths--;
979
980	if (pgpath == m->current_pgpath)
981		m->current_pgpath = NULL;
982
983	dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti,
984		      pgpath->path.dev->name, m->nr_valid_paths);
985
986	schedule_work(&m->trigger_event);
987
988out:
989	spin_unlock_irqrestore(&m->lock, flags);
990
991	return 0;
992}
993
994/*
995 * Reinstate a previously-failed path
996 */
997static int reinstate_path(struct pgpath *pgpath)
998{
999	int r = 0;
1000	unsigned long flags;
1001	struct multipath *m = pgpath->pg->m;
1002
1003	spin_lock_irqsave(&m->lock, flags);
1004
1005	if (pgpath->is_active)
1006		goto out;
1007
1008	if (!pgpath->pg->ps.type->reinstate_path) {
1009		DMWARN("Reinstate path not supported by path selector %s",
1010		       pgpath->pg->ps.type->name);
1011		r = -EINVAL;
1012		goto out;
1013	}
1014
1015	r = pgpath->pg->ps.type->reinstate_path(&pgpath->pg->ps, &pgpath->path);
1016	if (r)
1017		goto out;
1018
1019	pgpath->is_active = 1;
1020
1021	if (!m->nr_valid_paths++ && m->queue_size) {
1022		m->current_pgpath = NULL;
1023		queue_work(kmultipathd, &m->process_queued_ios);
1024	} else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) {
1025		if (queue_work(kmpath_handlerd, &pgpath->activate_path))
1026			m->pg_init_in_progress++;
1027	}
1028
1029	dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
1030		      pgpath->path.dev->name, m->nr_valid_paths);
1031
1032	schedule_work(&m->trigger_event);
1033
1034out:
1035	spin_unlock_irqrestore(&m->lock, flags);
1036
1037	return r;
1038}
1039
1040/*
1041 * Fail or reinstate all paths that match the provided struct dm_dev.
1042 */
1043static int action_dev(struct multipath *m, struct dm_dev *dev,
1044		      action_fn action)
1045{
1046	int r = 0;
1047	struct pgpath *pgpath;
1048	struct priority_group *pg;
1049
1050	list_for_each_entry(pg, &m->priority_groups, list) {
1051		list_for_each_entry(pgpath, &pg->pgpaths, list) {
1052			if (pgpath->path.dev == dev)
1053				r = action(pgpath);
1054		}
1055	}
1056
1057	return r;
1058}
1059
1060/*
1061 * Temporarily try to avoid having to use the specified PG
1062 */
1063static void bypass_pg(struct multipath *m, struct priority_group *pg,
1064		      int bypassed)
1065{
1066	unsigned long flags;
1067
1068	spin_lock_irqsave(&m->lock, flags);
1069
1070	pg->bypassed = bypassed;
1071	m->current_pgpath = NULL;
1072	m->current_pg = NULL;
1073
1074	spin_unlock_irqrestore(&m->lock, flags);
1075
1076	schedule_work(&m->trigger_event);
1077}
1078
1079/*
1080 * Switch to using the specified PG from the next I/O that gets mapped
1081 */
1082static int switch_pg_num(struct multipath *m, const char *pgstr)
1083{
1084	struct priority_group *pg;
1085	unsigned pgnum;
1086	unsigned long flags;
1087
1088	if (!pgstr || (sscanf(pgstr, "%u", &pgnum) != 1) || !pgnum ||
1089	    (pgnum > m->nr_priority_groups)) {
1090		DMWARN("invalid PG number supplied to switch_pg_num");
1091		return -EINVAL;
1092	}
1093
1094	spin_lock_irqsave(&m->lock, flags);
1095	list_for_each_entry(pg, &m->priority_groups, list) {
1096		pg->bypassed = 0;
1097		if (--pgnum)
1098			continue;
1099
1100		m->current_pgpath = NULL;
1101		m->current_pg = NULL;
1102		m->next_pg = pg;
1103	}
1104	spin_unlock_irqrestore(&m->lock, flags);
1105
1106	schedule_work(&m->trigger_event);
1107	return 0;
1108}
1109
1110/*
1111 * Set/clear bypassed status of a PG.
1112 * PGs are numbered upwards from 1 in the order they were declared.
1113 */
1114static int bypass_pg_num(struct multipath *m, const char *pgstr, int bypassed)
1115{
1116	struct priority_group *pg;
1117	unsigned pgnum;
1118
1119	if (!pgstr || (sscanf(pgstr, "%u", &pgnum) != 1) || !pgnum ||
1120	    (pgnum > m->nr_priority_groups)) {
1121		DMWARN("invalid PG number supplied to bypass_pg");
1122		return -EINVAL;
1123	}
1124
1125	list_for_each_entry(pg, &m->priority_groups, list) {
1126		if (!--pgnum)
1127			break;
1128	}
1129
1130	bypass_pg(m, pg, bypassed);
1131	return 0;
1132}
1133
1134/*
1135 * Should we retry pg_init immediately?
1136 */
1137static int pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath)
1138{
1139	unsigned long flags;
1140	int limit_reached = 0;
1141
1142	spin_lock_irqsave(&m->lock, flags);
1143
1144	if (m->pg_init_count <= m->pg_init_retries)
1145		m->pg_init_required = 1;
1146	else
1147		limit_reached = 1;
1148
1149	spin_unlock_irqrestore(&m->lock, flags);
1150
1151	return limit_reached;
1152}
1153
1154static void pg_init_done(void *data, int errors)
1155{
1156	struct pgpath *pgpath = data;
1157	struct priority_group *pg = pgpath->pg;
1158	struct multipath *m = pg->m;
1159	unsigned long flags;
1160
1161	/* device or driver problems */
1162	switch (errors) {
1163	case SCSI_DH_OK:
1164		break;
1165	case SCSI_DH_NOSYS:
1166		if (!m->hw_handler_name) {
1167			errors = 0;
1168			break;
1169		}
1170		DMERR("Could not failover the device: Handler scsi_dh_%s "
1171		      "Error %d.", m->hw_handler_name, errors);
1172		/*
1173		 * Fail path for now, so we do not ping pong
1174		 */
1175		fail_path(pgpath);
1176		break;
1177	case SCSI_DH_DEV_TEMP_BUSY:
1178		/*
1179		 * Probably doing something like FW upgrade on the
1180		 * controller so try the other pg.
1181		 */
1182		bypass_pg(m, pg, 1);
1183		break;
1184	/* TODO: For SCSI_DH_RETRY we should wait a couple seconds */
1185	case SCSI_DH_RETRY:
1186	case SCSI_DH_IMM_RETRY:
1187	case SCSI_DH_RES_TEMP_UNAVAIL:
1188		if (pg_init_limit_reached(m, pgpath))
1189			fail_path(pgpath);
1190		errors = 0;
1191		break;
1192	default:
1193		/*
1194		 * We probably do not want to fail the path for a device
1195		 * error, but this is what the old dm did. In future
1196		 * patches we can do more advanced handling.
1197		 */
1198		fail_path(pgpath);
1199	}
1200
1201	spin_lock_irqsave(&m->lock, flags);
1202	if (errors) {
1203		if (pgpath == m->current_pgpath) {
1204			DMERR("Could not failover device. Error %d.", errors);
1205			m->current_pgpath = NULL;
1206			m->current_pg = NULL;
1207		}
1208	} else if (!m->pg_init_required)
1209		pg->bypassed = 0;
1210
1211	if (--m->pg_init_in_progress)
1212		/* Activations of other paths are still on going */
1213		goto out;
1214
1215	if (!m->pg_init_required)
1216		m->queue_io = 0;
1217
1218	queue_work(kmultipathd, &m->process_queued_ios);
1219
1220	/*
1221	 * Wake up any thread waiting to suspend.
1222	 */
1223	wake_up(&m->pg_init_wait);
1224
1225out:
1226	spin_unlock_irqrestore(&m->lock, flags);
1227}
1228
1229static void activate_path(struct work_struct *work)
1230{
1231	struct pgpath *pgpath =
1232		container_of(work, struct pgpath, activate_path);
1233
1234	scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev),
1235				pg_init_done, pgpath);
1236}
1237
1238/*
1239 * end_io handling
1240 */
1241static int do_end_io(struct multipath *m, struct request *clone,
1242		     int error, struct dm_mpath_io *mpio)
1243{
1244	/*
1245	 * We don't queue any clone request inside the multipath target
1246	 * during end I/O handling, since those clone requests don't have
1247	 * bio clones.  If we queue them inside the multipath target,
1248	 * we need to make bio clones, that requires memory allocation.
1249	 * (See drivers/md/dm.c:end_clone_bio() about why the clone requests
1250	 *  don't have bio clones.)
1251	 * Instead of queueing the clone request here, we queue the original
1252	 * request into dm core, which will remake a clone request and
1253	 * clone bios for it and resubmit it later.
1254	 */
1255	int r = DM_ENDIO_REQUEUE;
1256	unsigned long flags;
1257
1258	if (!error && !clone->errors)
1259		return 0;	/* I/O complete */
1260
1261	if (error == -EOPNOTSUPP)
1262		return error;
1263
1264	if (clone->cmd_flags & REQ_DISCARD)
1265		return error;
1266
1267	if (mpio->pgpath)
1268		fail_path(mpio->pgpath);
1269
1270	spin_lock_irqsave(&m->lock, flags);
1271	if (!m->nr_valid_paths && !m->queue_if_no_path && !__must_push_back(m))
1272		r = -EIO;
1273	spin_unlock_irqrestore(&m->lock, flags);
1274
1275	return r;
1276}
1277
1278static int multipath_end_io(struct dm_target *ti, struct request *clone,
1279			    int error, union map_info *map_context)
1280{
1281	struct multipath *m = ti->private;
1282	struct dm_mpath_io *mpio = map_context->ptr;
1283	struct pgpath *pgpath = mpio->pgpath;
1284	struct path_selector *ps;
1285	int r;
1286
1287	r  = do_end_io(m, clone, error, mpio);
1288	if (pgpath) {
1289		ps = &pgpath->pg->ps;
1290		if (ps->type->end_io)
1291			ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
1292	}
1293	mempool_free(mpio, m->mpio_pool);
1294
1295	return r;
1296}
1297
1298/*
1299 * Suspend can't complete until all the I/O is processed so if
1300 * the last path fails we must error any remaining I/O.
1301 * Note that if the freeze_bdev fails while suspending, the
1302 * queue_if_no_path state is lost - userspace should reset it.
1303 */
1304static void multipath_presuspend(struct dm_target *ti)
1305{
1306	struct multipath *m = (struct multipath *) ti->private;
1307
1308	queue_if_no_path(m, 0, 1);
1309}
1310
1311static void multipath_postsuspend(struct dm_target *ti)
1312{
1313	struct multipath *m = ti->private;
1314
1315	mutex_lock(&m->work_mutex);
1316	flush_multipath_work(m);
1317	mutex_unlock(&m->work_mutex);
1318}
1319
1320/*
1321 * Restore the queue_if_no_path setting.
1322 */
1323static void multipath_resume(struct dm_target *ti)
1324{
1325	struct multipath *m = (struct multipath *) ti->private;
1326	unsigned long flags;
1327
1328	spin_lock_irqsave(&m->lock, flags);
1329	m->queue_if_no_path = m->saved_queue_if_no_path;
1330	spin_unlock_irqrestore(&m->lock, flags);
1331}
1332
1333/*
1334 * Info output has the following format:
1335 * num_multipath_feature_args [multipath_feature_args]*
1336 * num_handler_status_args [handler_status_args]*
1337 * num_groups init_group_number
1338 *            [A|D|E num_ps_status_args [ps_status_args]*
1339 *             num_paths num_selector_args
1340 *             [path_dev A|F fail_count [selector_args]* ]+ ]+
1341 *
1342 * Table output has the following format (identical to the constructor string):
1343 * num_feature_args [features_args]*
1344 * num_handler_args hw_handler [hw_handler_args]*
1345 * num_groups init_group_number
1346 *     [priority selector-name num_ps_args [ps_args]*
1347 *      num_paths num_selector_args [path_dev [selector_args]* ]+ ]+
1348 */
1349static int multipath_status(struct dm_target *ti, status_type_t type,
1350			    char *result, unsigned int maxlen)
1351{
1352	int sz = 0;
1353	unsigned long flags;
1354	struct multipath *m = (struct multipath *) ti->private;
1355	struct priority_group *pg;
1356	struct pgpath *p;
1357	unsigned pg_num;
1358	char state;
1359
1360	spin_lock_irqsave(&m->lock, flags);
1361
1362	/* Features */
1363	if (type == STATUSTYPE_INFO)
1364		DMEMIT("2 %u %u ", m->queue_size, m->pg_init_count);
1365	else {
1366		DMEMIT("%u ", m->queue_if_no_path +
1367			      (m->pg_init_retries > 0) * 2);
1368		if (m->queue_if_no_path)
1369			DMEMIT("queue_if_no_path ");
1370		if (m->pg_init_retries)
1371			DMEMIT("pg_init_retries %u ", m->pg_init_retries);
1372	}
1373
1374	if (!m->hw_handler_name || type == STATUSTYPE_INFO)
1375		DMEMIT("0 ");
1376	else
1377		DMEMIT("1 %s ", m->hw_handler_name);
1378
1379	DMEMIT("%u ", m->nr_priority_groups);
1380
1381	if (m->next_pg)
1382		pg_num = m->next_pg->pg_num;
1383	else if (m->current_pg)
1384		pg_num = m->current_pg->pg_num;
1385	else
1386			pg_num = 1;
1387
1388	DMEMIT("%u ", pg_num);
1389
1390	switch (type) {
1391	case STATUSTYPE_INFO:
1392		list_for_each_entry(pg, &m->priority_groups, list) {
1393			if (pg->bypassed)
1394				state = 'D';	/* Disabled */
1395			else if (pg == m->current_pg)
1396				state = 'A';	/* Currently Active */
1397			else
1398				state = 'E';	/* Enabled */
1399
1400			DMEMIT("%c ", state);
1401
1402			if (pg->ps.type->status)
1403				sz += pg->ps.type->status(&pg->ps, NULL, type,
1404							  result + sz,
1405							  maxlen - sz);
1406			else
1407				DMEMIT("0 ");
1408
1409			DMEMIT("%u %u ", pg->nr_pgpaths,
1410			       pg->ps.type->info_args);
1411
1412			list_for_each_entry(p, &pg->pgpaths, list) {
1413				DMEMIT("%s %s %u ", p->path.dev->name,
1414				       p->is_active ? "A" : "F",
1415				       p->fail_count);
1416				if (pg->ps.type->status)
1417					sz += pg->ps.type->status(&pg->ps,
1418					      &p->path, type, result + sz,
1419					      maxlen - sz);
1420			}
1421		}
1422		break;
1423
1424	case STATUSTYPE_TABLE:
1425		list_for_each_entry(pg, &m->priority_groups, list) {
1426			DMEMIT("%s ", pg->ps.type->name);
1427
1428			if (pg->ps.type->status)
1429				sz += pg->ps.type->status(&pg->ps, NULL, type,
1430							  result + sz,
1431							  maxlen - sz);
1432			else
1433				DMEMIT("0 ");
1434
1435			DMEMIT("%u %u ", pg->nr_pgpaths,
1436			       pg->ps.type->table_args);
1437
1438			list_for_each_entry(p, &pg->pgpaths, list) {
1439				DMEMIT("%s ", p->path.dev->name);
1440				if (pg->ps.type->status)
1441					sz += pg->ps.type->status(&pg->ps,
1442					      &p->path, type, result + sz,
1443					      maxlen - sz);
1444			}
1445		}
1446		break;
1447	}
1448
1449	spin_unlock_irqrestore(&m->lock, flags);
1450
1451	return 0;
1452}
1453
1454static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
1455{
1456	int r = -EINVAL;
1457	struct dm_dev *dev;
1458	struct multipath *m = (struct multipath *) ti->private;
1459	action_fn action;
1460
1461	mutex_lock(&m->work_mutex);
1462
1463	if (dm_suspended(ti)) {
1464		r = -EBUSY;
1465		goto out;
1466	}
1467
1468	if (argc == 1) {
1469		if (!strnicmp(argv[0], MESG_STR("queue_if_no_path"))) {
1470			r = queue_if_no_path(m, 1, 0);
1471			goto out;
1472		} else if (!strnicmp(argv[0], MESG_STR("fail_if_no_path"))) {
1473			r = queue_if_no_path(m, 0, 0);
1474			goto out;
1475		}
1476	}
1477
1478	if (argc != 2) {
1479		DMWARN("Unrecognised multipath message received.");
1480		goto out;
1481	}
1482
1483	if (!strnicmp(argv[0], MESG_STR("disable_group"))) {
1484		r = bypass_pg_num(m, argv[1], 1);
1485		goto out;
1486	} else if (!strnicmp(argv[0], MESG_STR("enable_group"))) {
1487		r = bypass_pg_num(m, argv[1], 0);
1488		goto out;
1489	} else if (!strnicmp(argv[0], MESG_STR("switch_group"))) {
1490		r = switch_pg_num(m, argv[1]);
1491		goto out;
1492	} else if (!strnicmp(argv[0], MESG_STR("reinstate_path")))
1493		action = reinstate_path;
1494	else if (!strnicmp(argv[0], MESG_STR("fail_path")))
1495		action = fail_path;
1496	else {
1497		DMWARN("Unrecognised multipath message received.");
1498		goto out;
1499	}
1500
1501	r = dm_get_device(ti, argv[1], dm_table_get_mode(ti->table), &dev);
1502	if (r) {
1503		DMWARN("message: error getting device %s",
1504		       argv[1]);
1505		goto out;
1506	}
1507
1508	r = action_dev(m, dev, action);
1509
1510	dm_put_device(ti, dev);
1511
1512out:
1513	mutex_unlock(&m->work_mutex);
1514	return r;
1515}
1516
1517static int multipath_ioctl(struct dm_target *ti, unsigned int cmd,
1518			   unsigned long arg)
1519{
1520	struct multipath *m = (struct multipath *) ti->private;
1521	struct block_device *bdev = NULL;
1522	fmode_t mode = 0;
1523	unsigned long flags;
1524	int r = 0;
1525
1526	spin_lock_irqsave(&m->lock, flags);
1527
1528	if (!m->current_pgpath)
1529		__choose_pgpath(m, 0);
1530
1531	if (m->current_pgpath) {
1532		bdev = m->current_pgpath->path.dev->bdev;
1533		mode = m->current_pgpath->path.dev->mode;
1534	}
1535
1536	if (m->queue_io)
1537		r = -EAGAIN;
1538	else if (!bdev)
1539		r = -EIO;
1540
1541	spin_unlock_irqrestore(&m->lock, flags);
1542
1543	return r ? : __blkdev_driver_ioctl(bdev, mode, cmd, arg);
1544}
1545
1546static int multipath_iterate_devices(struct dm_target *ti,
1547				     iterate_devices_callout_fn fn, void *data)
1548{
1549	struct multipath *m = ti->private;
1550	struct priority_group *pg;
1551	struct pgpath *p;
1552	int ret = 0;
1553
1554	list_for_each_entry(pg, &m->priority_groups, list) {
1555		list_for_each_entry(p, &pg->pgpaths, list) {
1556			ret = fn(ti, p->path.dev, ti->begin, ti->len, data);
1557			if (ret)
1558				goto out;
1559		}
1560	}
1561
1562out:
1563	return ret;
1564}
1565
1566static int __pgpath_busy(struct pgpath *pgpath)
1567{
1568	struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev);
1569
1570	return dm_underlying_device_busy(q);
1571}
1572
1573/*
1574 * We return "busy", only when we can map I/Os but underlying devices
1575 * are busy (so even if we map I/Os now, the I/Os will wait on
1576 * the underlying queue).
1577 * In other words, if we want to kill I/Os or queue them inside us
1578 * due to map unavailability, we don't return "busy".  Otherwise,
1579 * dm core won't give us the I/Os and we can't do what we want.
1580 */
1581static int multipath_busy(struct dm_target *ti)
1582{
1583	int busy = 0, has_active = 0;
1584	struct multipath *m = ti->private;
1585	struct priority_group *pg;
1586	struct pgpath *pgpath;
1587	unsigned long flags;
1588
1589	spin_lock_irqsave(&m->lock, flags);
1590
1591	/* Guess which priority_group will be used at next mapping time */
1592	if (unlikely(!m->current_pgpath && m->next_pg))
1593		pg = m->next_pg;
1594	else if (likely(m->current_pg))
1595		pg = m->current_pg;
1596	else
1597		/*
1598		 * We don't know which pg will be used at next mapping time.
1599		 * We don't call __choose_pgpath() here to avoid to trigger
1600		 * pg_init just by busy checking.
1601		 * So we don't know whether underlying devices we will be using
1602		 * at next mapping time are busy or not. Just try mapping.
1603		 */
1604		goto out;
1605
1606	/*
1607	 * If there is one non-busy active path at least, the path selector
1608	 * will be able to select it. So we consider such a pg as not busy.
1609	 */
1610	busy = 1;
1611	list_for_each_entry(pgpath, &pg->pgpaths, list)
1612		if (pgpath->is_active) {
1613			has_active = 1;
1614
1615			if (!__pgpath_busy(pgpath)) {
1616				busy = 0;
1617				break;
1618			}
1619		}
1620
1621	if (!has_active)
1622		/*
1623		 * No active path in this pg, so this pg won't be used and
1624		 * the current_pg will be changed at next mapping time.
1625		 * We need to try mapping to determine it.
1626		 */
1627		busy = 0;
1628
1629out:
1630	spin_unlock_irqrestore(&m->lock, flags);
1631
1632	return busy;
1633}
1634
1635/*-----------------------------------------------------------------
1636 * Module setup
1637 *---------------------------------------------------------------*/
1638static struct target_type multipath_target = {
1639	.name = "multipath",
1640	.version = {1, 1, 1},
1641	.module = THIS_MODULE,
1642	.ctr = multipath_ctr,
1643	.dtr = multipath_dtr,
1644	.map_rq = multipath_map,
1645	.rq_end_io = multipath_end_io,
1646	.presuspend = multipath_presuspend,
1647	.postsuspend = multipath_postsuspend,
1648	.resume = multipath_resume,
1649	.status = multipath_status,
1650	.message = multipath_message,
1651	.ioctl  = multipath_ioctl,
1652	.iterate_devices = multipath_iterate_devices,
1653	.busy = multipath_busy,
1654};
1655
1656static int __init dm_multipath_init(void)
1657{
1658	int r;
1659
1660	/* allocate a slab for the dm_ios */
1661	_mpio_cache = KMEM_CACHE(dm_mpath_io, 0);
1662	if (!_mpio_cache)
1663		return -ENOMEM;
1664
1665	r = dm_register_target(&multipath_target);
1666	if (r < 0) {
1667		DMERR("register failed %d", r);
1668		kmem_cache_destroy(_mpio_cache);
1669		return -EINVAL;
1670	}
1671
1672	kmultipathd = create_workqueue("kmpathd");
1673	if (!kmultipathd) {
1674		DMERR("failed to create workqueue kmpathd");
1675		dm_unregister_target(&multipath_target);
1676		kmem_cache_destroy(_mpio_cache);
1677		return -ENOMEM;
1678	}
1679
1680	/*
1681	 * A separate workqueue is used to handle the device handlers
1682	 * to avoid overloading existing workqueue. Overloading the
1683	 * old workqueue would also create a bottleneck in the
1684	 * path of the storage hardware device activation.
1685	 */
1686	kmpath_handlerd = create_singlethread_workqueue("kmpath_handlerd");
1687	if (!kmpath_handlerd) {
1688		DMERR("failed to create workqueue kmpath_handlerd");
1689		destroy_workqueue(kmultipathd);
1690		dm_unregister_target(&multipath_target);
1691		kmem_cache_destroy(_mpio_cache);
1692		return -ENOMEM;
1693	}
1694
1695	DMINFO("version %u.%u.%u loaded",
1696	       multipath_target.version[0], multipath_target.version[1],
1697	       multipath_target.version[2]);
1698
1699	return r;
1700}
1701
1702static void __exit dm_multipath_exit(void)
1703{
1704	destroy_workqueue(kmpath_handlerd);
1705	destroy_workqueue(kmultipathd);
1706
1707	dm_unregister_target(&multipath_target);
1708	kmem_cache_destroy(_mpio_cache);
1709}
1710
1711module_init(dm_multipath_init);
1712module_exit(dm_multipath_exit);
1713
1714MODULE_DESCRIPTION(DM_NAME " multipath target");
1715MODULE_AUTHOR("Sistina Software <dm-devel@redhat.com>");
1716MODULE_LICENSE("GPL");
1717