/*
   md_k.h : kernel internal structure of the Linux MD driver
          Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   You should have received a copy of the GNU General Public License
   (for example /usr/src/linux/COPYING); if not, write to the Free
   Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#ifndef _MD_K_H
#define _MD_K_H

#define MD_RESERVED       0UL
#define LINEAR            1UL
#define RAID0             2UL
#define RAID1             3UL
#define RAID5             4UL
#define TRANSLUCENT       5UL
#define HSM               6UL
#define MULTIPATH         7UL
#define MAX_PERSONALITY   8UL

static inline int pers_to_level (int pers)
{
	switch (pers) {
		case MULTIPATH:		return -4;
		case HSM:		return -3;
		case TRANSLUCENT:	return -2;
		case LINEAR:		return -1;
		case RAID0:		return 0;
		case RAID1:		return 1;
		case RAID5:		return 5;
	}
	BUG();
	return MD_RESERVED;	/* not reached; placates the compiler */
}

static inline int level_to_pers (int level)
{
	switch (level) {
		case -4: return MULTIPATH;
		case -3: return HSM;
		case -2: return TRANSLUCENT;
		case -1: return LINEAR;
		case 0: return RAID0;
		case 1: return RAID1;
		case 4:
		case 5: return RAID5;
	}
	return MD_RESERVED;
}
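
/*
 * Illustrative sketch (not part of the original header): the two
 * conversions are not exact inverses. Both the historical level 4
 * and level 5 resolve to the RAID5 personality, which maps back to
 * level 5 only:
 */
static inline int example_level_roundtrip(void)
{
	/* level_to_pers(4) == level_to_pers(5) == RAID5 */
	return pers_to_level(level_to_pers(4));	/* yields 5, not 4 */
}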

typedef struct mddev_s mddev_t;
typedef struct mdk_rdev_s mdk_rdev_t;

#if MINORBITS != 8
#error MD does not handle a bigger kdev_t yet
#endif

#define MAX_MD_DEVS  (1<<MINORBITS)	/* max number of md devices */

/*
 * Maps a kdev to an mddev/subdev. How 'data' is handled is up to
 * the personality. (e.g. HSM uses this to identify individual LVs)
 */
typedef struct dev_mapping_s {
	mddev_t *mddev;
	void *data;
} dev_mapping_t;

extern dev_mapping_t mddev_map [MAX_MD_DEVS];

static inline mddev_t * kdev_to_mddev (kdev_t dev)
{
	if (MAJOR(dev) != MD_MAJOR)
		BUG();
	return mddev_map[MINOR(dev)].mddev;
}
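
/*
 * Illustrative sketch (not part of the original header): resolving a
 * request device to its running array and to the personality-private
 * 'data' of its mapping (HSM stores the LV identity there). The
 * MD_MAJOR sanity check is done by kdev_to_mddev() itself.
 */
static inline void *example_lookup_private(kdev_t dev)
{
	mddev_t *mddev = kdev_to_mddev(dev);

	if (!mddev)		/* minor not mapped to a running array */
		return NULL;
	return mddev_map[MINOR(dev)].data;
}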

/*
 * options passed in by raidrun:
 */

#define MAX_CHUNK_SIZE (4096*1024)

/*
 * default readahead
 */
#define MD_READAHEAD	vm_max_readahead

static inline int disk_faulty(mdp_disk_t * d)
{
	return d->state & (1 << MD_DISK_FAULTY);
}

static inline int disk_active(mdp_disk_t * d)
{
	return d->state & (1 << MD_DISK_ACTIVE);
}

static inline int disk_sync(mdp_disk_t * d)
{
	return d->state & (1 << MD_DISK_SYNC);
}

static inline int disk_spare(mdp_disk_t * d)
{
	return !disk_sync(d) && !disk_active(d) && !disk_faulty(d);
}

static inline int disk_removed(mdp_disk_t * d)
{
	return d->state & (1 << MD_DISK_REMOVED);
}

static inline void mark_disk_faulty(mdp_disk_t * d)
{
	d->state |= (1 << MD_DISK_FAULTY);
}

static inline void mark_disk_active(mdp_disk_t * d)
{
	d->state |= (1 << MD_DISK_ACTIVE);
}

static inline void mark_disk_sync(mdp_disk_t * d)
{
	d->state |= (1 << MD_DISK_SYNC);
}

static inline void mark_disk_spare(mdp_disk_t * d)
{
	d->state = 0;
}

static inline void mark_disk_removed(mdp_disk_t * d)
{
	d->state = (1 << MD_DISK_FAULTY) | (1 << MD_DISK_REMOVED);
}

static inline void mark_disk_inactive(mdp_disk_t * d)
{
	d->state &= ~(1 << MD_DISK_ACTIVE);
}

static inline void mark_disk_nonsync(mdp_disk_t * d)
{
	d->state &= ~(1 << MD_DISK_SYNC);
}
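
/*
 * Illustrative sketch (not part of the original header): 'spare' is
 * not a state bit of its own, but the absence of the other three, so
 * mark_disk_spare() simply zeroes the state word:
 */
static inline int example_make_spare(mdp_disk_t *d)
{
	mark_disk_spare(d);	/* d->state = 0 */
	return disk_spare(d);	/* always true after the call above */
}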

/*
 * MD's 'extended' device
 */
struct mdk_rdev_s
{
	struct md_list_head same_set;	/* RAID devices within the same set */
	struct md_list_head all;	/* all RAID devices */
	struct md_list_head pending;	/* undetected RAID devices */

	kdev_t dev;			/* Device number */
	kdev_t old_dev;			/* "dev" when it was last imported */
	unsigned long size;		/* Device size (in blocks) */
	mddev_t *mddev;			/* RAID array if running */
	unsigned long last_events;	/* IO event timestamp */

	struct block_device *bdev;	/* block device handle */

	mdp_super_t *sb;
	unsigned long sb_offset;

	int alias_device;		/* device alias to the same disk */
	int faulty;			/* if faulty do not issue IO requests */
	int desc_nr;			/* descriptor index in the superblock */
};


/*
 * disk operations in a working array:
 */
#define DISKOP_SPARE_INACTIVE	0
#define DISKOP_SPARE_WRITE	1
#define DISKOP_SPARE_ACTIVE	2
#define DISKOP_HOT_REMOVE_DISK	3
#define DISKOP_HOT_ADD_DISK	4

typedef struct mdk_personality_s mdk_personality_t;

struct mddev_s
{
	void				*private;
	mdk_personality_t		*pers;
	int				__minor;
	mdp_super_t			*sb;
	int				nb_dev;
	struct md_list_head		disks;
	int				sb_dirty;
	mdu_param_t			param;
	int				ro;
	unsigned long			curr_resync;	/* blocks scheduled */
	unsigned long			resync_mark;	/* a recent timestamp */
	unsigned long			resync_mark_cnt;/* blocks written at resync_mark */
	char				*name;
	int				recovery_running;
	struct semaphore		reconfig_sem;
	struct semaphore		recovery_sem;
	struct semaphore		resync_sem;
	atomic_t			active;

	atomic_t			recovery_active; /* blocks scheduled, but not written */
	md_wait_queue_head_t		recovery_wait;

	struct md_list_head		all_mddevs;
};

struct mdk_personality_s
{
	char *name;
	int (*make_request)(mddev_t *mddev, int rw, struct buffer_head * bh);
	int (*run)(mddev_t *mddev);
	int (*stop)(mddev_t *mddev);
	int (*status)(char *page, mddev_t *mddev);
	int (*error_handler)(mddev_t *mddev, kdev_t dev);

/*
 * Some personalities (RAID-1, RAID-5) can have disks hot-added and
 * hot-removed. Hot removal is different from failure: failure marks
 * a disk inactive, but the disk is still part of the array. The
 * interface to such operations is the 'pers->diskop()' function,
 * which may be NULL. (for an illustrative skeleton, see the sketch
 * after this structure)
 *
 * The diskop function can change the pointer pointing to the incoming
 * descriptor, but must do so very carefully. (currently only
 * SPARE_ACTIVE expects such a change)
 */
	int (*diskop) (mddev_t *mddev, mdp_disk_t **descriptor, int state);

	int (*stop_resync)(mddev_t *mddev);
	int (*restart_resync)(mddev_t *mddev);
	int (*sync_request)(mddev_t *mddev, unsigned long block_nr);
};
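
/*
 * Illustrative sketch (not part of the original header): the rough
 * shape of a personality definition, in the gcc labeled-initializer
 * style of the era. All names below are hypothetical; diskop and the
 * resync hooks are omitted here, since diskop, at least, may be NULL
 * per the comment above.
 */
#if 0	/* example only */
static int example_make_request(mddev_t *mddev, int rw, struct buffer_head *bh);
static int example_run(mddev_t *mddev);
static int example_stop(mddev_t *mddev);
static int example_status(char *page, mddev_t *mddev);
static int example_error_handler(mddev_t *mddev, kdev_t dev);

static mdk_personality_t example_personality = {
	name:		"example",
	make_request:	example_make_request,
	run:		example_run,
	stop:		example_stop,
	status:		example_status,
	error_handler:	example_error_handler,
};
#endif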


/*
 * Currently we index md_array directly, based on the minor
 * number. This will have to change to dynamic allocation
 * once we start supporting partitioning of md devices.
 */
static inline int mdidx (mddev_t * mddev)
{
	return mddev->__minor;
}

static inline kdev_t mddev_to_kdev(mddev_t * mddev)
{
	return MKDEV(MD_MAJOR, mdidx(mddev));
}

extern mdk_rdev_t * find_rdev(mddev_t * mddev, kdev_t dev);
extern mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr);
extern mdp_disk_t *get_spare(mddev_t *mddev);

/*
 * iterates through some rdev ringlist. It's safe to remove the
 * current 'rdev', but don't touch 'tmp'.
 */
#define ITERATE_RDEV_GENERIC(head,field,rdev,tmp)			\
									\
	for (tmp = head.next;						\
		rdev = md_list_entry(tmp, mdk_rdev_t, field),		\
			tmp = tmp->next, tmp->prev != &head		\
		; )
/*
 * iterates through the 'same array disks' ringlist
 */
#define ITERATE_RDEV(mddev,rdev,tmp)					\
	ITERATE_RDEV_GENERIC((mddev)->disks,same_set,rdev,tmp)
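
/*
 * Illustrative sketch (not part of the original header): counting the
 * faulty members of an array. 'tmp' is the cursor the macro advances
 * internally and must not be touched by the loop body:
 */
static inline int example_count_faulty(mddev_t *mddev)
{
	struct md_list_head *tmp;
	mdk_rdev_t *rdev;
	int faulty = 0;

	ITERATE_RDEV(mddev, rdev, tmp)
		if (rdev->faulty)
			faulty++;
	return faulty;
}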

/*
 * Same as above, but assumes that the devices have rdev->desc_nr
 * numbered from 0 to mddev->nb_dev-1, and iterates through the rdevs
 * in ascending order.
 */
#define ITERATE_RDEV_ORDERED(mddev,rdev,i)				\
	for (i = 0; rdev = find_rdev_nr(mddev, i), i < mddev->nb_dev; i++)


/*
 * Iterates through all 'RAID managed disks'
 */
#define ITERATE_RDEV_ALL(rdev,tmp)					\
	ITERATE_RDEV_GENERIC(all_raid_disks,all,rdev,tmp)

/*
 * Iterates through 'pending RAID disks'
 */
#define ITERATE_RDEV_PENDING(rdev,tmp)					\
	ITERATE_RDEV_GENERIC(pending_raid_disks,pending,rdev,tmp)

/*
 * iterates through all used mddevs in the system.
 */
#define ITERATE_MDDEV(mddev,tmp)					\
									\
	for (tmp = all_mddevs.next;					\
		mddev = md_list_entry(tmp, mddev_t, all_mddevs),	\
			tmp = tmp->next, tmp->prev != &all_mddevs	\
		; )

static inline int lock_mddev (mddev_t * mddev)
{
	return down_interruptible(&mddev->reconfig_sem);
}

static inline void unlock_mddev (mddev_t * mddev)
{
	up(&mddev->reconfig_sem);
}
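
/*
 * Illustrative sketch (not part of the original header): lock_mddev()
 * uses down_interruptible(), so the wait can be aborted by a signal
 * and the return value must be checked before touching the array:
 */
static inline int example_reconfigure(mddev_t *mddev)
{
	int err = lock_mddev(mddev);

	if (err)
		return err;	/* interrupted by a signal */
	/* ... reconfiguration protected by reconfig_sem ... */
	unlock_mddev(mddev);
	return 0;
}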

#define xchg_values(x,y) do { __typeof__(x) __tmp = x; \
				x = y; y = __tmp; } while (0)
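
/*
 * Illustrative use (not part of the original header): xchg_values()
 * swaps any two lvalues of the same type, e.g.:
 *
 *	xchg_values(mddev->curr_resync, saved_resync);
 */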

typedef struct mdk_thread_s {
	void			(*run) (void *data);
	void			*data;
	md_wait_queue_head_t	wqueue;
	unsigned long		flags;
	struct completion	*event;
	struct task_struct	*tsk;
	const char		*name;
} mdk_thread_t;

#define THREAD_WAKEUP  0
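
/*
 * Illustrative sketch (not part of the original header): the owner of
 * an mdk_thread_t pokes it by setting THREAD_WAKEUP in 'flags' and
 * waking 'wqueue'; the thread's main loop clears the bit and calls
 * run(data). A hypothetical wakeup helper:
 */
static inline void example_wakeup_thread(mdk_thread_t *thread)
{
	set_bit(THREAD_WAKEUP, &thread->flags);
	wake_up(&thread->wqueue);
}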

#define MAX_DISKNAME_LEN 64

typedef struct dev_name_s {
	struct md_list_head list;
	kdev_t dev;
	char namebuf [MAX_DISKNAME_LEN];
	char *name;
} dev_name_t;


#define __wait_event_lock_irq(wq, condition, lock)			\
do {									\
	wait_queue_t __wait;						\
	init_waitqueue_entry(&__wait, current);				\
									\
	add_wait_queue(&wq, &__wait);					\
	for (;;) {							\
		set_current_state(TASK_UNINTERRUPTIBLE);		\
		if (condition)						\
			break;						\
		spin_unlock_irq(&lock);					\
		run_task_queue(&tq_disk);				\
		schedule();						\
		spin_lock_irq(&lock);					\
	}								\
	current->state = TASK_RUNNING;					\
	remove_wait_queue(&wq, &__wait);				\
} while (0)

#define wait_event_lock_irq(wq, condition, lock)			\
do {									\
	if (condition)							\
		break;							\
	__wait_event_lock_irq(wq, condition, lock);			\
} while (0)
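
/*
 * Illustrative use (not part of the original header): waiting for
 * in-flight resync IO to drain while a spinlock guards the counter.
 * 'some_lock' is hypothetical; the macro drops it around schedule()
 * and kicks tq_disk so queued IO can make progress:
 *
 *	spin_lock_irq(&some_lock);
 *	wait_event_lock_irq(mddev->recovery_wait,
 *		!atomic_read(&mddev->recovery_active), some_lock);
 *	spin_unlock_irq(&some_lock);
 */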


#define __wait_disk_event(wq, condition)				\
do {									\
	wait_queue_t __wait;						\
	init_waitqueue_entry(&__wait, current);				\
									\
	add_wait_queue(&wq, &__wait);					\
	for (;;) {							\
		set_current_state(TASK_UNINTERRUPTIBLE);		\
		if (condition)						\
			break;						\
		run_task_queue(&tq_disk);				\
		schedule();						\
	}								\
	current->state = TASK_RUNNING;					\
	remove_wait_queue(&wq, &__wait);				\
} while (0)

#define wait_disk_event(wq, condition)					\
do {									\
	if (condition)							\
		break;							\
	__wait_disk_event(wq, condition);				\
} while (0)

#endif