1/*
2 * kernel/lvm.c
3 *
4 * Copyright (C) 1997 - 2002  Heinz Mauelshagen, Sistina Software
5 *
6 * February-November 1997
7 * April-May,July-August,November 1998
8 * January-March,May,July,September,October 1999
9 * January,February,July,September-November 2000
10 * January-May,June,October 2001
11 * May-July 2002
12 *
13 *
14 * LVM driver is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation; either version 2, or (at your option)
17 * any later version.
18 *
19 * LVM driver is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22 * GNU General Public License for more details.
23 *
24 * You should have received a copy of the GNU General Public License
25 * along with GNU CC; see the file COPYING.  If not, write to
26 * the Free Software Foundation, 59 Temple Place - Suite 330,
27 * Boston, MA 02111-1307, USA.
28 *
29 */
30
31/*
32 * Changelog
33 *
34 *    09/11/1997 - added chr ioctls VG_STATUS_GET_COUNT
35 *                 and VG_STATUS_GET_NAMELIST
36 *    18/01/1998 - change lvm_chr_open/close lock handling
37 *    30/04/1998 - changed LV_STATUS ioctl to LV_STATUS_BYNAME and
38 *               - added   LV_STATUS_BYINDEX ioctl
39 *               - used lvm_status_byname_req_t and
40 *                      lvm_status_byindex_req_t vars
41 *    04/05/1998 - added multiple device support
42 *    08/05/1998 - added support to set/clear extendable flag in volume group
43 *    09/05/1998 - changed output of lvm_proc_get_global_info() because of
44 *                 support for free (eg. longer) logical volume names
45 *    12/05/1998 - added spin_locks (thanks to Pascal van Dam
46 *                 <pascal@ramoth.xs4all.nl>)
47 *    25/05/1998 - fixed handling of locked PEs in lvm_map() and
48 *                 lvm_chr_ioctl()
49 *    26/05/1998 - reactivated verify_area by access_ok
50 *    07/06/1998 - used vmalloc/vfree instead of kmalloc/kfree to go
51 *                 beyond 128/256 KB max allocation limit per call
52 *               - #ifdef blocked spin_lock calls to avoid compile errors
53 *                 with 2.0.x
54 *    11/06/1998 - another enhancement to spinlock code in lvm_chr_open()
55 *                 and use of LVM_VERSION_CODE instead of my own macros
56 *                 (thanks to  Michael Marxmeier <mike@msede.com>)
57 *    07/07/1998 - added statistics in lvm_map()
58 *    08/07/1998 - saved statistics in lvm_do_lv_extend_reduce()
59 *    25/07/1998 - used __initfunc macro
60 *    02/08/1998 - changes for official char/block major numbers
61 *    07/08/1998 - avoided init_module() and cleanup_module() to be static
62 *    30/08/1998 - changed VG lv_open counter from sum of LV lv_open counters
63 *                 to sum of LVs open (no matter how often each is)
64 *    01/09/1998 - fixed lvm_gendisk.part[] index error
65 *    07/09/1998 - added copying of lv_current_pe-array
66 *                 in LV_STATUS_BYINDEX ioctl
67 *    17/11/1998 - added KERN_* levels to printk
68 *    13/01/1999 - fixed LV index bug in lvm_do_lv_create() which hit lvrename
69 *    07/02/1999 - fixed spinlock handling bug in case of LVM_RESET
70 *                 by moving spinlock code from lvm_chr_open()
71 *                 to lvm_chr_ioctl()
72 *               - added LVM_LOCK_LVM ioctl to lvm_chr_ioctl()
73 *               - allowed LVM_RESET and retrieval commands to go ahead;
74 *                 only other update ioctls are blocked now
75 *               - fixed pv->pe to NULL for pv_status
76 *               - using lv_req structure in lvm_chr_ioctl() now
77 *               - fixed NULL ptr reference bug in lvm_do_lv_extend_reduce()
78 *                 caused by uncontiguous PV array in lvm_chr_ioctl(VG_REDUCE)
79 *    09/02/1999 - changed BLKRASET and BLKRAGET in lvm_chr_ioctl() to
80 *                 handle lgoical volume private read ahead sector
81 *               - implemented LV read_ahead handling with lvm_blk_read()
82 *                 and lvm_blk_write()
83 *    10/02/1999 - implemented 2.[12].* support function lvm_hd_name()
84 *                 to be used in drivers/block/genhd.c by disk_name()
85 *    12/02/1999 - fixed index bug in lvm_blk_ioctl(), HDIO_GETGEO
86 *               - enhanced gendisk insert/remove handling
87 *    16/02/1999 - changed to dynamic block minor number allocation to
88 *                 have as much as 99 volume groups with 256 logical volumes
89 *                 as the grand total; this allows having 1 volume group with
90 *                 up to 256 logical volumes in it
91 *    21/02/1999 - added LV open count information to proc filesystem
92 *               - substituted redundant LVM_RESET code by calls
93 *                 to lvm_do_vg_remove()
94 *    22/02/1999 - used schedule_timeout() to be more responsive
95 *                 in case of lvm_do_vg_remove() with lots of logical volumes
96 *    19/03/1999 - fixed NULL pointer bug in module_init/lvm_init
97 *    17/05/1999 - used DECLARE_WAIT_QUEUE_HEAD macro (>2.3.0)
98 *               - enhanced lvm_hd_name support
99 *    03/07/1999 - avoided use of KERNEL_VERSION macro based ifdefs and
100 *                 memcpy_tofs/memcpy_fromfs macro redefinitions
101 *    06/07/1999 - corrected reads/writes statistic counter copy in case
102 *                 of striped logical volume
103 *    28/07/1999 - implemented snapshot logical volumes
104 *                 - lvm_chr_ioctl
105 *                   - LV_STATUS_BYINDEX
106 *                   - LV_STATUS_BYNAME
107 *                 - lvm_do_lv_create
108 *                 - lvm_do_lv_remove
109 *                 - lvm_map
110 *                 - new lvm_snapshot_remap_block
111 *                 - new lvm_snapshot_remap_new_block
112 *    08/10/1999 - implemented support for multiple snapshots per
113 *                 original logical volume
114 *    12/10/1999 - support for 2.3.19
115 *    11/11/1999 - support for 2.3.28
116 *    21/11/1999 - changed lvm_map() interface to buffer_head based
117 *    19/12/1999 - support for 2.3.33
118 *    01/01/2000 - changed locking concept in lvm_map(),
119 *                 lvm_do_vg_create() and lvm_do_lv_remove()
120 *    15/01/2000 - fixed PV_FLUSH bug in lvm_chr_ioctl()
121 *    24/01/2000 - ported to 2.3.40 including Alan Cox's pointer changes etc.
122 *    29/01/2000 - used kmalloc/kfree again for all small structures
123 *    20/01/2000 - cleaned up lvm_chr_ioctl by moving code
124 *                 to seperated functions
125 *               - avoided "/dev/" in proc filesystem output
126 *               - avoided inline strings functions lvm_strlen etc.
127 *    14/02/2000 - support for 2.3.43
128 *               - integrated Andrea Arcagneli's snapshot code
129 *    25/06/2000 - james (chip) , IKKHAYD! roffl
130 *    26/06/2000 - enhanced lv_extend_reduce for snapshot logical volume
131 *                 support
132 *    06/09/2000 - added devfs support
133 *    07/09/2000 - changed IOP version to 9
134 *               - started to add new char ioctl LV_STATUS_BYDEV_T to support
135 *                 getting an lv_t based on the dev_t of the Logical Volume
136 *    14/09/2000 - enhanced lvm_do_lv_create to upcall VFS functions
137 *                 to sync and lock, activate snapshot and unlock the FS
138 *                 (to support journaled filesystems)
139 *    18/09/2000 - hardsector size support
140 *    27/09/2000 - implemented lvm_do_lv_rename() and lvm_do_vg_rename()
141 *    30/10/2000 - added Andi Kleen's LV_BMAP ioctl to support LILO
142 *    01/11/2000 - added memory information on hash tables to
143 *                 lvm_proc_get_global_info()
144 *    02/11/2000 - implemented /proc/lvm/ hierarchy
145 *    22/11/2000 - changed lvm_do_create_proc_entry_of_pv () to work
146 *                 with devfs
147 *    26/11/2000 - corrected #ifdef locations for PROC_FS
148 *    28/11/2000 - fixed lvm_do_vg_extend() NULL pointer BUG
149 *               - fixed lvm_do_create_proc_entry_of_pv() buffer tampering BUG
150 *    08/01/2001 - Removed conditional compiles related to PROC_FS,
151 *                 procfs is always supported now. (JT)
152 *    12/01/2001 - avoided flushing logical volume in case of shrinking
153 *                 because of unecessary overhead in case of heavy updates
154 *    25/01/2001 - Allow RO open of an inactive LV so it can be reactivated.
155 *    31/01/2001 - removed blk_init_queue/blk_cleanup_queue queueing will be
156 *                 handled by the proper devices.
157 *               - If you try and BMAP a snapshot you now get an -EPERM
158 *    01/01/2001 - lvm_map() now calls buffer_IO_error on error for 2.4
159 *               - factored __remap_snapshot out of lvm_map
160 *    12/02/2001 - move devfs code to create VG before LVs
161 *    13/02/2001 - allow VG_CREATE on /dev/lvm
162 *    14/02/2001 - removed modversions.h
163 *               - tidied device defines for blk.h
164 *               - tidied debug statements
165 *               - bug: vg[] member not set back to NULL if activation fails
166 *               - more lvm_map tidying
167 *    15/02/2001 - register /dev/lvm with devfs correctly (major/minor
168 *                 were swapped)
169 *    19/02/2001 - preallocated buffer_heads for rawio when using
170 *                 snapshots [JT]
171 *    28/02/2001 - introduced the P_DEV macro and changed some internel
172 *                 functions to be static [AD]
173 *    28/02/2001 - factored lvm_get_snapshot_use_rate out of blk_ioctl [AD]
174 *               - fixed user address accessing bug in lvm_do_lv_create()
175 *                 where the check for an existing LV takes place right at
176 *                 the beginning
177 *    01/03/2001 - Add VG_CREATE_OLD for IOP 10 compatibility
178 *    02/03/2001 - Don't destroy usermode pointers in lv_t structures duing
179 *                 LV_STATUS_BYxxx
180 *                 and remove redundant lv_t variables from same.
181 *               - avoid compilation of lvm_dummy_device_request in case of
182 *                 Linux >= 2.3.0 to avoid a warning
183 *               - added lvm_name argument to printk in buffer allocation
184 *                 in order to avoid a warning
185 *    04/03/2001 - moved linux/version.h above first use of KERNEL_VERSION
186 *                 macros
187 *    05/03/2001 - restore copying pe_t array in lvm_do_lv_status_byname. For
188 *                 lvdisplay -v (PC)
189 *               - restore copying pe_t array in lvm_do_lv_status_byindex (HM)
190 *               - added copying pe_t array in lvm_do_lv_status_bydev (HM)
191 *               - enhanced lvm_do_lv_status_by{name,index,dev} to be capable
192 *                 to copy the lv_block_exception_t array to userspace (HM)
193 *    08/03/2001 - initialize new lv_ptr->lv_COW_table_iobuf for snapshots;
194 *                 removed obsolete lv_ptr->lv_COW_table_page initialization
195 *               - factored lvm_do_pv_flush out of lvm_chr_ioctl (HM)
196 *    09/03/2001 - Added _lock_open_count to ensure we only drop the lock
197 *                 when the locking process closes.
198 *    05/04/2001 - Defer writes to an extent that is being moved [JT]
199 *    05/04/2001 - use b_rdev and b_rsector rather than b_dev and b_blocknr in
200 *                 lvm_map() in order to make stacking devices more happy (HM)
201 *    11/04/2001 - cleaned up the pvmove queue code. I no longer retain the
202 *                 rw flag, instead WRITEA's are just dropped [JT]
203 *    30/04/2001 - added KERNEL_VERSION > 2.4.3 get_hardsect_size() rather
204 *                 than get_hardblocksize() call
205 *    03/05/2001 - Use copy_to/from_user to preserve pointers in
206 *                 lvm_do_status_by*
207 *    11/05/2001 - avoid accesses to inactive snapshot data in
208 *                 __update_hardsectsize() and lvm_do_lv_extend_reduce() (JW)
209 *    28/05/2001 - implemented missing BLKSSZGET ioctl
210 *    05/06/2001 - Move _pe_lock out of fast path for lvm_map when no PEs
211 *                 locked.  Make buffer queue flush not need locking.
212 *                 Fix lvm_user_bmap() to set b_rsector for new lvm_map(). [AED]
213 *    30/06/2001 - Speed up __update_hardsectsize() by checking if PVs have
214 *                 the same hardsectsize (very likely) before scanning all LEs
215 *                 in the LV each time.  [AED]
216 *    12/10/2001 - Use add/del_gendisk() routines in 2.4.10+
217 *    01/11/2001 - Backport read_ahead change from Linus kernel [AED]
218 *    24/05/2002 - fixed locking bug in lvm_do_le_remap() introduced with 1.0.4
219 *    13/06/2002 - use blk_ioctl() to support various standard block ioctls
220 *               - support HDIO_GETGEO_BIG ioctl
221 *    05/07/2002 - fixed OBO error on vg array access [benh@kernel.crashing.org]
222 *    22/07/2002 - streamlined blk_ioctl() call
223 *
224 */
225
226#include <linux/version.h>
227
228#define MAJOR_NR LVM_BLK_MAJOR
229#define DEVICE_OFF(device)
230#define LOCAL_END_REQUEST
231
232/* lvm_do_lv_create calls fsync_dev_lockfs()/unlockfs() */
233/* #define	LVM_VFS_ENHANCEMENT */
234
235#include <linux/config.h>
236#include <linux/module.h>
237#include <linux/kernel.h>
238#include <linux/vmalloc.h>
239
240#include <linux/slab.h>
241#include <linux/init.h>
242
243#include <linux/hdreg.h>
244#include <linux/stat.h>
245#include <linux/fs.h>
246#include <linux/proc_fs.h>
247#include <linux/blkdev.h>
248#include <linux/genhd.h>
249#include <linux/locks.h>
250
251
252#include <linux/devfs_fs_kernel.h>
253#include <linux/smp_lock.h>
254#include <asm/ioctl.h>
255#include <asm/segment.h>
256#include <asm/uaccess.h>
257
258#ifdef CONFIG_KERNELD
259#include <linux/kerneld.h>
260#endif
261
262#include <linux/blk.h>
263#include <linux/blkpg.h>
264
265#include <linux/errno.h>
266#include <linux/lvm.h>
267
268#include "lvm-internal.h"
269
/* Clamp a read-ahead value into [LVM_MIN_READ_AHEAD, LVM_MAX_READ_AHEAD]
 * (falling back to the default when out of range) and mirror the result
 * into the block layer's global per-major read_ahead[] table. */
#define	LVM_CORRECT_READ_AHEAD(a)		\
do {						\
	if ((a) < LVM_MIN_READ_AHEAD ||		\
	    (a) > LVM_MAX_READ_AHEAD)		\
		(a) = LVM_DEFAULT_READ_AHEAD;	\
	read_ahead[MAJOR_NR] = (a);		\
} while(0)

/* Kernels without a write-ahead request type fold WRITEA into WRITE. */
#ifndef WRITEA
#  define WRITEA WRITE
#endif
281
282
283/*
284 * External function prototypes
285 */
286static int lvm_make_request_fn(request_queue_t*, int, struct buffer_head*);
287
288static int lvm_blk_ioctl(struct inode *, struct file *, uint, ulong);
289static int lvm_blk_open(struct inode *, struct file *);
290
291static int lvm_blk_close(struct inode *, struct file *);
292static int lvm_get_snapshot_use_rate(lv_t *lv_ptr, void *arg);
293static int lvm_user_bmap(struct inode *, struct lv_bmap *);
294
295static int lvm_chr_open(struct inode *, struct file *);
296static int lvm_chr_close(struct inode *, struct file *);
297static int lvm_chr_ioctl(struct inode *, struct file *, uint, ulong);
298
299
300/* End external function prototypes */
301
302
303/*
304 * Internal function prototypes
305 */
306static void lvm_cleanup(void);
307static void lvm_init_vars(void);
308
309#ifdef LVM_HD_NAME
310extern void (*lvm_hd_name_ptr) (char *, int);
311#endif
312static int lvm_map(struct buffer_head *, int);
313static int lvm_do_lock_lvm(void);
314static int lvm_do_le_remap(vg_t *, void *);
315
316static int lvm_do_pv_create(pv_t *, vg_t *, ulong);
317static int lvm_do_pv_remove(vg_t *, ulong);
318static int lvm_do_lv_create(int, char *, lv_t *);
319static int lvm_do_lv_extend_reduce(int, char *, lv_t *);
320static int lvm_do_lv_remove(int, char *, int);
321static int lvm_do_lv_rename(vg_t *, lv_req_t *, lv_t *);
322static int lvm_do_lv_status_byname(vg_t *r, void *);
323static int lvm_do_lv_status_byindex(vg_t *, void *);
324static int lvm_do_lv_status_bydev(vg_t *, void *);
325
326static int lvm_do_pe_lock_unlock(vg_t *r, void *);
327
328static int lvm_do_pv_change(vg_t*, void*);
329static int lvm_do_pv_status(vg_t *, void *);
330static int lvm_do_pv_flush(void *);
331
332static int lvm_do_vg_create(void *, int minor);
333static int lvm_do_vg_extend(vg_t *, void *);
334static int lvm_do_vg_reduce(vg_t *, void *);
335static int lvm_do_vg_rename(vg_t *, void *);
336static int lvm_do_vg_remove(int);
337static void lvm_geninit(struct gendisk *);
338static void __update_hardsectsize(lv_t *lv);
339
340
341static void _queue_io(struct buffer_head *bh, int rw);
342static struct buffer_head *_dequeue_io(void);
343static void _flush_io(struct buffer_head *bh);
344
345static int _open_pv(pv_t *pv);
346static void _close_pv(pv_t *pv);
347
348static unsigned long _sectors_to_k(unsigned long sect);
349
350#ifdef LVM_HD_NAME
351void lvm_hd_name(char *, int);
352#endif
353/* END Internal function prototypes */
354
355
/* variables */
char *lvm_version = "LVM version "LVM_RELEASE_NAME"("LVM_RELEASE_DATE")";
ushort lvm_iop_version = LVM_DRIVER_IOP_VERSION;
int loadtime = 0;		/* set to CURRENT_TIME in lvm_init_vars() */
const char *const lvm_name = LVM_NAME;


/* volume group descriptor area pointers */
vg_t *vg[ABS_MAX_VG + 1];

/* map from block minor number to VG and LV numbers */
static struct {
	int vg_number;
	int lv_number;
} vg_lv_map[ABS_MAX_LV];


/* Request structures (lvm_chr_ioctl()) */
static pv_change_req_t pv_change_req;
static pv_status_req_t pv_status_req;
/* NOTE(review): deprecated qualifier order; "static volatile" is preferred */
volatile static pe_lock_req_t pe_lock_req;
static le_remap_req_t le_remap_req;
static lv_req_t lv_req;

#ifdef LVM_TOTAL_RESET
static int lvm_reset_spindown = 0;
#endif

static char pv_name[NAME_LEN];
/* static char rootvg[NAME_LEN] = { 0, }; */
static int lock = 0;			/* pid holding the global LVM lock */
static int _lock_open_count = 0;	/* re-opens by the locking process */
static uint vg_count = 0;
static long lvm_chr_open_count = 0;
static DECLARE_WAIT_QUEUE_HEAD(lvm_wait);

static spinlock_t lvm_lock = SPIN_LOCK_UNLOCKED;
static spinlock_t lvm_snapshot_lock = SPIN_LOCK_UNLOCKED;

/* buffer heads queued while their PE is locked (pvmove) */
static struct buffer_head *_pe_requests;
static DECLARE_RWSEM(_pe_lock);
398
/* character (control) device operations; ioctl-only interface */
struct file_operations lvm_chr_fops = {
	owner:		THIS_MODULE,
	open:		lvm_chr_open,
	release:	lvm_chr_close,
	ioctl:		lvm_chr_ioctl,
};

/* block device operations structure needed for 2.3.38? and above */
struct block_device_operations lvm_blk_dops =
{
	owner:		THIS_MODULE,
	open:		lvm_blk_open,
	release:	lvm_blk_close,
	ioctl:		lvm_blk_ioctl,
};


/* gendisk structures */
static struct hd_struct lvm_hd_struct[MAX_LV];
static int lvm_blocksizes[MAX_LV];
static int lvm_hardsectsizes[MAX_LV];
static int lvm_size[MAX_LV];

/* one partition-less gendisk covering all LV minors */
static struct gendisk lvm_gendisk =
{
	major:		MAJOR_NR,
	major_name:	LVM_NAME,
	minor_shift:	0,
	max_p:		1,
	part:		lvm_hd_struct,
	sizes:		lvm_size,
	nr_real:	MAX_LV,
};
432
433
434/*
435 * Driver initialization...
436 */
437int lvm_init(void)
438{
439	if (devfs_register_chrdev(LVM_CHAR_MAJOR,
440				  lvm_name, &lvm_chr_fops) < 0) {
441		printk(KERN_ERR "%s -- devfs_register_chrdev failed\n",
442		       lvm_name);
443		return -EIO;
444	}
445	if (devfs_register_blkdev(MAJOR_NR, lvm_name, &lvm_blk_dops) < 0)
446	{
447		printk("%s -- devfs_register_blkdev failed\n", lvm_name);
448		if (devfs_unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0)
449			printk(KERN_ERR
450			       "%s -- devfs_unregister_chrdev failed\n",
451			       lvm_name);
452		return -EIO;
453	}
454
455	lvm_init_fs();
456	lvm_init_vars();
457	lvm_geninit(&lvm_gendisk);
458
459	/* insert our gendisk at the corresponding major */
460	add_gendisk(&lvm_gendisk);
461
462#ifdef LVM_HD_NAME
463	/* reference from drivers/block/genhd.c */
464	lvm_hd_name_ptr = lvm_hd_name;
465#endif
466
467	blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), lvm_make_request_fn);
468
469
470	/* initialise the pe lock */
471	pe_lock_req.lock = UNLOCK_PE;
472
473	/* optional read root VGDA */
474/*
475   if ( *rootvg != 0) vg_read_with_pv_and_lv ( rootvg, &vg);
476*/
477
478#ifdef MODULE
479	printk(KERN_INFO "%s module loaded\n", lvm_version);
480#else
481	printk(KERN_INFO "%s\n", lvm_version);
482#endif
483
484	return 0;
485} /* lvm_init() */
486
487/*
488 * cleanup...
489 */
490
/* Undo lvm_init(): unregister both devices, drop the gendisk and the
 * per-major block layer tables, and tear down procfs/devfs entries. */
static void lvm_cleanup(void)
{
	if (devfs_unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0)
		printk(KERN_ERR "%s -- devfs_unregister_chrdev failed\n",
		       lvm_name);
	if (devfs_unregister_blkdev(MAJOR_NR, lvm_name) < 0)
		printk(KERN_ERR "%s -- devfs_unregister_blkdev failed\n",
		       lvm_name);



	/* delete our gendisk from chain */
	del_gendisk(&lvm_gendisk);

	/* detach our size/blocksize/hardsect tables from the block layer */
	blk_size[MAJOR_NR] = NULL;
	blksize_size[MAJOR_NR] = NULL;
	hardsect_size[MAJOR_NR] = NULL;

#ifdef LVM_HD_NAME
	/* reference from linux/drivers/block/genhd.c */
	lvm_hd_name_ptr = NULL;
#endif

	/* unregister with procfs and devfs */
	lvm_fin_fs();

#ifdef MODULE
	printk(KERN_INFO "%s -- Module successfully deactivated\n", lvm_name);
#endif

	return;
}	/* lvm_cleanup() */
523
524/*
525 * support function to initialize lvm variables
526 */
527static void __init lvm_init_vars(void)
528{
529	int v;
530
531	loadtime = CURRENT_TIME;
532
533	lvm_lock = lvm_snapshot_lock = SPIN_LOCK_UNLOCKED;
534
535	pe_lock_req.lock = UNLOCK_PE;
536	pe_lock_req.data.lv_dev = 0;
537	pe_lock_req.data.pv_dev = 0;
538	pe_lock_req.data.pv_offset = 0;
539
540	/* Initialize VG pointers */
541	for (v = 0; v < ABS_MAX_VG + 1; v++)
542		vg[v] = NULL;
543
544	/* Initialize LV -> VG association */
545	for (v = 0; v < ABS_MAX_LV; v++) {
546		/* index ABS_MAX_VG never used for real VG */
547		vg_lv_map[v].vg_number = ABS_MAX_VG;
548		vg_lv_map[v].lv_number = -1;
549	}
550
551	return;
552} /* lvm_init_vars() */
553
554
555/********************************************************************
556 *
557 * Character device functions
558 *
559 ********************************************************************/
560
561#define MODE_TO_STR(mode) (mode) & FMODE_READ ? "READ" : "", \
562			  (mode) & FMODE_WRITE ? "WRITE" : ""
563
564/*
565 * character device open routine
566 */
567static int lvm_chr_open(struct inode *inode, struct file *file)
568{
569	int minor = MINOR(inode->i_rdev);
570
571	P_DEV("chr_open MINOR: %d  VG#: %d  mode: %s%s  lock: %d\n",
572	      minor, VG_CHR(minor), MODE_TO_STR(file->f_mode), lock);
573
574	/* super user validation */
575	if (!capable(CAP_SYS_ADMIN)) return -EACCES;
576
577	/* Group special file open */
578	if (VG_CHR(minor) > MAX_VG) return -ENXIO;
579
580	spin_lock(&lvm_lock);
581	if(lock == current->pid)
582		_lock_open_count++;
583	spin_unlock(&lvm_lock);
584
585	lvm_chr_open_count++;
586
587	MOD_INC_USE_COUNT;
588
589	return 0;
590} /* lvm_chr_open() */
591
592
593/*
594 * character device i/o-control routine
595 *
596 * Only one changing process can do changing ioctl at one time,
597 * others will block.
598 *
599 */
/* Control-device ioctl dispatcher: copies request structures from user
 * space and hands off to the lvm_do_* worker functions.  Returns the
 * worker's result, -EFAULT on bad user pointers, -ENXIO when the VG for
 * this minor does not exist, or -ENOTTY for unknown commands. */
static int lvm_chr_ioctl(struct inode *inode, struct file *file,
			 uint command, ulong a)
{
	int minor = MINOR(inode->i_rdev);
	uint extendable, l, v;
	void *arg = (void *) a;
	lv_t lv;
	/* may be NULL: commands that need it check before dereferencing */
	vg_t* vg_ptr = vg[VG_CHR(minor)];

	/* otherwise cc will complain about unused variables */
	(void) lvm_lock;

	P_IOCTL("chr MINOR: %d  command: 0x%X  arg: %p  VG#: %d  mode: %s%s\n",
		minor, command, arg, VG_CHR(minor), MODE_TO_STR(file->f_mode));

#ifdef LVM_TOTAL_RESET
	if (lvm_reset_spindown > 0) return -EACCES;
#endif

	/* Main command switch */
	switch (command) {
	case LVM_LOCK_LVM:
		/* lock the LVM */
		return lvm_do_lock_lvm();

	case LVM_GET_IOP_VERSION:
		/* check lvm version to ensure driver/tools+lib
		   interoperability */
		if (copy_to_user(arg, &lvm_iop_version, sizeof(ushort)) != 0)
			return -EFAULT;
		return 0;

#ifdef LVM_TOTAL_RESET
	case LVM_RESET:
		/* lock reset function */
		lvm_reset_spindown = 1;
		for (v = 0; v < ABS_MAX_VG; v++) {
			if (vg[v] != NULL) lvm_do_vg_remove(v);
		}

#ifdef MODULE
		/* force the module use count back to exactly 1 */
		while (GET_USE_COUNT(&__this_module) < 1)
			MOD_INC_USE_COUNT;
		while (GET_USE_COUNT(&__this_module) > 1)
			MOD_DEC_USE_COUNT;
#endif /* MODULE */
		lock = 0;	/* release lock */
		wake_up_interruptible(&lvm_wait);
		return 0;
#endif /* LVM_TOTAL_RESET */


	case LE_REMAP:
		/* remap a logical extent (after moving the physical extent) */
		return lvm_do_le_remap(vg_ptr,arg);

	case PE_LOCK_UNLOCK:
		/* lock/unlock i/o to a physical extent to move it to another
		   physical volume (move's done in user space's pvmove) */
		return lvm_do_pe_lock_unlock(vg_ptr,arg);

	case VG_CREATE_OLD:
		/* create a VGDA */
		return lvm_do_vg_create(arg, minor);

	case VG_CREATE:
	        /* create a VGDA, assume VG number is filled in */
		return lvm_do_vg_create(arg, -1);

	case VG_EXTEND:
		/* extend a volume group */
		return lvm_do_vg_extend(vg_ptr, arg);

	case VG_REDUCE:
		/* reduce a volume group */
		return lvm_do_vg_reduce(vg_ptr, arg);

	case VG_RENAME:
		/* rename a volume group */
		return lvm_do_vg_rename(vg_ptr, arg);

	case VG_REMOVE:
		/* remove an inactive VGDA */
		return lvm_do_vg_remove(minor);


	case VG_SET_EXTENDABLE:
		/* set/clear extendability flag of volume group */
		if (vg_ptr == NULL) return -ENXIO;
		if (copy_from_user(&extendable, arg, sizeof(extendable)) != 0)
			return -EFAULT;

		/* only the exact flag value or its complement is accepted */
		if (extendable == VG_EXTENDABLE ||
		    extendable == ~VG_EXTENDABLE) {
			if (extendable == VG_EXTENDABLE)
				vg_ptr->vg_status |= VG_EXTENDABLE;
			else
				vg_ptr->vg_status &= ~VG_EXTENDABLE;
		} else return -EINVAL;
		return 0;


	case VG_STATUS:
		/* get volume group data (only the vg_t struct) */
		if (vg_ptr == NULL) return -ENXIO;
		if (copy_to_user(arg, vg_ptr, sizeof(vg_t)) != 0)
			return -EFAULT;
		return 0;


	case VG_STATUS_GET_COUNT:
		/* get volume group count */
		if (copy_to_user(arg, &vg_count, sizeof(vg_count)) != 0)
			return -EFAULT;
		return 0;


	case VG_STATUS_GET_NAMELIST:
		/* get volume group names: copy out a packed array of
		   NAME_LEN-byte names, one entry per existing VG */
		for (l = v = 0; v < ABS_MAX_VG; v++) {
			if (vg[v] != NULL) {
				if (copy_to_user(arg + l * NAME_LEN,
						 vg[v]->vg_name,
						 NAME_LEN) != 0)
					return -EFAULT;
				l++;
			}
		}
		return 0;


	case LV_CREATE:
	case LV_EXTEND:
	case LV_REDUCE:
	case LV_REMOVE:
	case LV_RENAME:
		/* create, extend, reduce, remove or rename a logical volume */
		if (vg_ptr == NULL) return -ENXIO;
		if (copy_from_user(&lv_req, arg, sizeof(lv_req)) != 0)
			return -EFAULT;

		/* LV_REMOVE only needs the name, not the full lv_t */
		if (command != LV_REMOVE) {
			if (copy_from_user(&lv, lv_req.lv, sizeof(lv_t)) != 0)
				return -EFAULT;
		}
		/* all five commands return from this inner switch, so
		   control never falls through to the cases below */
		switch (command) {
		case LV_CREATE:
			return lvm_do_lv_create(minor, lv_req.lv_name, &lv);

		case LV_EXTEND:
		case LV_REDUCE:
			return lvm_do_lv_extend_reduce(minor, lv_req.lv_name, &lv);
		case LV_REMOVE:
			return lvm_do_lv_remove(minor, lv_req.lv_name, -1);

		case LV_RENAME:
			return lvm_do_lv_rename(vg_ptr, &lv_req, &lv);
		}




	case LV_STATUS_BYNAME:
		/* get status of a logical volume by name */
		return lvm_do_lv_status_byname(vg_ptr, arg);


	case LV_STATUS_BYINDEX:
		/* get status of a logical volume by index */
		return lvm_do_lv_status_byindex(vg_ptr, arg);


	case LV_STATUS_BYDEV:
		/* get status of a logical volume by device */
		return lvm_do_lv_status_bydev(vg_ptr, arg);


	case PV_CHANGE:
		/* change a physical volume */
		return lvm_do_pv_change(vg_ptr,arg);


	case PV_STATUS:
		/* get physical volume data (pv_t structure only) */
		return lvm_do_pv_status(vg_ptr,arg);


	case PV_FLUSH:
		/* physical volume buffer flush/invalidate */
		return lvm_do_pv_flush(arg);


	default:
		printk(KERN_WARNING
		       "%s -- lvm_chr_ioctl: unknown command 0x%x\n",
		       lvm_name, command);
		return -ENOTTY;
	}

	return 0;
} /* lvm_chr_ioctl */
801
802
803/*
804 * character device close routine
805 */
/* Control-device close: balances the open count and, when the closing
 * process is the one holding the global LVM lock, either decrements its
 * re-open count or — on its last close — releases the lock and wakes
 * any waiters. */
static int lvm_chr_close(struct inode *inode, struct file *file)
{
	P_DEV("chr_close MINOR: %d  VG#: %d\n",
	      MINOR(inode->i_rdev), VG_CHR(MINOR(inode->i_rdev)));

#ifdef LVM_TOTAL_RESET
	if (lvm_reset_spindown > 0) {
		lvm_reset_spindown = 0;
		lvm_chr_open_count = 0;
	}
#endif

	if (lvm_chr_open_count > 0) lvm_chr_open_count--;

	spin_lock(&lvm_lock);
	if(lock == current->pid) {
		if(!_lock_open_count) {
			P_DEV("chr_close: unlocking LVM for pid %d\n", lock);
			lock = 0;
			wake_up_interruptible(&lvm_wait);
		} else
			_lock_open_count--;
	}
	spin_unlock(&lvm_lock);

	MOD_DEC_USE_COUNT;

	return 0;
} /* lvm_chr_close() */
835
836
837
838/********************************************************************
839 *
840 * Block device functions
841 *
842 ********************************************************************/
843
844/*
845 * block device open routine
846 */
847static int lvm_blk_open(struct inode *inode, struct file *file)
848{
849	int minor = MINOR(inode->i_rdev);
850	lv_t *lv_ptr;
851	vg_t *vg_ptr = vg[VG_BLK(minor)];
852
853	P_DEV("blk_open MINOR: %d  VG#: %d  LV#: %d  mode: %s%s\n",
854	      minor, VG_BLK(minor), LV_BLK(minor), MODE_TO_STR(file->f_mode));
855
856#ifdef LVM_TOTAL_RESET
857	if (lvm_reset_spindown > 0)
858		return -EPERM;
859#endif
860
861	if (vg_ptr != NULL &&
862	    (vg_ptr->vg_status & VG_ACTIVE) &&
863	    (lv_ptr = vg_ptr->lv[LV_BLK(minor)]) != NULL &&
864	    LV_BLK(minor) >= 0 &&
865	    LV_BLK(minor) < vg_ptr->lv_max) {
866
867		/* Check parallel LV spindown (LV remove) */
868		if (lv_ptr->lv_status & LV_SPINDOWN) return -EPERM;
869
870		/* Check inactive LV and open for read/write */
871		/* We need to be able to "read" an inactive LV
872		   to re-activate it again */
873		if ((file->f_mode & FMODE_WRITE) &&
874		    (!(lv_ptr->lv_status & LV_ACTIVE)))
875		    return -EPERM;
876
877		if (!(lv_ptr->lv_access & LV_WRITE) &&
878		    (file->f_mode & FMODE_WRITE))
879			return -EACCES;
880
881
882                /* be sure to increment VG counter */
883		if (lv_ptr->lv_open == 0) vg_ptr->lv_open++;
884		lv_ptr->lv_open++;
885
886		MOD_INC_USE_COUNT;
887
888		P_DEV("blk_open OK, LV size %d\n", lv_ptr->lv_size);
889
890		return 0;
891	}
892	return -ENXIO;
893} /* lvm_blk_open() */
894
895/* Deliver "hard disk geometry" */
896static int _hdio_getgeo(ulong a, lv_t *lv_ptr, int what)
897{
898	int ret = 0;
899	uchar heads = 128;
900	uchar sectors = 128;
901	ulong start = 0;
902	uint cylinders;
903
904	while ( heads * sectors > lv_ptr->lv_size) {
905		heads >>= 1;
906		sectors >>= 1;
907	}
908	cylinders = lv_ptr->lv_size / heads / sectors;
909
910	switch (what) {
911		case 0:
912		{
913			struct hd_geometry *hd = (struct hd_geometry *) a;
914
915			if (put_user(heads, &hd->heads) ||
916	    		    put_user(sectors, &hd->sectors) ||
917	    		    put_user((ushort) cylinders, &hd->cylinders) ||
918			    put_user(start, &hd->start))
919				return -EFAULT;
920			break;
921		}
922
923#ifdef HDIO_GETGEO_BIG
924		case 1:
925		{
926			struct hd_big_geometry *hd =
927				(struct hd_big_geometry *) a;
928
929			if (put_user(heads, &hd->heads) ||
930	    		    put_user(sectors, &hd->sectors) ||
931	    		    put_user(cylinders, &hd->cylinders) ||
932			    put_user(start, &hd->start))
933				return -EFAULT;
934			break;
935		}
936#endif
937
938	}
939
940	P_IOCTL("%s -- lvm_blk_ioctl -- cylinders: %d\n",
941		lvm_name, cylinders);
942	return ret;
943}
944
945
946/*
947 * block device i/o-control routine
948 */
949static int lvm_blk_ioctl(struct inode *inode, struct file *file,
950			 uint cmd, ulong a)
951{
952	kdev_t dev = inode->i_rdev;
953	int minor = MINOR(dev), ret;
954	vg_t *vg_ptr = vg[VG_BLK(minor)];
955	lv_t *lv_ptr = vg_ptr->lv[LV_BLK(minor)];
956	void *arg = (void *) a;
957
958	P_IOCTL("blk MINOR: %d  cmd: 0x%X  arg: %p  VG#: %d  LV#: %d  "
959		"mode: %s%s\n", minor, cmd, arg, VG_BLK(minor),
960		LV_BLK(minor), MODE_TO_STR(file->f_mode));
961
962	switch (cmd) {
963		case BLKRASET:
964			/* set read ahead for block device */
965			ret = blk_ioctl(dev, cmd, a);
966			if (ret)
967				return ret;
968			lv_ptr->lv_read_ahead = (long) a;
969			LVM_CORRECT_READ_AHEAD(lv_ptr->lv_read_ahead);
970			break;
971
972		case HDIO_GETGEO:
973#ifdef HDIO_GETGEO_BIG
974		case HDIO_GETGEO_BIG:
975#endif
976			/* get disk geometry */
977			P_IOCTL("%s -- lvm_blk_ioctl -- HDIO_GETGEO\n",
978				lvm_name);
979			if (!a)
980				return -EINVAL;
981
982			switch (cmd) {
983				case HDIO_GETGEO:
984					return _hdio_getgeo(a, lv_ptr, 0);
985#ifdef HDIO_GETGEO_BIG
986				case HDIO_GETGEO_BIG:
987					return _hdio_getgeo(a, lv_ptr, 1);
988#endif
989			}
990
991		case LV_BMAP:
992			/* turn logical block into (dev_t, block). non privileged. */
993			/* don't bmap a snapshot, since the mapping can change */
994			if (lv_ptr->lv_access & LV_SNAPSHOT)
995				return -EPERM;
996
997			return lvm_user_bmap(inode, (struct lv_bmap *) arg);
998
999		case LV_SET_ACCESS:
1000			/* set access flags of a logical volume */
1001			if (!capable(CAP_SYS_ADMIN)) return -EACCES;
1002
1003			down_write(&lv_ptr->lv_lock);
1004			lv_ptr->lv_access = (ulong) arg;
1005			up_write(&lv_ptr->lv_lock);
1006
1007			if ( lv_ptr->lv_access & LV_WRITE)
1008				set_device_ro(lv_ptr->lv_dev, 0);
1009			else
1010				set_device_ro(lv_ptr->lv_dev, 1);
1011			break;
1012
1013
1014		case LV_SET_ALLOCATION:
1015			/* set allocation flags of a logical volume */
1016			if (!capable(CAP_SYS_ADMIN)) return -EACCES;
1017			down_write(&lv_ptr->lv_lock);
1018			lv_ptr->lv_allocation = (ulong) arg;
1019			up_write(&lv_ptr->lv_lock);
1020			break;
1021
1022		case LV_SET_STATUS:
1023			/* set status flags of a logical volume */
1024			if (!capable(CAP_SYS_ADMIN)) return -EACCES;
1025			if (!((ulong) arg & LV_ACTIVE) && lv_ptr->lv_open > 1)
1026				return -EPERM;
1027			down_write(&lv_ptr->lv_lock);
1028			lv_ptr->lv_status = (ulong) arg;
1029			up_write(&lv_ptr->lv_lock);
1030			break;
1031
1032		case LV_SNAPSHOT_USE_RATE:
1033			return lvm_get_snapshot_use_rate(lv_ptr, arg);
1034
1035		default:
1036			/* Handle rest here */
1037			ret = blk_ioctl(dev, cmd, a);
1038			if (ret)
1039				printk(KERN_WARNING
1040				       "%s -- lvm_blk_ioctl: unknown "
1041				       "cmd 0x%x\n",
1042				       lvm_name, cmd);
1043			return ret;
1044	}
1045
1046	return 0;
1047} /* lvm_blk_ioctl() */
1048
1049
1050/*
1051 * block device close routine
1052 */
1053static int lvm_blk_close(struct inode *inode, struct file *file)
1054{
1055	int minor = MINOR(inode->i_rdev);
1056	vg_t *vg_ptr = vg[VG_BLK(minor)];
1057	lv_t *lv_ptr = vg_ptr->lv[LV_BLK(minor)];
1058
1059	P_DEV("blk_close MINOR: %d  VG#: %d  LV#: %d\n",
1060	      minor, VG_BLK(minor), LV_BLK(minor));
1061
1062	if (lv_ptr->lv_open == 1) vg_ptr->lv_open--;
1063	lv_ptr->lv_open--;
1064
1065	MOD_DEC_USE_COUNT;
1066
1067	return 0;
1068} /* lvm_blk_close() */
1069
/*
 * LV_SNAPSHOT_USE_RATE backend: report -- and optionally wait on -- the
 * fill level of a snapshot LV.
 *
 * 'arg' points to a user-space lv_snapshot_use_rate_req_t:
 *   block == 0:          store the requested rate (percent) and sleep
 *                        until the snapshot usage reaches it
 *   block == O_NONBLOCK: just report the current usage
 * On success the rate field is written back with the current usage.
 *
 * Returns 0, -EPERM (not a snapshot LV), -EFAULT, or -EINVAL.
 */
static int lvm_get_snapshot_use_rate(lv_t *lv, void *arg)
{
	lv_snapshot_use_rate_req_t lv_rate_req;

	/* only meaningful for snapshot LVs */
	down_read(&lv->lv_lock);
	if (!(lv->lv_access & LV_SNAPSHOT)) {
		up_read(&lv->lv_lock);
		return -EPERM;
	}
	up_read(&lv->lv_lock);

	if (copy_from_user(&lv_rate_req, arg, sizeof(lv_rate_req)))
		return -EFAULT;

	/* rate is a percentage */
	if (lv_rate_req.rate < 0 || lv_rate_req.rate > 100)
		return -EINVAL;

	switch (lv_rate_req.block) {
	case 0:
		/* blocking mode: record the threshold ... */
		down_write(&lv->lv_lock);
		lv->lv_snapshot_use_rate = lv_rate_req.rate;
		up_write(&lv->lv_lock);
		/* ... and if current usage is still below it, sleep on
		   lv_snapshot_wait until woken (presumably by the COW
		   path once usage crosses the threshold -- the waker is
		   not visible in this file section).
		   NOTE(review): assumes lv_remap_end != 0 for an active
		   snapshot, otherwise this divides by zero -- verify */
		down_read(&lv->lv_lock);
		if (lv->lv_remap_ptr * 100 / lv->lv_remap_end <
		    lv->lv_snapshot_use_rate) {
			up_read(&lv->lv_lock);
			interruptible_sleep_on(&lv->lv_snapshot_wait);
			down_read(&lv->lv_lock);
		}
		up_read(&lv->lv_lock);
		break;

	case O_NONBLOCK:
		break;

	default:
		return -EINVAL;
	}
	/* report current usage (percent) back to the caller */
	down_read(&lv->lv_lock);
	lv_rate_req.rate = lv->lv_remap_ptr * 100 / lv->lv_remap_end;
	up_read(&lv->lv_lock);

	return copy_to_user(arg, &lv_rate_req,
			    sizeof(lv_rate_req)) ? -EFAULT : 0;
}
1115
1116static int lvm_user_bmap(struct inode *inode, struct lv_bmap *user_result)
1117{
1118	struct buffer_head bh;
1119	unsigned long block;
1120	int err;
1121
1122	if (get_user(block, &user_result->lv_block))
1123		return -EFAULT;
1124
1125	memset(&bh,0,sizeof bh);
1126	bh.b_blocknr = block;
1127	bh.b_dev = bh.b_rdev = inode->i_rdev;
1128	bh.b_size = lvm_get_blksize(bh.b_dev);
1129	bh.b_rsector = block * (bh.b_size >> 9);
1130	bh.b_end_io = NULL;
1131	if ((err = lvm_map(&bh, READ)) < 0)  {
1132		printk("lvm map failed: %d\n", err);
1133		return -EINVAL;
1134	}
1135
1136	return put_user(kdev_t_to_nr(bh.b_rdev), &user_result->lv_dev) ||
1137	       put_user(bh.b_rsector/(bh.b_size>>9), &user_result->lv_block) ?
1138		-EFAULT : 0;
1139}
1140
1141
1142/*
1143 * block device support function for /usr/src/linux/drivers/block/ll_rw_blk.c
1144 * (see init_module/lvm_init)
1145 */
1146static void __remap_snapshot(kdev_t rdev, ulong rsector,
1147				    ulong pe_start, lv_t *lv, vg_t *vg) {
1148
1149	/* copy a chunk from the origin to a snapshot device */
1150	down_write(&lv->lv_lock);
1151
1152	/* we must redo lvm_snapshot_remap_block in order to avoid a
1153	   race condition in the gap where no lock was held */
1154	if (!lvm_snapshot_remap_block(&rdev, &rsector, pe_start, lv) &&
1155	    !lvm_snapshot_COW(rdev, rsector, pe_start, rsector, vg, lv))
1156		lvm_write_COW_table_block(vg, lv);
1157
1158	up_write(&lv->lv_lock);
1159}
1160
1161static inline void _remap_snapshot(kdev_t rdev, ulong rsector,
1162				   ulong pe_start, lv_t *lv, vg_t *vg) {
1163	int r;
1164
1165	/* check to see if this chunk is already in the snapshot */
1166	down_read(&lv->lv_lock);
1167	r = lvm_snapshot_remap_block(&rdev, &rsector, pe_start, lv);
1168	up_read(&lv->lv_lock);
1169
1170	if (!r)
1171		/* we haven't yet copied this block to the snapshot */
1172		__remap_snapshot(rdev, rsector, pe_start, lv, vg);
1173}
1174
1175
1176/*
1177 * extents destined for a pe that is on the move should be deferred
1178 */
1179static inline int _should_defer(kdev_t pv, ulong sector, uint32_t pe_size) {
1180	return ((pe_lock_req.lock == LOCK_PE) &&
1181		(pv == pe_lock_req.data.pv_dev) &&
1182		(sector >= pe_lock_req.data.pv_offset) &&
1183		(sector < (pe_lock_req.data.pv_offset + pe_size)));
1184}
1185
/*
 * Queue 'bh' for later if it targets the physical extent currently
 * locked for pvmove.
 *
 * Check/lock/re-check: the unlocked test and the read-locked test are
 * cheap fast paths; only when the extent really appears locked do we
 * drop the read lock, take the write lock, and test a third time before
 * queueing, because the lock state may change in the unlocked window
 * between up_read() and down_write().
 *
 * Returns 1 if the buffer was handled here (caller must not map it
 * now), 0 if the caller should proceed with the mapping.
 * NOTE(review): if the re-check under the write lock fails, 1 is still
 * returned although the buffer was neither queued nor mapped -- verify
 * against _flush_io()/UNLOCK_PE semantics that this cannot lose I/O.
 */
static inline int _defer_extent(struct buffer_head *bh, int rw,
				kdev_t pv, ulong sector, uint32_t pe_size)
{
	if (pe_lock_req.lock == LOCK_PE) {
		down_read(&_pe_lock);
		if (_should_defer(pv, sector, pe_size)) {
			up_read(&_pe_lock);
			down_write(&_pe_lock);
			if (_should_defer(pv, sector, pe_size))
				_queue_io(bh, rw);
			up_write(&_pe_lock);
			return 1;
		}
		up_read(&_pe_lock);
	}
	return 0;
}
1203
1204
/*
 * Remap a buffer_head from its logical (LV device, sector) address to
 * the underlying physical volume device and sector.  Handles linear and
 * striped LVs plus snapshot copy-on-write.  Called for every request
 * via lvm_make_request_fn().
 *
 * Returns 1  -- bh remapped, pass it down to the lower device;
 *         0  -- bh deferred/queued here (pvmove in progress);
 *        -1  -- error; bh has been completed with an I/O error.
 */
static int lvm_map(struct buffer_head *bh, int rw)
{
	int minor = MINOR(bh->b_rdev);
	ulong index;
	ulong pe_start;
	/* request size in 512-byte sectors */
	ulong size = bh->b_size >> 9;
	ulong rsector_org = bh->b_rsector;
	ulong rsector_map;
	kdev_t rdev_map;
	vg_t *vg_this = vg[VG_BLK(minor)];
	lv_t *lv = vg_this->lv[LV_BLK(minor)];


	down_read(&lv->lv_lock);
	if (!(lv->lv_status & LV_ACTIVE)) {
		printk(KERN_ALERT
		       "%s - lvm_map: ll_rw_blk for inactive LV %s\n",
		       lvm_name, lv->lv_name);
		goto bad;
	}

	/* refuse writes to a read-only LV */
	if ((rw == WRITE || rw == WRITEA) &&
	    !(lv->lv_access & LV_WRITE)) {
		printk(KERN_CRIT
		       "%s - lvm_map: ll_rw_blk write for readonly LV %s\n",
		       lvm_name, lv->lv_name);
		goto bad;
	}

	P_MAP("%s - lvm_map minor: %d  *rdev: %s  *rsector: %lu  size:%lu\n",
	      lvm_name, minor,
	      kdevname(bh->b_rdev),
	      rsector_org, size);

	/* bounds check against the LV size (in sectors) */
	if (rsector_org + size > lv->lv_size) {
		printk(KERN_ALERT
		       "%s - lvm_map access beyond end of device; *rsector: "
                       "%lu or size: %lu wrong for minor: %2d\n",
                       lvm_name, rsector_org, size, minor);
		goto bad;
	}


	if (lv->lv_stripes < 2) { /* linear mapping */
		/* get the index */
		index = rsector_org / vg_this->pe_size;
		pe_start = lv->lv_current_pe[index].pe;
		rsector_map = lv->lv_current_pe[index].pe +
			(rsector_org % vg_this->pe_size);
		rdev_map = lv->lv_current_pe[index].dev;

		P_MAP("lv_current_pe[%ld].pe: %d  rdev: %s  rsector:%ld\n",
		      index, lv->lv_current_pe[index].pe,
		      kdevname(rdev_map), rsector_map);

	} else {		/* striped mapping */
		ulong stripe_index;
		ulong stripe_length;

		/* work out which stripe of the full stripe set the
		   sector lands in, then which LE holds that stripe */
		stripe_length = vg_this->pe_size * lv->lv_stripes;
		stripe_index = (rsector_org % stripe_length) /
			lv->lv_stripesize;
		index = rsector_org / stripe_length +
			(stripe_index % lv->lv_stripes) *
			(lv->lv_allocated_le / lv->lv_stripes);
		pe_start = lv->lv_current_pe[index].pe;
		rsector_map = lv->lv_current_pe[index].pe +
			(rsector_org % stripe_length) -
			(stripe_index % lv->lv_stripes) * lv->lv_stripesize -
			stripe_index / lv->lv_stripes *
			(lv->lv_stripes - 1) * lv->lv_stripesize;
		rdev_map = lv->lv_current_pe[index].dev;

		P_MAP("lv_current_pe[%ld].pe: %d  rdev: %s  rsector:%ld\n"
		      "stripe_length: %ld  stripe_index: %ld\n",
		      index, lv->lv_current_pe[index].pe, kdevname(rdev_map),
		      rsector_map, stripe_length, stripe_index);
	}

	/*
	 * Queue writes to physical extents on the move until move completes.
	 * Don't get _pe_lock until there is a reasonable expectation that
	 * we need to queue this request, because this is in the fast path.
	 */
	if (rw == WRITE || rw == WRITEA) {
		if(_defer_extent(bh, rw, rdev_map,
				 rsector_map, vg_this->pe_size)) {

			up_read(&lv->lv_lock);
			return 0;
		}

		lv->lv_current_pe[index].writes++;	/* statistic */
	} else
		lv->lv_current_pe[index].reads++;	/* statistic */

	/* snapshot volume exception handling on physical device address base */
	if (!(lv->lv_access & (LV_SNAPSHOT|LV_SNAPSHOT_ORG)))
		goto out;

	if (lv->lv_access & LV_SNAPSHOT) { /* remap snapshot */
		/* reading the snapshot itself: redirect into the COW
		   store if this block has an exception entry */
		if (lvm_snapshot_remap_block(&rdev_map, &rsector_map,
					     pe_start, lv) < 0)
			goto bad;

	} else if (rw == WRITE || rw == WRITEA) { /* snapshot origin */
		lv_t *snap;

		/* start with first snapshot and loop through all of
		   them */
		for (snap = lv->lv_snapshot_next; snap;
		     snap = snap->lv_snapshot_next) {
			/* Check for inactive snapshot */
			if (!(snap->lv_status & LV_ACTIVE))
				continue;

			/* Serializes the COW with the accesses to the
			   snapshot device */
			_remap_snapshot(rdev_map, rsector_map,
					 pe_start, snap, vg_this);
		}
	}

 out:
	bh->b_rdev = rdev_map;
	bh->b_rsector = rsector_map;
	up_read(&lv->lv_lock);
	return 1;

 bad:
	/* the call below is the body of the if -- the flat indentation
	   is misleading but the code is correct */
	if (bh->b_end_io)
	buffer_IO_error(bh);
	up_read(&lv->lv_lock);
	return -1;
} /* lvm_map() */
1340
1341
1342/*
1343 * internal support functions
1344 */
1345
1346#ifdef LVM_HD_NAME
1347/*
1348 * generate "hard disk" name
1349 */
1350void lvm_hd_name(char *buf, int minor)
1351{
1352	int len = 0;
1353	lv_t *lv_ptr;
1354
1355	if (vg[VG_BLK(minor)] == NULL ||
1356	    (lv_ptr = vg[VG_BLK(minor)]->lv[LV_BLK(minor)]) == NULL)
1357		return;
1358	len = strlen(lv_ptr->lv_name) - 5;
1359	memcpy(buf, &lv_ptr->lv_name[5], len);
1360	buf[len] = 0;
1361	return;
1362}
1363#endif
1364
1365
1366
1367
1368/*
1369 * make request function
1370 */
1371static int lvm_make_request_fn(request_queue_t *q,
1372			       int rw,
1373			       struct buffer_head *bh)
1374{
1375	return (lvm_map(bh, rw) <= 0) ? 0 : 1;
1376}
1377
1378
1379/********************************************************************
1380 *
1381 * Character device support functions
1382 *
1383 ********************************************************************/
1384/*
1385 * character device support function logical volume manager lock
1386 */
/*
 * Acquire the global LVM lock for the calling process (recorded by
 * pid in the file-scope 'lock', protected by 'lvm_lock').  Re-entrant
 * for the holder itself.  Sleeps interruptibly while another process
 * holds it.
 *
 * Returns 0 on success, -EINTR if interrupted by a signal while
 * waiting, -EACCES during a total reset spindown.
 */
static int lvm_do_lock_lvm(void)
{
lock_try_again:
	spin_lock(&lvm_lock);
	if (lock != 0 && lock != current->pid) {
		/* someone else holds the lock: drop the spinlock and
		   sleep until the holder wakes lvm_wait, then retry */
		P_DEV("lvm_do_lock_lvm: locked by pid %d ...\n", lock);
		spin_unlock(&lvm_lock);
		interruptible_sleep_on(&lvm_wait);
		/* woken by a signal instead of an unlock? */
		if (current->sigpending != 0)
			return -EINTR;
#ifdef LVM_TOTAL_RESET
		if (lvm_reset_spindown > 0)
			return -EACCES;
#endif
		goto lock_try_again;
	}
	lock = current->pid;
	P_DEV("lvm_do_lock_lvm: locking LVM for pid %d\n", lock);
	spin_unlock(&lvm_lock);
	return 0;
} /* lvm_do_lock_lvm */
1408
1409
1410/*
1411 * character device support function lock/unlock physical extend
1412 */
/*
 * Lock or unlock a physical extent for pvmove.
 *
 * LOCK_PE records the (lv_dev, pv_dev, pv_offset) of the PE being
 * moved in the global pe_lock_req (guarded by _pe_lock); writes that
 * hit the locked extent are then queued by _defer_extent() until
 * UNLOCK_PE, which clears the lock and flushes the deferred buffers.
 *
 * Returns 0 on success, -ENXIO, -EFAULT, -EBUSY or -EINVAL.
 */
static int lvm_do_pe_lock_unlock(vg_t *vg_ptr, void *arg)
{
	pe_lock_req_t new_lock;
	struct buffer_head *bh;
	uint p;

	if (vg_ptr == NULL) return -ENXIO;
	if (copy_from_user(&new_lock, arg, sizeof(new_lock)) != 0)
		return -EFAULT;

	switch (new_lock.lock) {
	case LOCK_PE:
		/* the PV being locked must belong to this VG */
		for (p = 0; p < vg_ptr->pv_max; p++) {
			if (vg_ptr->pv[p] != NULL &&
			    new_lock.data.pv_dev == vg_ptr->pv[p]->pv_dev)
				break;
		}
		if (p == vg_ptr->pv_max) return -ENXIO;

		/*
		 * this sync releaves memory pressure to lessen the
		 * likelyhood of pvmove being paged out - resulting in
		 * deadlock.
		 *
		 * This method of doing a pvmove is broken
		 */
		/* NOTE(review): this syncs the lv_dev stored in the
		   *previous* pe_lock_req, not new_lock.data.lv_dev --
		   verify this is intentional */
		fsync_dev(pe_lock_req.data.lv_dev);

		down_write(&_pe_lock);
		if (pe_lock_req.lock == LOCK_PE) {
			/* only one PE may be locked at a time */
			up_write(&_pe_lock);
			return -EBUSY;
		}

		/* Should we do to_kdev_t() on the pv_dev and lv_dev??? */
		pe_lock_req.lock = LOCK_PE;
		pe_lock_req.data.lv_dev = new_lock.data.lv_dev;
		pe_lock_req.data.pv_dev = new_lock.data.pv_dev;
		pe_lock_req.data.pv_offset = new_lock.data.pv_offset;
		up_write(&_pe_lock);

		/* some requests may have got through since the fsync */
		fsync_dev(pe_lock_req.data.pv_dev);
		break;

	case UNLOCK_PE:
		/* clear the lock and take ownership of the deferred
		   buffer list while still under _pe_lock */
		down_write(&_pe_lock);
		pe_lock_req.lock = UNLOCK_PE;
		pe_lock_req.data.lv_dev = 0;
		pe_lock_req.data.pv_dev = 0;
		pe_lock_req.data.pv_offset = 0;
		bh = _dequeue_io();
		up_write(&_pe_lock);

		/* handle all deferred io for this PE */
		_flush_io(bh);
		break;

	default:
		return -EINVAL;
	}
	return 0;
}
1476
1477
1478/*
1479 * character device support function logical extend remap
1480 */
/*
 * Remap one logical extent of an LV from (old_dev, old_pe) to
 * (new_dev, new_pe), as requested from user space -- the pvmove
 * back-end.  The LV is found by name within the given VG.
 *
 * Returns 0 on success, -ENXIO (no such VG/LV), -EFAULT, or -EINVAL
 * (LV found but no LE matches the old location).
 */
static int lvm_do_le_remap(vg_t *vg_ptr, void *arg)
{
	uint l, le;
	lv_t *lv_ptr;

	if (vg_ptr == NULL) return -ENXIO;
	/* le_remap_req is a file-scope buffer, not a local -- callers
	   are presumably serialized by the global LVM lock; verify
	   against lvm_chr_ioctl() */
	if (copy_from_user(&le_remap_req, arg,
			   sizeof(le_remap_req_t)) != 0)
		return -EFAULT;

	/* locate the LV by name ... */
	for (l = 0; l < vg_ptr->lv_max; l++) {
		lv_ptr = vg_ptr->lv[l];

		if (!lv_ptr)
			continue;

		if (strcmp(lv_ptr->lv_name, le_remap_req.lv_name) == 0) {
			down_write(&lv_ptr->lv_lock);
			/* ... then the LE matching the old (dev, pe) pair */
			for (le = 0; le < lv_ptr->lv_allocated_le; le++) {
				if (lv_ptr->lv_current_pe[le].dev ==
				    le_remap_req.old_dev &&
				    lv_ptr->lv_current_pe[le].pe ==
				    le_remap_req.old_pe) {
					lv_ptr->lv_current_pe[le].dev =
					    le_remap_req.new_dev;
					lv_ptr->lv_current_pe[le].pe =
					    le_remap_req.new_pe;
					/* the new device may have a
					   different hardware sector size */
					__update_hardsectsize(lv_ptr);
					up_write(&lv_ptr->lv_lock);
					return 0;
				}
			}
			up_write(&lv_ptr->lv_lock);
			return -EINVAL;
		}
	}
	return -ENXIO;
} /* lvm_do_le_remap() */
1519
1520
1521/*
1522 * character device support function VGDA create
1523 */
1524static int lvm_do_vg_create(void *arg, int minor)
1525{
1526	int ret = 0;
1527	ulong l, ls = 0, p, size;
1528	lv_t lv;
1529	vg_t *vg_ptr;
1530	lv_t **snap_lv_ptr;
1531
1532	if ((vg_ptr = kmalloc(sizeof(vg_t),GFP_KERNEL)) == NULL) {
1533		printk(KERN_CRIT
1534		       "%s -- VG_CREATE: kmalloc error VG at line %d\n",
1535		       lvm_name, __LINE__);
1536		return -ENOMEM;
1537	}
1538	/* get the volume group structure */
1539	if (copy_from_user(vg_ptr, arg, sizeof(vg_t)) != 0) {
1540		P_IOCTL("lvm_do_vg_create ERROR: copy VG ptr %p (%d bytes)\n",
1541			arg, sizeof(vg_t));
1542		kfree(vg_ptr);
1543		return -EFAULT;
1544	}
1545
1546	/* VG_CREATE now uses minor number in VG structure */
1547	if (minor == -1)
1548		minor = vg_ptr->vg_number;
1549
1550	/* check limits */
1551	if (minor >= ABS_MAX_VG)
1552		return -EFAULT;
1553
1554	/* Validate it */
1555	if (vg[VG_CHR(minor)] != NULL) {
1556		P_IOCTL("lvm_do_vg_create ERROR: VG %d in use\n", minor);
1557		kfree(vg_ptr);
1558		return -EPERM;
1559	}
1560
1561	/* we are not that active so far... */
1562	vg_ptr->vg_status &= ~VG_ACTIVE;
1563	vg_ptr->pe_allocated = 0;
1564
1565	if (vg_ptr->pv_max > ABS_MAX_PV) {
1566		printk(KERN_WARNING
1567		       "%s -- Can't activate VG: ABS_MAX_PV too small\n",
1568		       lvm_name);
1569		kfree(vg_ptr);
1570		return -EPERM;
1571	}
1572
1573	if (vg_ptr->lv_max > ABS_MAX_LV) {
1574		printk(KERN_WARNING
1575		"%s -- Can't activate VG: ABS_MAX_LV too small for %u\n",
1576		       lvm_name, vg_ptr->lv_max);
1577		kfree(vg_ptr);
1578		return -EPERM;
1579	}
1580
1581	/* create devfs and procfs entries */
1582	lvm_fs_create_vg(vg_ptr);
1583
1584	vg[VG_CHR(minor)] = vg_ptr;
1585
1586	/* get the physical volume structures */
1587	vg_ptr->pv_act = vg_ptr->pv_cur = 0;
1588	for (p = 0; p < vg_ptr->pv_max; p++) {
1589		pv_t *pvp;
1590		/* user space address */
1591		if ((pvp = vg_ptr->pv[p]) != NULL) {
1592			ret = lvm_do_pv_create(pvp, vg_ptr, p);
1593			if ( ret != 0) {
1594				lvm_do_vg_remove(minor);
1595				return ret;
1596			}
1597		}
1598	}
1599
1600	size = vg_ptr->lv_max * sizeof(lv_t *);
1601	if ((snap_lv_ptr = vmalloc ( size)) == NULL) {
1602		printk(KERN_CRIT
1603		       "%s -- VG_CREATE: vmalloc error snapshot LVs at line %d\n",
1604		       lvm_name, __LINE__);
1605		lvm_do_vg_remove(minor);
1606		return -EFAULT;
1607	}
1608	memset(snap_lv_ptr, 0, size);
1609
1610	/* get the logical volume structures */
1611	vg_ptr->lv_cur = 0;
1612	for (l = 0; l < vg_ptr->lv_max; l++) {
1613		lv_t *lvp;
1614		/* user space address */
1615		if ((lvp = vg_ptr->lv[l]) != NULL) {
1616			if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0) {
1617				P_IOCTL("ERROR: copying LV ptr %p (%d bytes)\n",
1618					lvp, sizeof(lv_t));
1619				lvm_do_vg_remove(minor);
1620				return -EFAULT;
1621			}
1622			if ( lv.lv_access & LV_SNAPSHOT) {
1623				snap_lv_ptr[ls] = lvp;
1624				vg_ptr->lv[l] = NULL;
1625				ls++;
1626				continue;
1627			}
1628			vg_ptr->lv[l] = NULL;
1629			/* only create original logical volumes for now */
1630			if (lvm_do_lv_create(minor, lv.lv_name, &lv) != 0) {
1631				lvm_do_vg_remove(minor);
1632				return -EFAULT;
1633			}
1634		}
1635	}
1636
1637	/* Second path to correct snapshot logical volumes which are not
1638	   in place during first path above */
1639	for (l = 0; l < ls; l++) {
1640		lv_t *lvp = snap_lv_ptr[l];
1641		if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0) {
1642			lvm_do_vg_remove(minor);
1643			return -EFAULT;
1644		}
1645		if (lvm_do_lv_create(minor, lv.lv_name, &lv) != 0) {
1646			lvm_do_vg_remove(minor);
1647			return -EFAULT;
1648		}
1649	}
1650
1651	vfree(snap_lv_ptr);
1652
1653	vg_count++;
1654
1655
1656	MOD_INC_USE_COUNT;
1657
1658	/* let's go active */
1659	vg_ptr->vg_status |= VG_ACTIVE;
1660
1661	return 0;
1662} /* lvm_do_vg_create() */
1663
1664
1665/*
1666 * character device support function VGDA extend
1667 */
1668static int lvm_do_vg_extend(vg_t *vg_ptr, void *arg)
1669{
1670	int ret = 0;
1671	uint p;
1672	pv_t *pv_ptr;
1673
1674	if (vg_ptr == NULL) return -ENXIO;
1675	if (vg_ptr->pv_cur < vg_ptr->pv_max) {
1676		for (p = 0; p < vg_ptr->pv_max; p++) {
1677			if ( ( pv_ptr = vg_ptr->pv[p]) == NULL) {
1678				ret = lvm_do_pv_create(arg, vg_ptr, p);
1679				if ( ret != 0) return ret;
1680				pv_ptr = vg_ptr->pv[p];
1681				vg_ptr->pe_total += pv_ptr->pe_total;
1682				return 0;
1683			}
1684		}
1685	}
1686	return -EPERM;
1687} /* lvm_do_vg_extend() */
1688
1689
1690/*
1691 * character device support function VGDA reduce
1692 */
1693static int lvm_do_vg_reduce(vg_t *vg_ptr, void *arg) {
1694	uint p;
1695	pv_t *pv_ptr;
1696
1697	if (vg_ptr == NULL) return -ENXIO;
1698	if (copy_from_user(pv_name, arg, sizeof(pv_name)) != 0)
1699		return -EFAULT;
1700
1701	for (p = 0; p < vg_ptr->pv_max; p++) {
1702		pv_ptr = vg_ptr->pv[p];
1703		if (pv_ptr != NULL &&
1704		    strcmp(pv_ptr->pv_name,
1705			       pv_name) == 0) {
1706			if (pv_ptr->lv_cur > 0) return -EPERM;
1707			lvm_do_pv_remove(vg_ptr, p);
1708			/* Make PV pointer array contiguous */
1709			for (; p < vg_ptr->pv_max - 1; p++)
1710				vg_ptr->pv[p] = vg_ptr->pv[p + 1];
1711			vg_ptr->pv[p + 1] = NULL;
1712			return 0;
1713		}
1714	}
1715	return -ENXIO;
1716} /* lvm_do_vg_reduce */
1717
1718
1719/*
1720 * character device support function VG rename
1721 */
1722static int lvm_do_vg_rename(vg_t *vg_ptr, void *arg)
1723{
1724	int l = 0, p = 0, len = 0;
1725	char vg_name[NAME_LEN] = { 0,};
1726	char lv_name[NAME_LEN] = { 0,};
1727	char *ptr = NULL;
1728	lv_t *lv_ptr = NULL;
1729	pv_t *pv_ptr = NULL;
1730
1731	/* If the VG doesn't exist in the kernel then just exit */
1732	if (!vg_ptr) return 0;
1733
1734	if (copy_from_user(vg_name, arg, sizeof(vg_name)) != 0)
1735		return -EFAULT;
1736
1737	lvm_fs_remove_vg(vg_ptr);
1738
1739	strncpy ( vg_ptr->vg_name, vg_name, sizeof ( vg_name)-1);
1740	for ( l = 0; l < vg_ptr->lv_max; l++)
1741	{
1742		if ((lv_ptr = vg_ptr->lv[l]) == NULL) continue;
1743		memset (lv_ptr->vg_name, 0, sizeof (*vg_name));
1744		strncpy(lv_ptr->vg_name, vg_name, sizeof ( vg_name));
1745		ptr = strrchr(lv_ptr->lv_name, '/');
1746		ptr = ptr ? ptr + 1 : lv_ptr->lv_name;
1747		strncpy(lv_name, ptr, sizeof ( lv_name));
1748		len = sizeof(LVM_DIR_PREFIX);
1749		strcpy(lv_ptr->lv_name, LVM_DIR_PREFIX);
1750		strncat(lv_ptr->lv_name, vg_name, NAME_LEN - len);
1751		strcat (lv_ptr->lv_name, "/");
1752		len += strlen(vg_name) + 1;
1753		strncat(lv_ptr->lv_name, lv_name, NAME_LEN - len);
1754	}
1755	for ( p = 0; p < vg_ptr->pv_max; p++)
1756	{
1757		if ( (pv_ptr = vg_ptr->pv[p]) == NULL) continue;
1758		strncpy(pv_ptr->vg_name, vg_name, NAME_LEN);
1759	}
1760
1761	lvm_fs_create_vg(vg_ptr);
1762
1763	/* Need to add PV entries */
1764	for ( p = 0; p < vg_ptr->pv_act; p++) {
1765		pv_t *pv_ptr = vg_ptr->pv[p];
1766
1767		if (pv_ptr)
1768			lvm_fs_create_pv(vg_ptr, pv_ptr);
1769	}
1770
1771	/* Need to add LV entries */
1772        for ( l = 0; l < vg_ptr->lv_max; l++) {
1773		lv_t *lv_ptr = vg_ptr->lv[l];
1774
1775		if (!lv_ptr)
1776			continue;
1777
1778		lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].de =
1779	    		lvm_fs_create_lv(vg_ptr, lv_ptr);
1780	}
1781
1782	return 0;
1783} /* lvm_do_vg_rename */
1784
1785
1786/*
1787 * character device support function VGDA remove
1788 */
/*
 * Tear down a volume group: deactivate it, remove its procfs/devfs
 * entries, free all LVs (snapshots first, so origins are still present
 * while their snapshots unlink), free all PVs, then the VG itself.
 *
 * Returns 0 on success, -ENXIO (no such VG), or -EPERM while any LV
 * is still open.
 */
static int lvm_do_vg_remove(int minor)
{
	int i;
	vg_t *vg_ptr = vg[VG_CHR(minor)];
	pv_t *pv_ptr;

	if (vg_ptr == NULL) return -ENXIO;

#ifdef LVM_TOTAL_RESET
	if (vg_ptr->lv_open > 0 && lvm_reset_spindown == 0)
#else
	if (vg_ptr->lv_open > 0)
#endif
		return -EPERM;

	/* let's go inactive */
	vg_ptr->vg_status &= ~VG_ACTIVE;

	/* remove from procfs and devfs */
	lvm_fs_remove_vg(vg_ptr);

	/* free LVs */
	/* first free snapshot logical volumes */
	for (i = 0; i < vg_ptr->lv_max; i++) {
		if (vg_ptr->lv[i] != NULL &&
		    vg_ptr->lv[i]->lv_access & LV_SNAPSHOT) {
			lvm_do_lv_remove(minor, NULL, i);
			/* yield briefly between removals (presumably to
			   let pending I/O on the LV drain -- the waker
			   is not visible here) */
			current->state = TASK_UNINTERRUPTIBLE;
			schedule_timeout(1);
		}
	}
	/* then free the rest of the LVs */
	for (i = 0; i < vg_ptr->lv_max; i++) {
		if (vg_ptr->lv[i] != NULL) {
			lvm_do_lv_remove(minor, NULL, i);
			current->state = TASK_UNINTERRUPTIBLE;
			schedule_timeout(1);
		}
	}

	/* free PVs */
	for (i = 0; i < vg_ptr->pv_max; i++) {
		if ((pv_ptr = vg_ptr->pv[i]) != NULL) {
			P_KFREE("%s -- kfree %d\n", lvm_name, __LINE__);
			lvm_do_pv_remove(vg_ptr, i);
		}
	}

	P_KFREE("%s -- kfree %d\n", lvm_name, __LINE__);
	kfree(vg_ptr);
	vg[VG_CHR(minor)] = NULL;

	vg_count--;

	MOD_DEC_USE_COUNT;

	return 0;
} /* lvm_do_vg_remove() */
1847
1848
1849/*
1850 * character device support function physical volume create
1851 */
1852static int lvm_do_pv_create(pv_t *pvp, vg_t *vg_ptr, ulong p) {
1853	pv_t *pv;
1854	int err;
1855
1856	if (!vg_ptr)
1857		return -ENXIO;
1858
1859	pv = kmalloc(sizeof(pv_t),GFP_KERNEL);
1860	if (pv == NULL) {
1861		printk(KERN_CRIT
1862		       "%s -- PV_CREATE: kmalloc error PV at line %d\n",
1863		       lvm_name, __LINE__);
1864		return -ENOMEM;
1865	}
1866
1867	memset(pv, 0, sizeof(*pv));
1868
1869	if (copy_from_user(pv, pvp, sizeof(pv_t)) != 0) {
1870		P_IOCTL("lvm_do_pv_create ERROR: copy PV ptr %p (%d bytes)\n",
1871			pvp, sizeof(pv_t));
1872		kfree(pv);
1873		return -EFAULT;
1874	}
1875
1876	if ((err = _open_pv(pv))) {
1877		kfree(pv);
1878		return err;
1879	}
1880
1881	/* We don't need the PE list
1882	   in kernel space as with LVs pe_t list (see below) */
1883	pv->pe = NULL;
1884	pv->pe_allocated = 0;
1885	pv->pv_status = PV_ACTIVE;
1886	vg_ptr->pv_act++;
1887	vg_ptr->pv_cur++;
1888	lvm_fs_create_pv(vg_ptr, pv);
1889
1890	vg_ptr->pv[p] = pv;
1891	return 0;
1892} /* lvm_do_pv_create() */
1893
1894
1895/*
1896 * character device support function physical volume remove
1897 */
1898static int lvm_do_pv_remove(vg_t *vg_ptr, ulong p) {
1899	pv_t *pv = vg_ptr->pv[p];
1900
1901	lvm_fs_remove_pv(vg_ptr, pv);
1902
1903	vg_ptr->pe_total -= pv->pe_total;
1904	vg_ptr->pv_cur--;
1905	vg_ptr->pv_act--;
1906
1907	_close_pv(pv);
1908	kfree(pv);
1909
1910	vg_ptr->pv[p] = NULL;
1911
1912	return 0;
1913}
1914
1915
/*
 * Recompute the hardware sector size exported for an LV and store it
 * in lvm_hardsectsizes[] (indexed by the LV's minor).
 *
 * Fast path: if every PV in the VG reports the same sector size, that
 * size wins.  Otherwise each logical extent's device is checked (and,
 * for active snapshots, each COW exception's device) and the maximum
 * is used.  Falls back to SECTOR_SIZE when nothing reported a size.
 */
static void __update_hardsectsize(lv_t *lv)
{
	int max_hardsectsize = 0, hardsectsize = 0;
	int p;

	/* Check PVs first to see if they all have same sector size */
	for (p = 0; p < lv->vg->pv_cur; p++) {
		pv_t *pv = lv->vg->pv[p];
		if (pv && (hardsectsize = lvm_sectsize(pv->pv_dev))) {
			if (max_hardsectsize == 0)
				max_hardsectsize = hardsectsize;
			else if (hardsectsize != max_hardsectsize) {
				/* mismatch: leave the loop with
				   hardsectsize != max_hardsectsize so
				   the per-LE scan below runs */
				P_DEV("%s PV[%d] (%s) sector size %d, not %d\n",
				      lv->lv_name, p, kdevname(pv->pv_dev),
				      hardsectsize, max_hardsectsize);
				break;
			}
		}
	}

	/* PVs have different block size, need to check each LE sector size */
	if (hardsectsize != max_hardsectsize) {
		int le;
		for (le = 0; le < lv->lv_allocated_le; le++) {
			hardsectsize = lvm_sectsize(lv->lv_current_pe[le].dev);
			if (hardsectsize > max_hardsectsize) {
				P_DEV("%s LE[%d] (%s) blocksize %d not %d\n",
				      lv->lv_name, le,
				      kdevname(lv->lv_current_pe[le].dev),
				      hardsectsize, max_hardsectsize);
				max_hardsectsize = hardsectsize;
			}
		}

		/* only perform this operation on active snapshots */
		if ((lv->lv_access & LV_SNAPSHOT) &&
		    (lv->lv_status & LV_ACTIVE)) {
			int e;
			for (e = 0; e < lv->lv_remap_end; e++) {
				hardsectsize = lvm_sectsize(lv->lv_block_exception[e].rdev_new);
				if (hardsectsize > max_hardsectsize)
					max_hardsectsize = hardsectsize;
			}
		}
	}

	/* nothing reported a size (e.g. no PVs answered) */
	if (max_hardsectsize == 0)
		max_hardsectsize = SECTOR_SIZE;
	P_DEV("hardblocksize for LV %s is %d\n",
	      kdevname(lv->lv_dev), max_hardsectsize);
	lvm_hardsectsizes[MINOR(lv->lv_dev)] = max_hardsectsize;
}
1968
1969/*
1970 * character device support function logical volume create
1971 */
1972static int lvm_do_lv_create(int minor, char *lv_name, lv_t *lv)
1973{
1974	int e, ret, l, le, l_new, p, size, activate = 1;
1975	ulong lv_status_save;
1976	lv_block_exception_t *lvbe = lv->lv_block_exception;
1977	vg_t *vg_ptr = vg[VG_CHR(minor)];
1978	lv_t *lv_ptr = NULL;
1979	pe_t *pep;
1980
1981	if (!(pep = lv->lv_current_pe))
1982		return -EINVAL;
1983
1984	if (_sectors_to_k(lv->lv_chunk_size) > LVM_SNAPSHOT_MAX_CHUNK)
1985		return -EINVAL;
1986
1987	for (l = 0; l < vg_ptr->lv_cur; l++) {
1988		if (vg_ptr->lv[l] != NULL &&
1989		    strcmp(vg_ptr->lv[l]->lv_name, lv_name) == 0)
1990			return -EEXIST;
1991	}
1992
1993	/* in case of lv_remove(), lv_create() pair */
1994	l_new = -1;
1995	if (vg_ptr->lv[lv->lv_number] == NULL)
1996		l_new = lv->lv_number;
1997	else {
1998		for (l = 0; l < vg_ptr->lv_max; l++) {
1999			if (vg_ptr->lv[l] == NULL)
2000				if (l_new == -1) l_new = l;
2001		}
2002	}
2003	if (l_new == -1) return -EPERM;
2004	else             l = l_new;
2005
2006	if ((lv_ptr = kmalloc(sizeof(lv_t),GFP_KERNEL)) == NULL) {;
2007		printk(KERN_CRIT "%s -- LV_CREATE: kmalloc error LV at line %d\n",
2008		       lvm_name, __LINE__);
2009		return -ENOMEM;
2010	}
2011	/* copy preloaded LV */
2012	memcpy((char *) lv_ptr, (char *) lv, sizeof(lv_t));
2013
2014	lv_status_save = lv_ptr->lv_status;
2015	lv_ptr->lv_status &= ~LV_ACTIVE;
2016	lv_ptr->lv_snapshot_org = NULL;
2017	lv_ptr->lv_snapshot_prev = NULL;
2018	lv_ptr->lv_snapshot_next = NULL;
2019	lv_ptr->lv_block_exception = NULL;
2020	lv_ptr->lv_iobuf = NULL;
2021	lv_ptr->lv_COW_table_iobuf = NULL;
2022	lv_ptr->lv_snapshot_hash_table = NULL;
2023	lv_ptr->lv_snapshot_hash_table_size = 0;
2024	lv_ptr->lv_snapshot_hash_mask = 0;
2025	init_rwsem(&lv_ptr->lv_lock);
2026
2027	lv_ptr->lv_snapshot_use_rate = 0;
2028
2029	vg_ptr->lv[l] = lv_ptr;
2030
2031	/* get the PE structures from user space if this
2032	   is not a snapshot logical volume */
2033	if (!(lv_ptr->lv_access & LV_SNAPSHOT)) {
2034		size = lv_ptr->lv_allocated_le * sizeof(pe_t);
2035
2036		if ((lv_ptr->lv_current_pe = vmalloc(size)) == NULL) {
2037			printk(KERN_CRIT
2038			       "%s -- LV_CREATE: vmalloc error LV_CURRENT_PE of %d Byte "
2039			       "at line %d\n",
2040			       lvm_name, size, __LINE__);
2041			P_KFREE("%s -- kfree %d\n", lvm_name, __LINE__);
2042			kfree(lv_ptr);
2043			vg_ptr->lv[l] = NULL;
2044			return -ENOMEM;
2045		}
2046		if (copy_from_user(lv_ptr->lv_current_pe, pep, size)) {
2047			P_IOCTL("ERROR: copying PE ptr %p (%d bytes)\n",
2048				pep, sizeof(size));
2049			vfree(lv_ptr->lv_current_pe);
2050			kfree(lv_ptr);
2051			vg_ptr->lv[l] = NULL;
2052			return -EFAULT;
2053		}
2054		/* correct the PE count in PVs */
2055		for (le = 0; le < lv_ptr->lv_allocated_le; le++) {
2056			vg_ptr->pe_allocated++;
2057			for (p = 0; p < vg_ptr->pv_cur; p++) {
2058				if (vg_ptr->pv[p]->pv_dev ==
2059				    lv_ptr->lv_current_pe[le].dev)
2060					vg_ptr->pv[p]->pe_allocated++;
2061			}
2062		}
2063	} else {
2064		/* Get snapshot exception data and block list */
2065		if (lvbe != NULL) {
2066			lv_ptr->lv_snapshot_org =
2067			    vg_ptr->lv[LV_BLK(lv_ptr->lv_snapshot_minor)];
2068			if (lv_ptr->lv_snapshot_org != NULL) {
2069				size = lv_ptr->lv_remap_end * sizeof(lv_block_exception_t);
2070
2071				if (!size) {
2072					printk(KERN_WARNING
2073					       "%s -- zero length exception table requested\n",
2074					       lvm_name);
2075					kfree(lv_ptr);
2076					return -EINVAL;
2077				}
2078
2079				if ((lv_ptr->lv_block_exception = vmalloc(size)) == NULL) {
2080					printk(KERN_CRIT
2081					       "%s -- lvm_do_lv_create: vmalloc error LV_BLOCK_EXCEPTION "
2082					       "of %d byte at line %d\n",
2083					       lvm_name, size, __LINE__);
2084					P_KFREE("%s -- kfree %d\n", lvm_name,
2085						__LINE__);
2086					kfree(lv_ptr);
2087					vg_ptr->lv[l] = NULL;
2088					return -ENOMEM;
2089				}
2090				if (copy_from_user(lv_ptr->lv_block_exception, lvbe, size)) {
2091					vfree(lv_ptr->lv_block_exception);
2092					kfree(lv_ptr);
2093					vg_ptr->lv[l] = NULL;
2094					return -EFAULT;
2095				}
2096
2097				if(lv_ptr->lv_block_exception[0].rsector_org ==
2098				   LVM_SNAPSHOT_DROPPED_SECTOR)
2099				{
2100					printk(KERN_WARNING
2101   "%s -- lvm_do_lv_create: snapshot has been dropped and will not be activated\n",
2102					       lvm_name);
2103					activate = 0;
2104				}
2105
2106				/* point to the original logical volume */
2107				lv_ptr = lv_ptr->lv_snapshot_org;
2108
2109				lv_ptr->lv_snapshot_minor = 0;
2110				lv_ptr->lv_snapshot_org = lv_ptr;
2111				/* our new one now back points to the previous last in the chain
2112				   which can be the original logical volume */
2113				lv_ptr = vg_ptr->lv[l];
2114				/* now lv_ptr points to our new last snapshot logical volume */
2115				lv_ptr->lv_current_pe = lv_ptr->lv_snapshot_org->lv_current_pe;
2116				lv_ptr->lv_allocated_snapshot_le = lv_ptr->lv_allocated_le;
2117				lv_ptr->lv_allocated_le = lv_ptr->lv_snapshot_org->lv_allocated_le;
2118				lv_ptr->lv_current_le = lv_ptr->lv_snapshot_org->lv_current_le;
2119				lv_ptr->lv_size = lv_ptr->lv_snapshot_org->lv_size;
2120				lv_ptr->lv_stripes = lv_ptr->lv_snapshot_org->lv_stripes;
2121				lv_ptr->lv_stripesize = lv_ptr->lv_snapshot_org->lv_stripesize;
2122
2123				/* Update the VG PE(s) used by snapshot reserve space. */
2124				vg_ptr->pe_allocated += lv_ptr->lv_allocated_snapshot_le;
2125
2126				if ((ret = lvm_snapshot_alloc(lv_ptr)) != 0)
2127				{
2128					vfree(lv_ptr->lv_block_exception);
2129					kfree(lv_ptr);
2130					vg_ptr->lv[l] = NULL;
2131					return ret;
2132				}
2133				for ( e = 0; e < lv_ptr->lv_remap_ptr; e++)
2134					lvm_hash_link (lv_ptr->lv_block_exception + e,
2135						       lv_ptr->lv_block_exception[e].rdev_org,
2136						       lv_ptr->lv_block_exception[e].rsector_org, lv_ptr);
2137				/* need to fill the COW exception table data
2138				   into the page for disk i/o */
2139				if(lvm_snapshot_fill_COW_page(vg_ptr, lv_ptr)) {
2140					kfree(lv_ptr);
2141					vg_ptr->lv[l] = NULL;
2142					return -EINVAL;
2143				}
2144				init_waitqueue_head(&lv_ptr->lv_snapshot_wait);
2145			} else {
2146				kfree(lv_ptr);
2147				vg_ptr->lv[l] = NULL;
2148				return -EFAULT;
2149			}
2150		} else {
2151			kfree(vg_ptr->lv[l]);
2152			vg_ptr->lv[l] = NULL;
2153			return -EINVAL;
2154		}
2155	} /* if ( vg[VG_CHR(minor)]->lv[l]->lv_access & LV_SNAPSHOT) */
2156
2157	lv_ptr = vg_ptr->lv[l];
2158	lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].start_sect = 0;
2159	lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = lv_ptr->lv_size;
2160	lvm_size[MINOR(lv_ptr->lv_dev)] = lv_ptr->lv_size >> 1;
2161	vg_lv_map[MINOR(lv_ptr->lv_dev)].vg_number = vg_ptr->vg_number;
2162	vg_lv_map[MINOR(lv_ptr->lv_dev)].lv_number = lv_ptr->lv_number;
2163	LVM_CORRECT_READ_AHEAD(lv_ptr->lv_read_ahead);
2164	vg_ptr->lv_cur++;
2165	lv_ptr->lv_status = lv_status_save;
2166	lv_ptr->vg = vg_ptr;
2167
2168	__update_hardsectsize(lv_ptr);
2169
2170	/* optionally add our new snapshot LV */
2171	if (lv_ptr->lv_access & LV_SNAPSHOT) {
2172		lv_t *org = lv_ptr->lv_snapshot_org, *last;
2173
2174		/* sync the original logical volume */
2175		fsync_dev(org->lv_dev);
2176#ifdef	LVM_VFS_ENHANCEMENT
2177		/* VFS function call to sync and lock the filesystem */
2178		fsync_dev_lockfs(org->lv_dev);
2179#endif
2180
2181		down_write(&org->lv_lock);
2182		org->lv_access |= LV_SNAPSHOT_ORG;
2183		lv_ptr->lv_access &= ~LV_SNAPSHOT_ORG; /* this can only hide an userspace bug */
2184
2185
2186		/* Link in the list of snapshot volumes */
2187		for (last = org; last->lv_snapshot_next; last = last->lv_snapshot_next);
2188		lv_ptr->lv_snapshot_prev = last;
2189		last->lv_snapshot_next = lv_ptr;
2190		up_write(&org->lv_lock);
2191	}
2192
2193	/* activate the logical volume */
2194	if(activate)
2195		lv_ptr->lv_status |= LV_ACTIVE;
2196	else
2197		lv_ptr->lv_status &= ~LV_ACTIVE;
2198
2199	if ( lv_ptr->lv_access & LV_WRITE)
2200		set_device_ro(lv_ptr->lv_dev, 0);
2201	else
2202		set_device_ro(lv_ptr->lv_dev, 1);
2203
2204#ifdef	LVM_VFS_ENHANCEMENT
2205/* VFS function call to unlock the filesystem */
2206	if (lv_ptr->lv_access & LV_SNAPSHOT)
2207		unlockfs(lv_ptr->lv_snapshot_org->lv_dev);
2208#endif
2209
2210	lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].de =
2211	    lvm_fs_create_lv(vg_ptr, lv_ptr);
2212	return 0;
2213} /* lvm_do_lv_create() */
2214
2215
2216/*
2217 * character device support function logical volume remove
2218 */
2219static int lvm_do_lv_remove(int minor, char *lv_name, int l)
2220{
2221	uint le, p;
2222	vg_t *vg_ptr = vg[VG_CHR(minor)];
2223	lv_t *lv_ptr;
2224
2225	if (!vg_ptr)
2226		return -ENXIO;
2227
2228	if (l == -1) {
2229		for (l = 0; l < vg_ptr->lv_max; l++) {
2230			if (vg_ptr->lv[l] != NULL &&
2231			    strcmp(vg_ptr->lv[l]->lv_name, lv_name) == 0) {
2232				break;
2233			}
2234		}
2235	}
2236	if (l == vg_ptr->lv_max) return -ENXIO;
2237
2238	lv_ptr = vg_ptr->lv[l];
2239#ifdef LVM_TOTAL_RESET
2240	if (lv_ptr->lv_open > 0 && lvm_reset_spindown == 0)
2241#else
2242	if (lv_ptr->lv_open > 0)
2243#endif
2244		return -EBUSY;
2245
2246	/* check for deletion of snapshot source while
2247	   snapshot volume still exists */
2248	if ((lv_ptr->lv_access & LV_SNAPSHOT_ORG) &&
2249	    lv_ptr->lv_snapshot_next != NULL)
2250		return -EPERM;
2251
2252	lvm_fs_remove_lv(vg_ptr, lv_ptr);
2253
2254	if (lv_ptr->lv_access & LV_SNAPSHOT) {
2255		/*
2256		 * Atomically make the the snapshot invisible
2257		 * to the original lv before playing with it.
2258		 */
2259		lv_t * org = lv_ptr->lv_snapshot_org;
2260		down_write(&org->lv_lock);
2261
2262		/* remove this snapshot logical volume from the chain */
2263		lv_ptr->lv_snapshot_prev->lv_snapshot_next = lv_ptr->lv_snapshot_next;
2264		if (lv_ptr->lv_snapshot_next != NULL) {
2265			lv_ptr->lv_snapshot_next->lv_snapshot_prev =
2266			    lv_ptr->lv_snapshot_prev;
2267		}
2268
2269		/* no more snapshots? */
2270		if (!org->lv_snapshot_next) {
2271			org->lv_access &= ~LV_SNAPSHOT_ORG;
2272		}
2273		up_write(&org->lv_lock);
2274
2275		lvm_snapshot_release(lv_ptr);
2276
2277		/* Update the VG PE(s) used by snapshot reserve space. */
2278		vg_ptr->pe_allocated -= lv_ptr->lv_allocated_snapshot_le;
2279	}
2280
2281	lv_ptr->lv_status |= LV_SPINDOWN;
2282
2283	/* sync the buffers */
2284	fsync_dev(lv_ptr->lv_dev);
2285
2286	lv_ptr->lv_status &= ~LV_ACTIVE;
2287
2288	/* invalidate the buffers */
2289	invalidate_buffers(lv_ptr->lv_dev);
2290
2291	/* reset generic hd */
2292	lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].start_sect = -1;
2293	lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = 0;
2294	lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].de = 0;
2295	lvm_size[MINOR(lv_ptr->lv_dev)] = 0;
2296
2297	/* reset VG/LV mapping */
2298	vg_lv_map[MINOR(lv_ptr->lv_dev)].vg_number = ABS_MAX_VG;
2299	vg_lv_map[MINOR(lv_ptr->lv_dev)].lv_number = -1;
2300
2301	/* correct the PE count in PVs if this is not a snapshot
2302           logical volume */
2303	if (!(lv_ptr->lv_access & LV_SNAPSHOT)) {
2304		/* only if this is no snapshot logical volume because
2305		   we share the lv_current_pe[] structs with the
2306		   original logical volume */
2307		for (le = 0; le < lv_ptr->lv_allocated_le; le++) {
2308			vg_ptr->pe_allocated--;
2309			for (p = 0; p < vg_ptr->pv_cur; p++) {
2310				if (vg_ptr->pv[p]->pv_dev ==
2311				    lv_ptr->lv_current_pe[le].dev)
2312					vg_ptr->pv[p]->pe_allocated--;
2313			}
2314		}
2315		vfree(lv_ptr->lv_current_pe);
2316	}
2317
2318	P_KFREE("%s -- kfree %d\n", lvm_name, __LINE__);
2319	kfree(lv_ptr);
2320	vg_ptr->lv[l] = NULL;
2321	vg_ptr->lv_cur--;
2322	return 0;
2323} /* lvm_do_lv_remove() */
2324
2325
2326/*
2327 * logical volume extend / reduce
2328 */
/*
 * Allocate a resized exception table (and matching hash table) for
 * extending or reducing a snapshot LV.
 *
 * Only an extension pulls the (bigger) table from user space here;
 * for a reduction the caller copies the live entries over from the
 * old kernel table.  On success new_lv->lv_block_exception points at
 * the new vmalloc()ed table; the caller installs it into the old LV.
 */
static int __extend_reduce_snapshot(vg_t *vg_ptr, lv_t *old_lv, lv_t *new_lv) {
	ulong size;
	lv_block_exception_t *lvbe;

	if (!new_lv->lv_block_exception)
		return -ENXIO;

	size = new_lv->lv_remap_end * sizeof(lv_block_exception_t);
	if ((lvbe = vmalloc(size)) == NULL) {
		printk(KERN_CRIT
		       "%s -- lvm_do_lv_extend_reduce: vmalloc "
		       "error LV_BLOCK_EXCEPTION of %lu Byte at line %d\n",
		       lvm_name, size, __LINE__);
		return -ENOMEM;
	}

	/* new_lv->lv_block_exception is still the usermode pointer here */
	if ((new_lv->lv_remap_end > old_lv->lv_remap_end) &&
	    (copy_from_user(lvbe, new_lv->lv_block_exception, size))) {
		vfree(lvbe);
		return -EFAULT;
	}
	new_lv->lv_block_exception = lvbe;

	if (lvm_snapshot_alloc_hash_table(new_lv)) {
		vfree(new_lv->lv_block_exception);
		return -ENOMEM;
	}

	return 0;
}
2359
/*
 * Resize a normal (non-snapshot) LV: allocate and fill the new PE map
 * from user space and transfer the PV allocation counters and per-PE
 * i/o statistics from the old layout to the new one.
 *
 * On success new_lv->lv_current_pe points at the new vmalloc()ed map;
 * the caller installs it into old_lv and frees the old map.
 */
static int __extend_reduce(vg_t *vg_ptr, lv_t *old_lv, lv_t *new_lv) {
	ulong size, l, p, end;
	pe_t *pe;

	/* allocate space for new pe structures */
	size = new_lv->lv_current_le * sizeof(pe_t);
	if ((pe = vmalloc(size)) == NULL) {
		printk(KERN_CRIT
		       "%s -- lvm_do_lv_extend_reduce: "
		       "vmalloc error LV_CURRENT_PE of %lu Byte at line %d\n",
		       lvm_name, size, __LINE__);
		return -ENOMEM;
	}

	/* get the PE structures from user space */
	if (copy_from_user(pe, new_lv->lv_current_pe, size)) {
		/* NOTE(review): with the visible caller, snapshots go
		   through __extend_reduce_snapshot() so this branch
		   looks unreachable here — confirm before removing */
		if(old_lv->lv_access & LV_SNAPSHOT)
			vfree(new_lv->lv_snapshot_hash_table);
		vfree(pe);
		return -EFAULT;
	}

	new_lv->lv_current_pe = pe;

	/* reduce allocation counters on PV(s) */
	for (l = 0; l < old_lv->lv_allocated_le; l++) {
		vg_ptr->pe_allocated--;
		for (p = 0; p < vg_ptr->pv_cur; p++) {
			if (vg_ptr->pv[p]->pv_dev ==
			    old_lv->lv_current_pe[l].dev) {
				vg_ptr->pv[p]->pe_allocated--;
				break;
			}
		}
	}

	/* extend the PE count in PVs */
	for (l = 0; l < new_lv->lv_allocated_le; l++) {
		vg_ptr->pe_allocated++;
		for (p = 0; p < vg_ptr->pv_cur; p++) {
			if (vg_ptr->pv[p]->pv_dev ==
                            new_lv->lv_current_pe[l].dev) {
				vg_ptr->pv[p]->pe_allocated++;
				break;
			}
		}
	}

	/* save available i/o statistic data */
	if (old_lv->lv_stripes < 2) {	/* linear logical volume */
		/* carry read/write counters over for the LEs both
		   layouts have in common */
		end = min(old_lv->lv_current_le, new_lv->lv_current_le);
		for (l = 0; l < end; l++) {
			new_lv->lv_current_pe[l].reads +=
				old_lv->lv_current_pe[l].reads;

			new_lv->lv_current_pe[l].writes +=
				old_lv->lv_current_pe[l].writes;
		}

	} else {		/* striped logical volume */
		uint i, j, source, dest, end, old_stripe_size, new_stripe_size;

		old_stripe_size = old_lv->lv_allocated_le / old_lv->lv_stripes;
		new_stripe_size = new_lv->lv_allocated_le / new_lv->lv_stripes;
		end = min(old_stripe_size, new_stripe_size);

		/* copy the counters stripe by stripe */
		for (i = source = dest = 0; i < new_lv->lv_stripes; i++) {
			for (j = 0; j < end; j++) {
				new_lv->lv_current_pe[dest + j].reads +=
				    old_lv->lv_current_pe[source + j].reads;
				new_lv->lv_current_pe[dest + j].writes +=
				    old_lv->lv_current_pe[source + j].writes;
			}
			source += old_stripe_size;
			dest += new_stripe_size;
		}
	}

	return 0;
}
2440
/*
 * Extend or reduce a logical volume to the layout described by the
 * user supplied new_lv.
 *
 * The replacement PE map / snapshot exception table is built by the
 * __extend_reduce*() helpers first; only then is old_lv updated under
 * its lv_lock, so a failure leaves the original LV untouched.
 * Returns 0 or a negative errno.
 */
static int lvm_do_lv_extend_reduce(int minor, char *lv_name, lv_t *new_lv)
{
	int r;
	ulong l, e, size;
	vg_t *vg_ptr = vg[VG_CHR(minor)];
	lv_t *old_lv;
	pe_t *pe;

	if (!vg_ptr)
		return -ENXIO;

	/* a PE map from user space is mandatory */
	if ((pe = new_lv->lv_current_pe) == NULL)
		return -EINVAL;

	/* locate the LV by name */
	for (l = 0; l < vg_ptr->lv_max; l++)
		if (vg_ptr->lv[l] && !strcmp(vg_ptr->lv[l]->lv_name, lv_name))
			break;

	if (l == vg_ptr->lv_max)
		return -ENXIO;

	old_lv = vg_ptr->lv[l];

	if (old_lv->lv_access & LV_SNAPSHOT) {
		/* only perform this operation on active snapshots */
		if (old_lv->lv_status & LV_ACTIVE)
			r = __extend_reduce_snapshot(vg_ptr, old_lv, new_lv);
		else
			r = -EPERM;

	} else
		r = __extend_reduce(vg_ptr, old_lv, new_lv);

	if(r)
		return r;

	/* copy relevant fields */
	down_write(&old_lv->lv_lock);

	if(new_lv->lv_access & LV_SNAPSHOT) {
		/* when shrinking, copy only the entries still in use
		   (lv_remap_ptr); when growing, the whole old table */
		size = (new_lv->lv_remap_end > old_lv->lv_remap_end) ?
			old_lv->lv_remap_ptr : new_lv->lv_remap_end;
		size *= sizeof(lv_block_exception_t);
		memcpy(new_lv->lv_block_exception,
		       old_lv->lv_block_exception, size);

		old_lv->lv_remap_end = new_lv->lv_remap_end;
		old_lv->lv_block_exception = new_lv->lv_block_exception;
		old_lv->lv_snapshot_hash_table =
			new_lv->lv_snapshot_hash_table;
		old_lv->lv_snapshot_hash_table_size =
			new_lv->lv_snapshot_hash_table_size;
		old_lv->lv_snapshot_hash_mask =
			new_lv->lv_snapshot_hash_mask;

		/* rebuild the hash links for the copied exceptions */
		for (e = 0; e < new_lv->lv_remap_ptr; e++)
			lvm_hash_link(new_lv->lv_block_exception + e,
				      new_lv->lv_block_exception[e].rdev_org,
				      new_lv->lv_block_exception[e].rsector_org,
				      new_lv);

	} else {

		vfree(old_lv->lv_current_pe);
		vfree(old_lv->lv_snapshot_hash_table);

		old_lv->lv_size = new_lv->lv_size;
		old_lv->lv_allocated_le = new_lv->lv_allocated_le;
		old_lv->lv_current_le = new_lv->lv_current_le;
		old_lv->lv_current_pe = new_lv->lv_current_pe;
		lvm_gendisk.part[MINOR(old_lv->lv_dev)].nr_sects =
			old_lv->lv_size;
		lvm_size[MINOR(old_lv->lv_dev)] = old_lv->lv_size >> 1;

		/* snapshots share the origin's PE map; propagate the
		   new geometry to each attached snapshot */
		if (old_lv->lv_access & LV_SNAPSHOT_ORG) {
			lv_t *snap;
			for(snap = old_lv->lv_snapshot_next; snap;
			    snap = snap->lv_snapshot_next) {
				down_write(&snap->lv_lock);
				snap->lv_current_pe = old_lv->lv_current_pe;
				snap->lv_allocated_le =
					old_lv->lv_allocated_le;
				snap->lv_current_le = old_lv->lv_current_le;
				snap->lv_size = old_lv->lv_size;

				lvm_gendisk.part[MINOR(snap->lv_dev)].nr_sects
					= old_lv->lv_size;
				lvm_size[MINOR(snap->lv_dev)] =
					old_lv->lv_size >> 1;
				__update_hardsectsize(snap);
				up_write(&snap->lv_lock);
			}
		}
	}

	__update_hardsectsize(old_lv);
	up_write(&old_lv->lv_lock);

	return 0;
} /* lvm_do_lv_extend_reduce() */
2541
2542
2543/*
2544 * character device support function logical volume status by name
2545 */
2546static int lvm_do_lv_status_byname(vg_t *vg_ptr, void *arg)
2547{
2548	uint l;
2549	lv_status_byname_req_t lv_status_byname_req;
2550	void *saved_ptr1;
2551	void *saved_ptr2;
2552	lv_t *lv_ptr;
2553
2554	if (vg_ptr == NULL) return -ENXIO;
2555	if (copy_from_user(&lv_status_byname_req, arg,
2556			   sizeof(lv_status_byname_req_t)) != 0)
2557		return -EFAULT;
2558
2559	if (lv_status_byname_req.lv == NULL) return -EINVAL;
2560
2561	for (l = 0; l < vg_ptr->lv_max; l++) {
2562		if ((lv_ptr = vg_ptr->lv[l]) != NULL &&
2563		    strcmp(lv_ptr->lv_name,
2564			   lv_status_byname_req.lv_name) == 0) {
2565		        /* Save usermode pointers */
2566		        if (copy_from_user(&saved_ptr1, &lv_status_byname_req.lv->lv_current_pe, sizeof(void*)) != 0)
2567				return -EFAULT;
2568			if (copy_from_user(&saved_ptr2, &lv_status_byname_req.lv->lv_block_exception, sizeof(void*)) != 0)
2569			        return -EFAULT;
2570		        if (copy_to_user(lv_status_byname_req.lv,
2571					 lv_ptr,
2572					 sizeof(lv_t)) != 0)
2573				return -EFAULT;
2574			if (saved_ptr1 != NULL) {
2575				if (copy_to_user(saved_ptr1,
2576						 lv_ptr->lv_current_pe,
2577						 lv_ptr->lv_allocated_le *
2578				       		 sizeof(pe_t)) != 0)
2579					return -EFAULT;
2580			}
2581			/* Restore usermode pointers */
2582			if (copy_to_user(&lv_status_byname_req.lv->lv_current_pe, &saved_ptr1, sizeof(void*)) != 0)
2583			        return -EFAULT;
2584			return 0;
2585		}
2586	}
2587	return -ENXIO;
2588} /* lvm_do_lv_status_byname() */
2589
2590
2591/*
2592 * character device support function logical volume status by index
2593 */
2594static int lvm_do_lv_status_byindex(vg_t *vg_ptr,void *arg)
2595{
2596	lv_status_byindex_req_t lv_status_byindex_req;
2597	void *saved_ptr1;
2598	void *saved_ptr2;
2599	lv_t *lv_ptr;
2600
2601	if (vg_ptr == NULL) return -ENXIO;
2602	if (copy_from_user(&lv_status_byindex_req, arg,
2603			   sizeof(lv_status_byindex_req)) != 0)
2604		return -EFAULT;
2605
2606	if (lv_status_byindex_req.lv == NULL)
2607		return -EINVAL;
2608	if ( ( lv_ptr = vg_ptr->lv[lv_status_byindex_req.lv_index]) == NULL)
2609		return -ENXIO;
2610
2611	/* Save usermode pointers */
2612	if (copy_from_user(&saved_ptr1, &lv_status_byindex_req.lv->lv_current_pe, sizeof(void*)) != 0)
2613	        return -EFAULT;
2614	if (copy_from_user(&saved_ptr2, &lv_status_byindex_req.lv->lv_block_exception, sizeof(void*)) != 0)
2615	        return -EFAULT;
2616
2617	if (copy_to_user(lv_status_byindex_req.lv, lv_ptr, sizeof(lv_t)) != 0)
2618		return -EFAULT;
2619	if (saved_ptr1 != NULL) {
2620		if (copy_to_user(saved_ptr1,
2621				 lv_ptr->lv_current_pe,
2622				 lv_ptr->lv_allocated_le *
2623		       		 sizeof(pe_t)) != 0)
2624			return -EFAULT;
2625	}
2626
2627	/* Restore usermode pointers */
2628	if (copy_to_user(&lv_status_byindex_req.lv->lv_current_pe, &saved_ptr1, sizeof(void *)) != 0)
2629	        return -EFAULT;
2630
2631	return 0;
2632} /* lvm_do_lv_status_byindex() */
2633
2634
2635/*
2636 * character device support function logical volume status by device number
2637 */
2638static int lvm_do_lv_status_bydev(vg_t * vg_ptr, void * arg) {
2639	int l;
2640	lv_status_bydev_req_t lv_status_bydev_req;
2641	void *saved_ptr1;
2642	void *saved_ptr2;
2643	lv_t *lv_ptr;
2644
2645	if (vg_ptr == NULL) return -ENXIO;
2646	if (copy_from_user(&lv_status_bydev_req, arg,
2647			   sizeof(lv_status_bydev_req)) != 0)
2648		return -EFAULT;
2649
2650	for ( l = 0; l < vg_ptr->lv_max; l++) {
2651		if ( vg_ptr->lv[l] == NULL) continue;
2652		if ( vg_ptr->lv[l]->lv_dev == lv_status_bydev_req.dev) break;
2653	}
2654
2655	if ( l == vg_ptr->lv_max) return -ENXIO;
2656	lv_ptr = vg_ptr->lv[l];
2657
2658	/* Save usermode pointers */
2659	if (copy_from_user(&saved_ptr1, &lv_status_bydev_req.lv->lv_current_pe, sizeof(void*)) != 0)
2660	        return -EFAULT;
2661	if (copy_from_user(&saved_ptr2, &lv_status_bydev_req.lv->lv_block_exception, sizeof(void*)) != 0)
2662	        return -EFAULT;
2663
2664	if (copy_to_user(lv_status_bydev_req.lv, lv_ptr, sizeof(lv_t)) != 0)
2665		return -EFAULT;
2666	if (saved_ptr1 != NULL) {
2667		if (copy_to_user(saved_ptr1,
2668				 lv_ptr->lv_current_pe,
2669				 lv_ptr->lv_allocated_le *
2670		       		 sizeof(pe_t)) != 0)
2671			return -EFAULT;
2672	}
2673	/* Restore usermode pointers */
2674	if (copy_to_user(&lv_status_bydev_req.lv->lv_current_pe, &saved_ptr1, sizeof(void *)) != 0)
2675	        return -EFAULT;
2676
2677	return 0;
2678} /* lvm_do_lv_status_bydev() */
2679
2680
2681/*
2682 * character device support function rename a logical volume
2683 */
2684static int lvm_do_lv_rename(vg_t *vg_ptr, lv_req_t *lv_req, lv_t *lv)
2685{
2686	int l = 0;
2687	int ret = 0;
2688	lv_t *lv_ptr = NULL;
2689
2690	if (!vg_ptr)
2691		return -ENXIO;
2692
2693	for (l = 0; l < vg_ptr->lv_max; l++)
2694	{
2695		if ( (lv_ptr = vg_ptr->lv[l]) == NULL) continue;
2696		if (lv_ptr->lv_dev == lv->lv_dev)
2697		{
2698			lvm_fs_remove_lv(vg_ptr, lv_ptr);
2699			strncpy(lv_ptr->lv_name, lv_req->lv_name, NAME_LEN);
2700			lvm_fs_create_lv(vg_ptr, lv_ptr);
2701			break;
2702		}
2703	}
2704	if (l == vg_ptr->lv_max) ret = -ENODEV;
2705
2706	return ret;
2707} /* lvm_do_lv_rename */
2708
2709
2710/*
2711 * character device support function physical volume change
2712 */
2713static int lvm_do_pv_change(vg_t *vg_ptr, void *arg)
2714{
2715	uint p;
2716	pv_t *pv_ptr;
2717	struct block_device *bd;
2718
2719	if (vg_ptr == NULL) return -ENXIO;
2720	if (copy_from_user(&pv_change_req, arg,
2721			   sizeof(pv_change_req)) != 0)
2722		return -EFAULT;
2723
2724	for (p = 0; p < vg_ptr->pv_max; p++) {
2725		pv_ptr = vg_ptr->pv[p];
2726		if (pv_ptr != NULL &&
2727		    strcmp(pv_ptr->pv_name,
2728			       pv_change_req.pv_name) == 0) {
2729
2730			bd = pv_ptr->bd;
2731			if (copy_from_user(pv_ptr,
2732					   pv_change_req.pv,
2733					   sizeof(pv_t)) != 0)
2734				return -EFAULT;
2735			pv_ptr->bd = bd;
2736
2737			/* We don't need the PE list
2738			   in kernel space as with LVs pe_t list */
2739			pv_ptr->pe = NULL;
2740			return 0;
2741		}
2742	}
2743	return -ENXIO;
2744} /* lvm_do_pv_change() */
2745
2746/*
2747 * character device support function get physical volume status
2748 */
2749static int lvm_do_pv_status(vg_t *vg_ptr, void *arg)
2750{
2751	uint p;
2752	pv_t *pv_ptr;
2753
2754	if (vg_ptr == NULL) return -ENXIO;
2755	if (copy_from_user(&pv_status_req, arg,
2756			   sizeof(pv_status_req)) != 0)
2757		return -EFAULT;
2758
2759	for (p = 0; p < vg_ptr->pv_max; p++) {
2760		pv_ptr = vg_ptr->pv[p];
2761		if (pv_ptr != NULL &&
2762		    strcmp(pv_ptr->pv_name,
2763			       pv_status_req.pv_name) == 0) {
2764			if (copy_to_user(pv_status_req.pv,
2765					 pv_ptr,
2766				         sizeof(pv_t)) != 0)
2767				return -EFAULT;
2768			return 0;
2769		}
2770	}
2771	return -ENXIO;
2772} /* lvm_do_pv_status() */
2773
2774
2775/*
2776 * character device support function flush and invalidate all buffers of a PV
2777 */
2778static int lvm_do_pv_flush(void *arg)
2779{
2780	pv_flush_req_t pv_flush_req;
2781
2782	if (copy_from_user(&pv_flush_req, arg, sizeof(pv_flush_req)) != 0)
2783		return -EFAULT;
2784
2785	fsync_dev(pv_flush_req.pv_dev);
2786	invalidate_buffers(pv_flush_req.pv_dev);
2787
2788	return 0;
2789}
2790
2791
2792/*
2793 * support function initialize gendisk variables
2794 */
2795static void __init lvm_geninit(struct gendisk *lvm_gdisk)
2796{
2797	int i = 0;
2798
2799#ifdef DEBUG_GENDISK
2800	printk(KERN_DEBUG "%s -- lvm_gendisk\n", lvm_name);
2801#endif
2802
2803	for (i = 0; i < MAX_LV; i++) {
2804		lvm_gendisk.part[i].start_sect = -1;	/* avoid partition check */
2805		lvm_size[i] = lvm_gendisk.part[i].nr_sects = 0;
2806		lvm_blocksizes[i] = BLOCK_SIZE;
2807	}
2808
2809	blk_size[MAJOR_NR] = lvm_size;
2810	blksize_size[MAJOR_NR] = lvm_blocksizes;
2811	hardsect_size[MAJOR_NR] = lvm_hardsectsizes;
2812
2813	return;
2814} /* lvm_gen_init() */
2815
2816
2817
/*
 * Push a buffer head onto the deferred-request list (_pe_requests).
 * Must have down_write(_pe_lock) when we enqueue buffers.
 * The rw argument is unused: _flush_io() later resubmits every queued
 * buffer as a WRITE.
 */
static void _queue_io(struct buffer_head *bh, int rw) {
	/* a non-NULL b_reqnext means the buffer is already queued */
	if (bh->b_reqnext) BUG();
	bh->b_reqnext = _pe_requests;
	_pe_requests = bh;
}
2824
2825/* Must have down_write(_pe_lock) when we dequeue buffers */
2826static struct buffer_head *_dequeue_io(void)
2827{
2828	struct buffer_head *bh = _pe_requests;
2829	_pe_requests = NULL;
2830	return bh;
2831}
2832
2833/*
2834 * We do not need to hold _pe_lock to flush buffers.  bh should be taken from
2835 * _pe_requests under down_write(_pe_lock), and then _pe_requests can be set
2836 * NULL and we drop _pe_lock.  Any new buffers defered at this time will be
2837 * added to a new list, and the old buffers can have their I/O restarted
2838 * asynchronously.
2839 *
2840 * If, for some reason, the same PE is locked again before all of these writes
2841 * have finished, then these buffers will just be re-queued (i.e. no danger).
2842 */
2843static void _flush_io(struct buffer_head *bh)
2844{
2845	while (bh) {
2846		struct buffer_head *next = bh->b_reqnext;
2847		bh->b_reqnext = NULL;
2848		/* resubmit this buffer head */
2849		generic_make_request(WRITE, bh);
2850		bh = next;
2851	}
2852}
2853
2854
2855/*
2856 * we must open the pv's before we use them
2857 */
2858static int _open_pv(pv_t *pv) {
2859	int err;
2860	struct block_device *bd;
2861
2862	if (!(bd = bdget(kdev_t_to_nr(pv->pv_dev))))
2863		return -ENOMEM;
2864
2865	err = blkdev_get(bd, FMODE_READ|FMODE_WRITE, 0, BDEV_FILE);
2866	if (err)
2867		return err;
2868
2869	pv->bd = bd;
2870	return 0;
2871}
2872
2873static void _close_pv(pv_t *pv) {
2874	if (pv) {
2875		struct block_device *bdev = pv->bd;
2876		pv->bd = NULL;
2877		if (bdev)
2878			blkdev_put(bdev, BDEV_FILE);
2879	}
2880}
2881
2882
2883static unsigned long _sectors_to_k(unsigned long sect)
2884{
2885	if(SECTOR_SIZE > 1024) {
2886		return sect * (SECTOR_SIZE / 1024);
2887	}
2888
2889	return sect / (1024 / SECTOR_SIZE);
2890}
2891
2892MODULE_AUTHOR("Heinz Mauelshagen, Sistina Software");
2893MODULE_DESCRIPTION("Logical Volume Manager");
2894#ifdef MODULE_LICENSE
2895MODULE_LICENSE("GPL");
2896#endif
2897
2898module_init(lvm_init);
2899module_exit(lvm_cleanup);
2900