1/* 2 * kernel/lvm.c 3 * 4 * Copyright (C) 1997 - 2002 Heinz Mauelshagen, Sistina Software 5 * 6 * February-November 1997 7 * April-May,July-August,November 1998 8 * January-March,May,July,September,October 1999 9 * January,February,July,September-November 2000 10 * January-May,June,October 2001 11 * May-July 2002 12 * 13 * 14 * LVM driver is free software; you can redistribute it and/or modify 15 * it under the terms of the GNU General Public License as published by 16 * the Free Software Foundation; either version 2, or (at your option) 17 * any later version. 18 * 19 * LVM driver is distributed in the hope that it will be useful, 20 * but WITHOUT ANY WARRANTY; without even the implied warranty of 21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 22 * GNU General Public License for more details. 23 * 24 * You should have received a copy of the GNU General Public License 25 * along with GNU CC; see the file COPYING. If not, write to 26 * the Free Software Foundation, 59 Temple Place - Suite 330, 27 * Boston, MA 02111-1307, USA. 28 * 29 */ 30 31/* 32 * Changelog 33 * 34 * 09/11/1997 - added chr ioctls VG_STATUS_GET_COUNT 35 * and VG_STATUS_GET_NAMELIST 36 * 18/01/1998 - change lvm_chr_open/close lock handling 37 * 30/04/1998 - changed LV_STATUS ioctl to LV_STATUS_BYNAME and 38 * - added LV_STATUS_BYINDEX ioctl 39 * - used lvm_status_byname_req_t and 40 * lvm_status_byindex_req_t vars 41 * 04/05/1998 - added multiple device support 42 * 08/05/1998 - added support to set/clear extendable flag in volume group 43 * 09/05/1998 - changed output of lvm_proc_get_global_info() because of 44 * support for free (eg. 
longer) logical volume names 45 * 12/05/1998 - added spin_locks (thanks to Pascal van Dam 46 * <pascal@ramoth.xs4all.nl>) 47 * 25/05/1998 - fixed handling of locked PEs in lvm_map() and 48 * lvm_chr_ioctl() 49 * 26/05/1998 - reactivated verify_area by access_ok 50 * 07/06/1998 - used vmalloc/vfree instead of kmalloc/kfree to go 51 * beyond 128/256 KB max allocation limit per call 52 * - #ifdef blocked spin_lock calls to avoid compile errors 53 * with 2.0.x 54 * 11/06/1998 - another enhancement to spinlock code in lvm_chr_open() 55 * and use of LVM_VERSION_CODE instead of my own macros 56 * (thanks to Michael Marxmeier <mike@msede.com>) 57 * 07/07/1998 - added statistics in lvm_map() 58 * 08/07/1998 - saved statistics in lvm_do_lv_extend_reduce() 59 * 25/07/1998 - used __initfunc macro 60 * 02/08/1998 - changes for official char/block major numbers 61 * 07/08/1998 - avoided init_module() and cleanup_module() to be static 62 * 30/08/1998 - changed VG lv_open counter from sum of LV lv_open counters 63 * to sum of LVs open (no matter how often each is) 64 * 01/09/1998 - fixed lvm_gendisk.part[] index error 65 * 07/09/1998 - added copying of lv_current_pe-array 66 * in LV_STATUS_BYINDEX ioctl 67 * 17/11/1998 - added KERN_* levels to printk 68 * 13/01/1999 - fixed LV index bug in lvm_do_lv_create() which hit lvrename 69 * 07/02/1999 - fixed spinlock handling bug in case of LVM_RESET 70 * by moving spinlock code from lvm_chr_open() 71 * to lvm_chr_ioctl() 72 * - added LVM_LOCK_LVM ioctl to lvm_chr_ioctl() 73 * - allowed LVM_RESET and retrieval commands to go ahead; 74 * only other update ioctls are blocked now 75 * - fixed pv->pe to NULL for pv_status 76 * - using lv_req structure in lvm_chr_ioctl() now 77 * - fixed NULL ptr reference bug in lvm_do_lv_extend_reduce() 78 * caused by uncontiguous PV array in lvm_chr_ioctl(VG_REDUCE) 79 * 09/02/1999 - changed BLKRASET and BLKRAGET in lvm_chr_ioctl() to 80 * handle lgoical volume private read ahead sector 81 * - implemented 
LV read_ahead handling with lvm_blk_read() 82 * and lvm_blk_write() 83 * 10/02/1999 - implemented 2.[12].* support function lvm_hd_name() 84 * to be used in drivers/block/genhd.c by disk_name() 85 * 12/02/1999 - fixed index bug in lvm_blk_ioctl(), HDIO_GETGEO 86 * - enhanced gendisk insert/remove handling 87 * 16/02/1999 - changed to dynamic block minor number allocation to 88 * have as much as 99 volume groups with 256 logical volumes 89 * as the grand total; this allows having 1 volume group with 90 * up to 256 logical volumes in it 91 * 21/02/1999 - added LV open count information to proc filesystem 92 * - substituted redundant LVM_RESET code by calls 93 * to lvm_do_vg_remove() 94 * 22/02/1999 - used schedule_timeout() to be more responsive 95 * in case of lvm_do_vg_remove() with lots of logical volumes 96 * 19/03/1999 - fixed NULL pointer bug in module_init/lvm_init 97 * 17/05/1999 - used DECLARE_WAIT_QUEUE_HEAD macro (>2.3.0) 98 * - enhanced lvm_hd_name support 99 * 03/07/1999 - avoided use of KERNEL_VERSION macro based ifdefs and 100 * memcpy_tofs/memcpy_fromfs macro redefinitions 101 * 06/07/1999 - corrected reads/writes statistic counter copy in case 102 * of striped logical volume 103 * 28/07/1999 - implemented snapshot logical volumes 104 * - lvm_chr_ioctl 105 * - LV_STATUS_BYINDEX 106 * - LV_STATUS_BYNAME 107 * - lvm_do_lv_create 108 * - lvm_do_lv_remove 109 * - lvm_map 110 * - new lvm_snapshot_remap_block 111 * - new lvm_snapshot_remap_new_block 112 * 08/10/1999 - implemented support for multiple snapshots per 113 * original logical volume 114 * 12/10/1999 - support for 2.3.19 115 * 11/11/1999 - support for 2.3.28 116 * 21/11/1999 - changed lvm_map() interface to buffer_head based 117 * 19/12/1999 - support for 2.3.33 118 * 01/01/2000 - changed locking concept in lvm_map(), 119 * lvm_do_vg_create() and lvm_do_lv_remove() 120 * 15/01/2000 - fixed PV_FLUSH bug in lvm_chr_ioctl() 121 * 24/01/2000 - ported to 2.3.40 including Alan Cox's pointer changes etc. 
122 * 29/01/2000 - used kmalloc/kfree again for all small structures 123 * 20/01/2000 - cleaned up lvm_chr_ioctl by moving code 124 * to seperated functions 125 * - avoided "/dev/" in proc filesystem output 126 * - avoided inline strings functions lvm_strlen etc. 127 * 14/02/2000 - support for 2.3.43 128 * - integrated Andrea Arcagneli's snapshot code 129 * 25/06/2000 - james (chip) , IKKHAYD! roffl 130 * 26/06/2000 - enhanced lv_extend_reduce for snapshot logical volume 131 * support 132 * 06/09/2000 - added devfs support 133 * 07/09/2000 - changed IOP version to 9 134 * - started to add new char ioctl LV_STATUS_BYDEV_T to support 135 * getting an lv_t based on the dev_t of the Logical Volume 136 * 14/09/2000 - enhanced lvm_do_lv_create to upcall VFS functions 137 * to sync and lock, activate snapshot and unlock the FS 138 * (to support journaled filesystems) 139 * 18/09/2000 - hardsector size support 140 * 27/09/2000 - implemented lvm_do_lv_rename() and lvm_do_vg_rename() 141 * 30/10/2000 - added Andi Kleen's LV_BMAP ioctl to support LILO 142 * 01/11/2000 - added memory information on hash tables to 143 * lvm_proc_get_global_info() 144 * 02/11/2000 - implemented /proc/lvm/ hierarchy 145 * 22/11/2000 - changed lvm_do_create_proc_entry_of_pv () to work 146 * with devfs 147 * 26/11/2000 - corrected #ifdef locations for PROC_FS 148 * 28/11/2000 - fixed lvm_do_vg_extend() NULL pointer BUG 149 * - fixed lvm_do_create_proc_entry_of_pv() buffer tampering BUG 150 * 08/01/2001 - Removed conditional compiles related to PROC_FS, 151 * procfs is always supported now. (JT) 152 * 12/01/2001 - avoided flushing logical volume in case of shrinking 153 * because of unecessary overhead in case of heavy updates 154 * 25/01/2001 - Allow RO open of an inactive LV so it can be reactivated. 155 * 31/01/2001 - removed blk_init_queue/blk_cleanup_queue queueing will be 156 * handled by the proper devices. 
157 * - If you try and BMAP a snapshot you now get an -EPERM 158 * 01/01/2001 - lvm_map() now calls buffer_IO_error on error for 2.4 159 * - factored __remap_snapshot out of lvm_map 160 * 12/02/2001 - move devfs code to create VG before LVs 161 * 13/02/2001 - allow VG_CREATE on /dev/lvm 162 * 14/02/2001 - removed modversions.h 163 * - tidied device defines for blk.h 164 * - tidied debug statements 165 * - bug: vg[] member not set back to NULL if activation fails 166 * - more lvm_map tidying 167 * 15/02/2001 - register /dev/lvm with devfs correctly (major/minor 168 * were swapped) 169 * 19/02/2001 - preallocated buffer_heads for rawio when using 170 * snapshots [JT] 171 * 28/02/2001 - introduced the P_DEV macro and changed some internel 172 * functions to be static [AD] 173 * 28/02/2001 - factored lvm_get_snapshot_use_rate out of blk_ioctl [AD] 174 * - fixed user address accessing bug in lvm_do_lv_create() 175 * where the check for an existing LV takes place right at 176 * the beginning 177 * 01/03/2001 - Add VG_CREATE_OLD for IOP 10 compatibility 178 * 02/03/2001 - Don't destroy usermode pointers in lv_t structures duing 179 * LV_STATUS_BYxxx 180 * and remove redundant lv_t variables from same. 181 * - avoid compilation of lvm_dummy_device_request in case of 182 * Linux >= 2.3.0 to avoid a warning 183 * - added lvm_name argument to printk in buffer allocation 184 * in order to avoid a warning 185 * 04/03/2001 - moved linux/version.h above first use of KERNEL_VERSION 186 * macros 187 * 05/03/2001 - restore copying pe_t array in lvm_do_lv_status_byname. 
For 188 * lvdisplay -v (PC) 189 * - restore copying pe_t array in lvm_do_lv_status_byindex (HM) 190 * - added copying pe_t array in lvm_do_lv_status_bydev (HM) 191 * - enhanced lvm_do_lv_status_by{name,index,dev} to be capable 192 * to copy the lv_block_exception_t array to userspace (HM) 193 * 08/03/2001 - initialize new lv_ptr->lv_COW_table_iobuf for snapshots; 194 * removed obsolete lv_ptr->lv_COW_table_page initialization 195 * - factored lvm_do_pv_flush out of lvm_chr_ioctl (HM) 196 * 09/03/2001 - Added _lock_open_count to ensure we only drop the lock 197 * when the locking process closes. 198 * 05/04/2001 - Defer writes to an extent that is being moved [JT] 199 * 05/04/2001 - use b_rdev and b_rsector rather than b_dev and b_blocknr in 200 * lvm_map() in order to make stacking devices more happy (HM) 201 * 11/04/2001 - cleaned up the pvmove queue code. I no longer retain the 202 * rw flag, instead WRITEA's are just dropped [JT] 203 * 30/04/2001 - added KERNEL_VERSION > 2.4.3 get_hardsect_size() rather 204 * than get_hardblocksize() call 205 * 03/05/2001 - Use copy_to/from_user to preserve pointers in 206 * lvm_do_status_by* 207 * 11/05/2001 - avoid accesses to inactive snapshot data in 208 * __update_hardsectsize() and lvm_do_lv_extend_reduce() (JW) 209 * 28/05/2001 - implemented missing BLKSSZGET ioctl 210 * 05/06/2001 - Move _pe_lock out of fast path for lvm_map when no PEs 211 * locked. Make buffer queue flush not need locking. 212 * Fix lvm_user_bmap() to set b_rsector for new lvm_map(). [AED] 213 * 30/06/2001 - Speed up __update_hardsectsize() by checking if PVs have 214 * the same hardsectsize (very likely) before scanning all LEs 215 * in the LV each time. 
[AED] 216 * 12/10/2001 - Use add/del_gendisk() routines in 2.4.10+ 217 * 01/11/2001 - Backport read_ahead change from Linus kernel [AED] 218 * 24/05/2002 - fixed locking bug in lvm_do_le_remap() introduced with 1.0.4 219 * 13/06/2002 - use blk_ioctl() to support various standard block ioctls 220 * - support HDIO_GETGEO_BIG ioctl 221 * 05/07/2002 - fixed OBO error on vg array access [benh@kernel.crashing.org] 222 * 22/07/2002 - streamlined blk_ioctl() call 223 * 224 */ 225 226#include <linux/version.h> 227 228#define MAJOR_NR LVM_BLK_MAJOR 229#define DEVICE_OFF(device) 230#define LOCAL_END_REQUEST 231 232/* lvm_do_lv_create calls fsync_dev_lockfs()/unlockfs() */ 233/* #define LVM_VFS_ENHANCEMENT */ 234 235#include <linux/config.h> 236#include <linux/module.h> 237#include <linux/kernel.h> 238#include <linux/vmalloc.h> 239 240#include <linux/slab.h> 241#include <linux/init.h> 242 243#include <linux/hdreg.h> 244#include <linux/stat.h> 245#include <linux/fs.h> 246#include <linux/proc_fs.h> 247#include <linux/blkdev.h> 248#include <linux/genhd.h> 249#include <linux/locks.h> 250 251 252#include <linux/devfs_fs_kernel.h> 253#include <linux/smp_lock.h> 254#include <asm/ioctl.h> 255#include <asm/segment.h> 256#include <asm/uaccess.h> 257 258#ifdef CONFIG_KERNELD 259#include <linux/kerneld.h> 260#endif 261 262#include <linux/blk.h> 263#include <linux/blkpg.h> 264 265#include <linux/errno.h> 266#include <linux/lvm.h> 267 268#include "lvm-internal.h" 269 270#define LVM_CORRECT_READ_AHEAD(a) \ 271do { \ 272 if ((a) < LVM_MIN_READ_AHEAD || \ 273 (a) > LVM_MAX_READ_AHEAD) \ 274 (a) = LVM_DEFAULT_READ_AHEAD; \ 275 read_ahead[MAJOR_NR] = (a); \ 276} while(0) 277 278#ifndef WRITEA 279# define WRITEA WRITE 280#endif 281 282 283/* 284 * External function prototypes 285 */ 286static int lvm_make_request_fn(request_queue_t*, int, struct buffer_head*); 287 288static int lvm_blk_ioctl(struct inode *, struct file *, uint, ulong); 289static int lvm_blk_open(struct inode *, struct file *); 
290 291static int lvm_blk_close(struct inode *, struct file *); 292static int lvm_get_snapshot_use_rate(lv_t *lv_ptr, void *arg); 293static int lvm_user_bmap(struct inode *, struct lv_bmap *); 294 295static int lvm_chr_open(struct inode *, struct file *); 296static int lvm_chr_close(struct inode *, struct file *); 297static int lvm_chr_ioctl(struct inode *, struct file *, uint, ulong); 298 299 300/* End external function prototypes */ 301 302 303/* 304 * Internal function prototypes 305 */ 306static void lvm_cleanup(void); 307static void lvm_init_vars(void); 308 309#ifdef LVM_HD_NAME 310extern void (*lvm_hd_name_ptr) (char *, int); 311#endif 312static int lvm_map(struct buffer_head *, int); 313static int lvm_do_lock_lvm(void); 314static int lvm_do_le_remap(vg_t *, void *); 315 316static int lvm_do_pv_create(pv_t *, vg_t *, ulong); 317static int lvm_do_pv_remove(vg_t *, ulong); 318static int lvm_do_lv_create(int, char *, lv_t *); 319static int lvm_do_lv_extend_reduce(int, char *, lv_t *); 320static int lvm_do_lv_remove(int, char *, int); 321static int lvm_do_lv_rename(vg_t *, lv_req_t *, lv_t *); 322static int lvm_do_lv_status_byname(vg_t *r, void *); 323static int lvm_do_lv_status_byindex(vg_t *, void *); 324static int lvm_do_lv_status_bydev(vg_t *, void *); 325 326static int lvm_do_pe_lock_unlock(vg_t *r, void *); 327 328static int lvm_do_pv_change(vg_t*, void*); 329static int lvm_do_pv_status(vg_t *, void *); 330static int lvm_do_pv_flush(void *); 331 332static int lvm_do_vg_create(void *, int minor); 333static int lvm_do_vg_extend(vg_t *, void *); 334static int lvm_do_vg_reduce(vg_t *, void *); 335static int lvm_do_vg_rename(vg_t *, void *); 336static int lvm_do_vg_remove(int); 337static void lvm_geninit(struct gendisk *); 338static void __update_hardsectsize(lv_t *lv); 339 340 341static void _queue_io(struct buffer_head *bh, int rw); 342static struct buffer_head *_dequeue_io(void); 343static void _flush_io(struct buffer_head *bh); 344 345static int 
_open_pv(pv_t *pv); 346static void _close_pv(pv_t *pv); 347 348static unsigned long _sectors_to_k(unsigned long sect); 349 350#ifdef LVM_HD_NAME 351void lvm_hd_name(char *, int); 352#endif 353/* END Internal function prototypes */ 354 355 356/* variables */ 357char *lvm_version = "LVM version "LVM_RELEASE_NAME"("LVM_RELEASE_DATE")"; 358ushort lvm_iop_version = LVM_DRIVER_IOP_VERSION; 359int loadtime = 0; 360const char *const lvm_name = LVM_NAME; 361 362 363/* volume group descriptor area pointers */ 364vg_t *vg[ABS_MAX_VG + 1]; 365 366/* map from block minor number to VG and LV numbers */ 367static struct { 368 int vg_number; 369 int lv_number; 370} vg_lv_map[ABS_MAX_LV]; 371 372 373/* Request structures (lvm_chr_ioctl()) */ 374static pv_change_req_t pv_change_req; 375static pv_status_req_t pv_status_req; 376volatile static pe_lock_req_t pe_lock_req; 377static le_remap_req_t le_remap_req; 378static lv_req_t lv_req; 379 380#ifdef LVM_TOTAL_RESET 381static int lvm_reset_spindown = 0; 382#endif 383 384static char pv_name[NAME_LEN]; 385/* static char rootvg[NAME_LEN] = { 0, }; */ 386static int lock = 0; 387static int _lock_open_count = 0; 388static uint vg_count = 0; 389static long lvm_chr_open_count = 0; 390static DECLARE_WAIT_QUEUE_HEAD(lvm_wait); 391 392static spinlock_t lvm_lock = SPIN_LOCK_UNLOCKED; 393static spinlock_t lvm_snapshot_lock = SPIN_LOCK_UNLOCKED; 394 395static struct buffer_head *_pe_requests; 396static DECLARE_RWSEM(_pe_lock); 397 398 399struct file_operations lvm_chr_fops = { 400 owner: THIS_MODULE, 401 open: lvm_chr_open, 402 release: lvm_chr_close, 403 ioctl: lvm_chr_ioctl, 404}; 405 406/* block device operations structure needed for 2.3.38? 
and above */ 407struct block_device_operations lvm_blk_dops = 408{ 409 owner: THIS_MODULE, 410 open: lvm_blk_open, 411 release: lvm_blk_close, 412 ioctl: lvm_blk_ioctl, 413}; 414 415 416/* gendisk structures */ 417static struct hd_struct lvm_hd_struct[MAX_LV]; 418static int lvm_blocksizes[MAX_LV]; 419static int lvm_hardsectsizes[MAX_LV]; 420static int lvm_size[MAX_LV]; 421 422static struct gendisk lvm_gendisk = 423{ 424 major: MAJOR_NR, 425 major_name: LVM_NAME, 426 minor_shift: 0, 427 max_p: 1, 428 part: lvm_hd_struct, 429 sizes: lvm_size, 430 nr_real: MAX_LV, 431}; 432 433 434/* 435 * Driver initialization... 436 */ 437int lvm_init(void) 438{ 439 if (devfs_register_chrdev(LVM_CHAR_MAJOR, 440 lvm_name, &lvm_chr_fops) < 0) { 441 printk(KERN_ERR "%s -- devfs_register_chrdev failed\n", 442 lvm_name); 443 return -EIO; 444 } 445 if (devfs_register_blkdev(MAJOR_NR, lvm_name, &lvm_blk_dops) < 0) 446 { 447 printk("%s -- devfs_register_blkdev failed\n", lvm_name); 448 if (devfs_unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0) 449 printk(KERN_ERR 450 "%s -- devfs_unregister_chrdev failed\n", 451 lvm_name); 452 return -EIO; 453 } 454 455 lvm_init_fs(); 456 lvm_init_vars(); 457 lvm_geninit(&lvm_gendisk); 458 459 /* insert our gendisk at the corresponding major */ 460 add_gendisk(&lvm_gendisk); 461 462#ifdef LVM_HD_NAME 463 /* reference from drivers/block/genhd.c */ 464 lvm_hd_name_ptr = lvm_hd_name; 465#endif 466 467 blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), lvm_make_request_fn); 468 469 470 /* initialise the pe lock */ 471 pe_lock_req.lock = UNLOCK_PE; 472 473 /* optional read root VGDA */ 474/* 475 if ( *rootvg != 0) vg_read_with_pv_and_lv ( rootvg, &vg); 476*/ 477 478#ifdef MODULE 479 printk(KERN_INFO "%s module loaded\n", lvm_version); 480#else 481 printk(KERN_INFO "%s\n", lvm_version); 482#endif 483 484 return 0; 485} /* lvm_init() */ 486 487/* 488 * cleanup... 
489 */ 490 491static void lvm_cleanup(void) 492{ 493 if (devfs_unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0) 494 printk(KERN_ERR "%s -- devfs_unregister_chrdev failed\n", 495 lvm_name); 496 if (devfs_unregister_blkdev(MAJOR_NR, lvm_name) < 0) 497 printk(KERN_ERR "%s -- devfs_unregister_blkdev failed\n", 498 lvm_name); 499 500 501 502 /* delete our gendisk from chain */ 503 del_gendisk(&lvm_gendisk); 504 505 blk_size[MAJOR_NR] = NULL; 506 blksize_size[MAJOR_NR] = NULL; 507 hardsect_size[MAJOR_NR] = NULL; 508 509#ifdef LVM_HD_NAME 510 /* reference from linux/drivers/block/genhd.c */ 511 lvm_hd_name_ptr = NULL; 512#endif 513 514 /* unregister with procfs and devfs */ 515 lvm_fin_fs(); 516 517#ifdef MODULE 518 printk(KERN_INFO "%s -- Module successfully deactivated\n", lvm_name); 519#endif 520 521 return; 522} /* lvm_cleanup() */ 523 524/* 525 * support function to initialize lvm variables 526 */ 527static void __init lvm_init_vars(void) 528{ 529 int v; 530 531 loadtime = CURRENT_TIME; 532 533 lvm_lock = lvm_snapshot_lock = SPIN_LOCK_UNLOCKED; 534 535 pe_lock_req.lock = UNLOCK_PE; 536 pe_lock_req.data.lv_dev = 0; 537 pe_lock_req.data.pv_dev = 0; 538 pe_lock_req.data.pv_offset = 0; 539 540 /* Initialize VG pointers */ 541 for (v = 0; v < ABS_MAX_VG + 1; v++) 542 vg[v] = NULL; 543 544 /* Initialize LV -> VG association */ 545 for (v = 0; v < ABS_MAX_LV; v++) { 546 /* index ABS_MAX_VG never used for real VG */ 547 vg_lv_map[v].vg_number = ABS_MAX_VG; 548 vg_lv_map[v].lv_number = -1; 549 } 550 551 return; 552} /* lvm_init_vars() */ 553 554 555/******************************************************************** 556 * 557 * Character device functions 558 * 559 ********************************************************************/ 560 561#define MODE_TO_STR(mode) (mode) & FMODE_READ ? "READ" : "", \ 562 (mode) & FMODE_WRITE ? 
"WRITE" : "" 563 564/* 565 * character device open routine 566 */ 567static int lvm_chr_open(struct inode *inode, struct file *file) 568{ 569 int minor = MINOR(inode->i_rdev); 570 571 P_DEV("chr_open MINOR: %d VG#: %d mode: %s%s lock: %d\n", 572 minor, VG_CHR(minor), MODE_TO_STR(file->f_mode), lock); 573 574 /* super user validation */ 575 if (!capable(CAP_SYS_ADMIN)) return -EACCES; 576 577 /* Group special file open */ 578 if (VG_CHR(minor) > MAX_VG) return -ENXIO; 579 580 spin_lock(&lvm_lock); 581 if(lock == current->pid) 582 _lock_open_count++; 583 spin_unlock(&lvm_lock); 584 585 lvm_chr_open_count++; 586 587 MOD_INC_USE_COUNT; 588 589 return 0; 590} /* lvm_chr_open() */ 591 592 593/* 594 * character device i/o-control routine 595 * 596 * Only one changing process can do changing ioctl at one time, 597 * others will block. 598 * 599 */ 600static int lvm_chr_ioctl(struct inode *inode, struct file *file, 601 uint command, ulong a) 602{ 603 int minor = MINOR(inode->i_rdev); 604 uint extendable, l, v; 605 void *arg = (void *) a; 606 lv_t lv; 607 vg_t* vg_ptr = vg[VG_CHR(minor)]; 608 609 /* otherwise cc will complain about unused variables */ 610 (void) lvm_lock; 611 612 P_IOCTL("chr MINOR: %d command: 0x%X arg: %p VG#: %d mode: %s%s\n", 613 minor, command, arg, VG_CHR(minor), MODE_TO_STR(file->f_mode)); 614 615#ifdef LVM_TOTAL_RESET 616 if (lvm_reset_spindown > 0) return -EACCES; 617#endif 618 619 /* Main command switch */ 620 switch (command) { 621 case LVM_LOCK_LVM: 622 /* lock the LVM */ 623 return lvm_do_lock_lvm(); 624 625 case LVM_GET_IOP_VERSION: 626 /* check lvm version to ensure driver/tools+lib 627 interoperability */ 628 if (copy_to_user(arg, &lvm_iop_version, sizeof(ushort)) != 0) 629 return -EFAULT; 630 return 0; 631 632#ifdef LVM_TOTAL_RESET 633 case LVM_RESET: 634 /* lock reset function */ 635 lvm_reset_spindown = 1; 636 for (v = 0; v < ABS_MAX_VG; v++) { 637 if (vg[v] != NULL) lvm_do_vg_remove(v); 638 } 639 640#ifdef MODULE 641 while 
(GET_USE_COUNT(&__this_module) < 1) 642 MOD_INC_USE_COUNT; 643 while (GET_USE_COUNT(&__this_module) > 1) 644 MOD_DEC_USE_COUNT; 645#endif /* MODULE */ 646 lock = 0; /* release lock */ 647 wake_up_interruptible(&lvm_wait); 648 return 0; 649#endif /* LVM_TOTAL_RESET */ 650 651 652 case LE_REMAP: 653 /* remap a logical extent (after moving the physical extent) */ 654 return lvm_do_le_remap(vg_ptr,arg); 655 656 case PE_LOCK_UNLOCK: 657 /* lock/unlock i/o to a physical extent to move it to another 658 physical volume (move's done in user space's pvmove) */ 659 return lvm_do_pe_lock_unlock(vg_ptr,arg); 660 661 case VG_CREATE_OLD: 662 /* create a VGDA */ 663 return lvm_do_vg_create(arg, minor); 664 665 case VG_CREATE: 666 /* create a VGDA, assume VG number is filled in */ 667 return lvm_do_vg_create(arg, -1); 668 669 case VG_EXTEND: 670 /* extend a volume group */ 671 return lvm_do_vg_extend(vg_ptr, arg); 672 673 case VG_REDUCE: 674 /* reduce a volume group */ 675 return lvm_do_vg_reduce(vg_ptr, arg); 676 677 case VG_RENAME: 678 /* rename a volume group */ 679 return lvm_do_vg_rename(vg_ptr, arg); 680 681 case VG_REMOVE: 682 /* remove an inactive VGDA */ 683 return lvm_do_vg_remove(minor); 684 685 686 case VG_SET_EXTENDABLE: 687 /* set/clear extendability flag of volume group */ 688 if (vg_ptr == NULL) return -ENXIO; 689 if (copy_from_user(&extendable, arg, sizeof(extendable)) != 0) 690 return -EFAULT; 691 692 if (extendable == VG_EXTENDABLE || 693 extendable == ~VG_EXTENDABLE) { 694 if (extendable == VG_EXTENDABLE) 695 vg_ptr->vg_status |= VG_EXTENDABLE; 696 else 697 vg_ptr->vg_status &= ~VG_EXTENDABLE; 698 } else return -EINVAL; 699 return 0; 700 701 702 case VG_STATUS: 703 /* get volume group data (only the vg_t struct) */ 704 if (vg_ptr == NULL) return -ENXIO; 705 if (copy_to_user(arg, vg_ptr, sizeof(vg_t)) != 0) 706 return -EFAULT; 707 return 0; 708 709 710 case VG_STATUS_GET_COUNT: 711 /* get volume group count */ 712 if (copy_to_user(arg, &vg_count, 
sizeof(vg_count)) != 0) 713 return -EFAULT; 714 return 0; 715 716 717 case VG_STATUS_GET_NAMELIST: 718 /* get volume group names */ 719 for (l = v = 0; v < ABS_MAX_VG; v++) { 720 if (vg[v] != NULL) { 721 if (copy_to_user(arg + l * NAME_LEN, 722 vg[v]->vg_name, 723 NAME_LEN) != 0) 724 return -EFAULT; 725 l++; 726 } 727 } 728 return 0; 729 730 731 case LV_CREATE: 732 case LV_EXTEND: 733 case LV_REDUCE: 734 case LV_REMOVE: 735 case LV_RENAME: 736 /* create, extend, reduce, remove or rename a logical volume */ 737 if (vg_ptr == NULL) return -ENXIO; 738 if (copy_from_user(&lv_req, arg, sizeof(lv_req)) != 0) 739 return -EFAULT; 740 741 if (command != LV_REMOVE) { 742 if (copy_from_user(&lv, lv_req.lv, sizeof(lv_t)) != 0) 743 return -EFAULT; 744 } 745 switch (command) { 746 case LV_CREATE: 747 return lvm_do_lv_create(minor, lv_req.lv_name, &lv); 748 749 case LV_EXTEND: 750 case LV_REDUCE: 751 return lvm_do_lv_extend_reduce(minor, lv_req.lv_name, &lv); 752 case LV_REMOVE: 753 return lvm_do_lv_remove(minor, lv_req.lv_name, -1); 754 755 case LV_RENAME: 756 return lvm_do_lv_rename(vg_ptr, &lv_req, &lv); 757 } 758 759 760 761 762 case LV_STATUS_BYNAME: 763 /* get status of a logical volume by name */ 764 return lvm_do_lv_status_byname(vg_ptr, arg); 765 766 767 case LV_STATUS_BYINDEX: 768 /* get status of a logical volume by index */ 769 return lvm_do_lv_status_byindex(vg_ptr, arg); 770 771 772 case LV_STATUS_BYDEV: 773 /* get status of a logical volume by device */ 774 return lvm_do_lv_status_bydev(vg_ptr, arg); 775 776 777 case PV_CHANGE: 778 /* change a physical volume */ 779 return lvm_do_pv_change(vg_ptr,arg); 780 781 782 case PV_STATUS: 783 /* get physical volume data (pv_t structure only) */ 784 return lvm_do_pv_status(vg_ptr,arg); 785 786 787 case PV_FLUSH: 788 /* physical volume buffer flush/invalidate */ 789 return lvm_do_pv_flush(arg); 790 791 792 default: 793 printk(KERN_WARNING 794 "%s -- lvm_chr_ioctl: unknown command 0x%x\n", 795 lvm_name, command); 796 return 
-ENOTTY; 797 } 798 799 return 0; 800} /* lvm_chr_ioctl */ 801 802 803/* 804 * character device close routine 805 */ 806static int lvm_chr_close(struct inode *inode, struct file *file) 807{ 808 P_DEV("chr_close MINOR: %d VG#: %d\n", 809 MINOR(inode->i_rdev), VG_CHR(MINOR(inode->i_rdev))); 810 811#ifdef LVM_TOTAL_RESET 812 if (lvm_reset_spindown > 0) { 813 lvm_reset_spindown = 0; 814 lvm_chr_open_count = 0; 815 } 816#endif 817 818 if (lvm_chr_open_count > 0) lvm_chr_open_count--; 819 820 spin_lock(&lvm_lock); 821 if(lock == current->pid) { 822 if(!_lock_open_count) { 823 P_DEV("chr_close: unlocking LVM for pid %d\n", lock); 824 lock = 0; 825 wake_up_interruptible(&lvm_wait); 826 } else 827 _lock_open_count--; 828 } 829 spin_unlock(&lvm_lock); 830 831 MOD_DEC_USE_COUNT; 832 833 return 0; 834} /* lvm_chr_close() */ 835 836 837 838/******************************************************************** 839 * 840 * Block device functions 841 * 842 ********************************************************************/ 843 844/* 845 * block device open routine 846 */ 847static int lvm_blk_open(struct inode *inode, struct file *file) 848{ 849 int minor = MINOR(inode->i_rdev); 850 lv_t *lv_ptr; 851 vg_t *vg_ptr = vg[VG_BLK(minor)]; 852 853 P_DEV("blk_open MINOR: %d VG#: %d LV#: %d mode: %s%s\n", 854 minor, VG_BLK(minor), LV_BLK(minor), MODE_TO_STR(file->f_mode)); 855 856#ifdef LVM_TOTAL_RESET 857 if (lvm_reset_spindown > 0) 858 return -EPERM; 859#endif 860 861 if (vg_ptr != NULL && 862 (vg_ptr->vg_status & VG_ACTIVE) && 863 (lv_ptr = vg_ptr->lv[LV_BLK(minor)]) != NULL && 864 LV_BLK(minor) >= 0 && 865 LV_BLK(minor) < vg_ptr->lv_max) { 866 867 /* Check parallel LV spindown (LV remove) */ 868 if (lv_ptr->lv_status & LV_SPINDOWN) return -EPERM; 869 870 /* Check inactive LV and open for read/write */ 871 /* We need to be able to "read" an inactive LV 872 to re-activate it again */ 873 if ((file->f_mode & FMODE_WRITE) && 874 (!(lv_ptr->lv_status & LV_ACTIVE))) 875 return -EPERM; 876 
	/* Tail of lvm_blk_open() -- the function head lies above this chunk.
	   Refuse a write open of a logical volume without LV_WRITE access. */
	if (!(lv_ptr->lv_access & LV_WRITE) &&
	    (file->f_mode & FMODE_WRITE))
		return -EACCES;


	/* be sure to increment VG counter */
	if (lv_ptr->lv_open == 0) vg_ptr->lv_open++;
	lv_ptr->lv_open++;

	MOD_INC_USE_COUNT;

	P_DEV("blk_open OK, LV size %d\n", lv_ptr->lv_size);

	return 0;
	}
	return -ENXIO;
} /* lvm_blk_open() */

/* Deliver "hard disk geometry" */
/*
 * Fabricate a CHS geometry for a logical volume: start at 128 heads and
 * 128 sectors and halve both until heads*sectors fits in the LV size
 * (in sectors), then derive the cylinder count.  'a' is the user-space
 * destination address; 'what' selects the structure to fill:
 * 0 = struct hd_geometry, 1 = struct hd_big_geometry (when available).
 *
 * NOTE(review): for a tiny/zero-sized LV the shrink loop can drive both
 * heads and sectors to 0, making the division below divide by zero --
 * confirm the minimum LV size enforced elsewhere.
 */
static int _hdio_getgeo(ulong a, lv_t *lv_ptr, int what)
{
	int ret = 0;
	uchar heads = 128;
	uchar sectors = 128;
	ulong start = 0;
	uint cylinders;

	while ( heads * sectors > lv_ptr->lv_size) {
		heads >>= 1;
		sectors >>= 1;
	}
	cylinders = lv_ptr->lv_size / heads / sectors;

	switch (what) {
	case 0:
	{
		struct hd_geometry *hd = (struct hd_geometry *) a;

		if (put_user(heads, &hd->heads) ||
		    put_user(sectors, &hd->sectors) ||
		    put_user((ushort) cylinders, &hd->cylinders) ||
		    put_user(start, &hd->start))
			return -EFAULT;
		break;
	}

#ifdef HDIO_GETGEO_BIG
	case 1:
	{
		struct hd_big_geometry *hd =
			(struct hd_big_geometry *) a;

		if (put_user(heads, &hd->heads) ||
		    put_user(sectors, &hd->sectors) ||
		    put_user(cylinders, &hd->cylinders) ||
		    put_user(start, &hd->start))
			return -EFAULT;
		break;
	}
#endif

	}

	P_IOCTL("%s -- lvm_blk_ioctl -- cylinders: %d\n",
		lvm_name, cylinders);
	return ret;
}


/*
 * block device i/o-control routine
 *
 * Dispatches the block-device ioctls of an LV.  Privileged LV_SET_*
 * commands take the LV rwsem for writing; unknown commands fall
 * through to the generic blk_ioctl().
 */
static int lvm_blk_ioctl(struct inode *inode, struct file *file,
			 uint cmd, ulong a)
{
	kdev_t dev = inode->i_rdev;
	int minor = MINOR(dev), ret;
	vg_t *vg_ptr = vg[VG_BLK(minor)];
	lv_t *lv_ptr = vg_ptr->lv[LV_BLK(minor)];
	void *arg = (void *) a;

	P_IOCTL("blk MINOR: %d cmd: 0x%X arg: %p VG#: %d LV#: %d "
		"mode: %s%s\n", minor, cmd, arg, VG_BLK(minor),
		LV_BLK(minor), MODE_TO_STR(file->f_mode));

	switch (cmd) {
	case BLKRASET:
		/* set read ahead for block device */
		ret = blk_ioctl(dev, cmd, a);
		if (ret)
			return ret;
		/* remember the per-LV value so lvm_map users see it */
		lv_ptr->lv_read_ahead = (long) a;
		LVM_CORRECT_READ_AHEAD(lv_ptr->lv_read_ahead);
		break;

	case HDIO_GETGEO:
#ifdef HDIO_GETGEO_BIG
	case HDIO_GETGEO_BIG:
#endif
		/* get disk geometry */
		P_IOCTL("%s -- lvm_blk_ioctl -- HDIO_GETGEO\n",
			lvm_name);
		if (!a)
			return -EINVAL;

		/* both inner cases return, so control never reaches
		   the LV_BMAP label below for these commands */
		switch (cmd) {
		case HDIO_GETGEO:
			return _hdio_getgeo(a, lv_ptr, 0);
#ifdef HDIO_GETGEO_BIG
		case HDIO_GETGEO_BIG:
			return _hdio_getgeo(a, lv_ptr, 1);
#endif
		}

	case LV_BMAP:
		/* turn logical block into (dev_t, block). non privileged. */
		/* don't bmap a snapshot, since the mapping can change */
		if (lv_ptr->lv_access & LV_SNAPSHOT)
			return -EPERM;

		return lvm_user_bmap(inode, (struct lv_bmap *) arg);

	case LV_SET_ACCESS:
		/* set access flags of a logical volume */
		if (!capable(CAP_SYS_ADMIN)) return -EACCES;

		down_write(&lv_ptr->lv_lock);
		lv_ptr->lv_access = (ulong) arg;
		up_write(&lv_ptr->lv_lock);

		/* keep the device's read-only state in sync */
		if ( lv_ptr->lv_access & LV_WRITE)
			set_device_ro(lv_ptr->lv_dev, 0);
		else
			set_device_ro(lv_ptr->lv_dev, 1);
		break;


	case LV_SET_ALLOCATION:
		/* set allocation flags of a logical volume */
		if (!capable(CAP_SYS_ADMIN)) return -EACCES;
		down_write(&lv_ptr->lv_lock);
		lv_ptr->lv_allocation = (ulong) arg;
		up_write(&lv_ptr->lv_lock);
		break;

	case LV_SET_STATUS:
		/* set status flags of a logical volume */
		if (!capable(CAP_SYS_ADMIN)) return -EACCES;
		/* refuse to deactivate an LV that is still open elsewhere */
		if (!((ulong) arg & LV_ACTIVE) && lv_ptr->lv_open > 1)
			return -EPERM;
		down_write(&lv_ptr->lv_lock);
		lv_ptr->lv_status = (ulong) arg;
		up_write(&lv_ptr->lv_lock);
		break;

	case LV_SNAPSHOT_USE_RATE:
		return lvm_get_snapshot_use_rate(lv_ptr, arg);

	default:
		/* Handle rest here */
		ret = blk_ioctl(dev, cmd, a);
		if (ret)
			printk(KERN_WARNING
			       "%s -- lvm_blk_ioctl: unknown "
			       "cmd 0x%x\n",
			       lvm_name, cmd);
		return ret;
	}

	return 0;
} /* lvm_blk_ioctl() */


/*
 * block device close routine
 *
 * Mirrors lvm_blk_open(): drops the LV open count, and the VG-wide
 * count when this was the last opener of the LV.
 */
static int lvm_blk_close(struct inode *inode, struct file *file)
{
	int minor = MINOR(inode->i_rdev);
	vg_t *vg_ptr = vg[VG_BLK(minor)];
	lv_t *lv_ptr = vg_ptr->lv[LV_BLK(minor)];

	P_DEV("blk_close MINOR: %d VG#: %d LV#: %d\n",
	      minor, VG_BLK(minor), LV_BLK(minor));

	if (lv_ptr->lv_open == 1) vg_ptr->lv_open--;
	lv_ptr->lv_open--;

	MOD_DEC_USE_COUNT;

	return 0;
} /* lvm_blk_close() */

/*
 * LV_SNAPSHOT_USE_RATE ioctl backend: set the snapshot fill-level
 * threshold and report the current fill percentage back to user space.
 * With block == 0 the caller sleeps until the snapshot usage reaches
 * the requested rate; O_NONBLOCK returns immediately.  Only valid on
 * snapshot LVs.
 */
static int lvm_get_snapshot_use_rate(lv_t *lv, void *arg)
{
	lv_snapshot_use_rate_req_t lv_rate_req;

	down_read(&lv->lv_lock);
	if (!(lv->lv_access & LV_SNAPSHOT)) {
		up_read(&lv->lv_lock);
		return -EPERM;
	}
	up_read(&lv->lv_lock);

	if (copy_from_user(&lv_rate_req, arg, sizeof(lv_rate_req)))
		return -EFAULT;

	/* rate is a percentage */
	if (lv_rate_req.rate < 0 || lv_rate_req.rate > 100)
		return -EINVAL;

	switch (lv_rate_req.block) {
	case 0:
		/* blocking mode: wait until usage crosses the threshold */
		down_write(&lv->lv_lock);
		lv->lv_snapshot_use_rate = lv_rate_req.rate;
		up_write(&lv->lv_lock);
		down_read(&lv->lv_lock);
		if (lv->lv_remap_ptr * 100 / lv->lv_remap_end <
		    lv->lv_snapshot_use_rate) {
			/* drop the lock over the sleep; woken by the
			   snapshot COW path */
			up_read(&lv->lv_lock);
			interruptible_sleep_on(&lv->lv_snapshot_wait);
			down_read(&lv->lv_lock);
		}
		up_read(&lv->lv_lock);
		break;

	case O_NONBLOCK:
		break;

	default:
		return -EINVAL;
	}
	/* report current fill level (percent of the exception table used) */
	down_read(&lv->lv_lock);
	lv_rate_req.rate = lv->lv_remap_ptr * 100 / lv->lv_remap_end;
	up_read(&lv->lv_lock);

	return copy_to_user(arg, &lv_rate_req,
			    sizeof(lv_rate_req)) ? -EFAULT : 0;
}

/*
 * LV_BMAP backend: translate a logical block number of the LV into the
 * (physical device, block) pair it maps to, by running a dummy buffer
 * head through lvm_map().  Results are written back to user space.
 */
static int lvm_user_bmap(struct inode *inode, struct lv_bmap *user_result)
{
	struct buffer_head bh;
	unsigned long block;
	int err;

	if (get_user(block, &user_result->lv_block))
		return -EFAULT;

	memset(&bh,0,sizeof bh);
	bh.b_blocknr = block;
	bh.b_dev = bh.b_rdev = inode->i_rdev;
	bh.b_size = lvm_get_blksize(bh.b_dev);
	/* convert block number to a 512-byte sector offset */
	bh.b_rsector = block * (bh.b_size >> 9);
	bh.b_end_io = NULL;	/* lvm_map() skips buffer_IO_error() then */
	if ((err = lvm_map(&bh, READ)) < 0) {
		printk("lvm map failed: %d\n", err);
		return -EINVAL;
	}

	return put_user(kdev_t_to_nr(bh.b_rdev), &user_result->lv_dev) ||
	       put_user(bh.b_rsector/(bh.b_size>>9), &user_result->lv_block) ?
	       -EFAULT : 0;
}


/*
 * block device support function for /usr/src/linux/drivers/block/ll_rw_blk.c
 * (see init_module/lvm_init)
 */
/* Copy one chunk from the snapshot origin to the snapshot device,
   re-checking the remap table under the write lock. */
static void __remap_snapshot(kdev_t rdev, ulong rsector,
			     ulong pe_start, lv_t *lv, vg_t *vg) {

	/* copy a chunk from the origin to a snapshot device */
	down_write(&lv->lv_lock);

	/* we must redo lvm_snapshot_remap_block in order to avoid a
	   race condition in the gap where no lock was held */
	if (!lvm_snapshot_remap_block(&rdev, &rsector, pe_start, lv) &&
	    !lvm_snapshot_COW(rdev, rsector, pe_start, rsector, vg, lv))
		lvm_write_COW_table_block(vg, lv);

	up_write(&lv->lv_lock);
}

/* Fast-path wrapper: peek at the remap table under the read lock and
   only take the write path when the chunk is not yet copied. */
static inline void _remap_snapshot(kdev_t rdev, ulong rsector,
				   ulong pe_start, lv_t *lv, vg_t *vg) {
	int r;

	/* check to see if this chunk is already in the snapshot */
	down_read(&lv->lv_lock);
	r = lvm_snapshot_remap_block(&rdev, &rsector, pe_start, lv);
	up_read(&lv->lv_lock);

	if (!r)
		/* we haven't yet copied this block to the snapshot */
		__remap_snapshot(rdev, rsector, pe_start, lv, vg);
}


/*
 * extents destined for a pe that is on the move should be deferred
 */
/* True when 'sector' on physical volume 'pv' falls inside the single
   physical extent currently locked by a pvmove (global pe_lock_req). */
static inline int _should_defer(kdev_t pv, ulong sector, uint32_t pe_size) {
	return ((pe_lock_req.lock == LOCK_PE) &&
		(pv == pe_lock_req.data.pv_dev) &&
		(sector >= pe_lock_req.data.pv_offset) &&
		(sector < (pe_lock_req.data.pv_offset + pe_size)));
}

/* Queue 'bh' for later replay if it targets the locked extent.
   Double-checked: test under the read lock first, re-test under the
   write lock before queueing (the lock owner may have released in
   between).  Returns 1 when the buffer was deferred, 0 otherwise. */
static inline int _defer_extent(struct buffer_head *bh, int rw,
				kdev_t pv, ulong sector, uint32_t pe_size)
{
	if (pe_lock_req.lock == LOCK_PE) {
		down_read(&_pe_lock);
		if (_should_defer(pv, sector, pe_size)) {
			up_read(&_pe_lock);
			down_write(&_pe_lock);
			if (_should_defer(pv, sector, pe_size))
				_queue_io(bh, rw);
			up_write(&_pe_lock);
			return 1;
		}
		up_read(&_pe_lock);
	}
	return 0;
}


/*
 * Remap a buffer head aimed at a logical volume onto the backing
 * physical device/sector (linear or striped layout), handling snapshot
 * COW for origins and exception-table remapping for snapshot LVs.
 * Returns 1 on success (bh->b_rdev/b_rsector rewritten), 0 when the
 * request was deferred for pvmove, -1 on error.
 */
static int lvm_map(struct buffer_head *bh, int rw)
{
	int minor = MINOR(bh->b_rdev);
	ulong index;
	ulong pe_start;
	ulong size = bh->b_size >> 9;		/* request size in sectors */
	ulong rsector_org = bh->b_rsector;	/* LV-relative sector */
	ulong rsector_map;
	kdev_t rdev_map;
	vg_t *vg_this = vg[VG_BLK(minor)];
	lv_t *lv = vg_this->lv[LV_BLK(minor)];


	down_read(&lv->lv_lock);
	if (!(lv->lv_status & LV_ACTIVE)) {
		printk(KERN_ALERT
		       "%s - lvm_map: ll_rw_blk for inactive LV %s\n",
		       lvm_name, lv->lv_name);
		goto bad;
	}

	if ((rw == WRITE || rw == WRITEA) &&
	    !(lv->lv_access & LV_WRITE)) {
		printk(KERN_CRIT
		       "%s - lvm_map: ll_rw_blk write for readonly LV %s\n",
		       lvm_name, lv->lv_name);
		goto bad;
	}

	P_MAP("%s - lvm_map minor: %d *rdev: %s *rsector: %lu size:%lu\n",
	      lvm_name, minor,
	      kdevname(bh->b_rdev),
	      rsector_org, size);

	if (rsector_org + size > lv->lv_size) {
		printk(KERN_ALERT
		       "%s - lvm_map access beyond end of device; *rsector: "
		       "%lu or size: %lu wrong for minor: %2d\n",
		       lvm_name, rsector_org, size, minor);
		goto bad;
	}


	if (lv->lv_stripes < 2) {	/* linear mapping */
		/* get the index */
		index = rsector_org / vg_this->pe_size;
		pe_start = lv->lv_current_pe[index].pe;
		rsector_map = lv->lv_current_pe[index].pe +
		    (rsector_org % vg_this->pe_size);
		rdev_map = lv->lv_current_pe[index].dev;

		P_MAP("lv_current_pe[%ld].pe: %d rdev: %s rsector:%ld\n",
		      index, lv->lv_current_pe[index].pe,
		      kdevname(rdev_map), rsector_map);

	} else {		/* striped mapping */
		ulong stripe_index;
		ulong stripe_length;

		stripe_length = vg_this->pe_size * lv->lv_stripes;
		stripe_index = (rsector_org % stripe_length) /
		    lv->lv_stripesize;
		index = rsector_org / stripe_length +
		    (stripe_index % lv->lv_stripes) *
		    (lv->lv_allocated_le / lv->lv_stripes);
		pe_start = lv->lv_current_pe[index].pe;
		rsector_map = lv->lv_current_pe[index].pe +
		    (rsector_org % stripe_length) -
		    (stripe_index % lv->lv_stripes) * lv->lv_stripesize -
		    stripe_index / lv->lv_stripes *
		    (lv->lv_stripes - 1) * lv->lv_stripesize;
		rdev_map = lv->lv_current_pe[index].dev;

		P_MAP("lv_current_pe[%ld].pe: %d rdev: %s rsector:%ld\n"
		      "stripe_length: %ld stripe_index: %ld\n",
		      index, lv->lv_current_pe[index].pe, kdevname(rdev_map),
		      rsector_map, stripe_length, stripe_index);
	}

	/*
	 * Queue writes to physical extents on the move until move completes.
	 * Don't get _pe_lock until there is a reasonable expectation that
	 * we need to queue this request, because this is in the fast path.
	 */
	if (rw == WRITE || rw == WRITEA) {
		if(_defer_extent(bh, rw, rdev_map,
				 rsector_map, vg_this->pe_size)) {

			up_read(&lv->lv_lock);
			return 0;
		}

		lv->lv_current_pe[index].writes++;	/* statistic */
	} else
		lv->lv_current_pe[index].reads++;	/* statistic */

	/* snapshot volume exception handling on physical device address base */
	if (!(lv->lv_access & (LV_SNAPSHOT|LV_SNAPSHOT_ORG)))
		goto out;

	if (lv->lv_access & LV_SNAPSHOT) {	/* remap snapshot */
		if (lvm_snapshot_remap_block(&rdev_map, &rsector_map,
					     pe_start, lv) < 0)
			goto bad;

	} else if (rw == WRITE || rw == WRITEA) {	/* snapshot origin */
		lv_t *snap;

		/* start with first snapshot and loop through all of
		   them */
		for (snap = lv->lv_snapshot_next; snap;
		     snap = snap->lv_snapshot_next) {
			/* Check for inactive snapshot */
			if (!(snap->lv_status & LV_ACTIVE))
				continue;

			/* Serializes the COW with the accesses to the
			   snapshot device */
			_remap_snapshot(rdev_map, rsector_map,
					pe_start, snap, vg_this);
		}
	}

 out:
	bh->b_rdev = rdev_map;
	bh->b_rsector = rsector_map;
	up_read(&lv->lv_lock);
	return 1;

 bad:
	if (bh->b_end_io)
		buffer_IO_error(bh);
	up_read(&lv->lv_lock);
	return -1;
} /* lvm_map() */


/*
 * internal support functions
 */

#ifdef LVM_HD_NAME
/*
 * generate "hard disk" name
 *
 * Copies the LV name minus its first five characters into 'buf'
 * (presumably stripping a "/dev/" prefix -- TODO confirm against
 * LVM_DIR_PREFIX).  Silently returns on an unknown minor.
 */
void lvm_hd_name(char *buf, int minor)
{
	int len = 0;
	lv_t *lv_ptr;

	if (vg[VG_BLK(minor)] == NULL ||
	    (lv_ptr = vg[VG_BLK(minor)]->lv[LV_BLK(minor)]) == NULL)
		return;
	len = strlen(lv_ptr->lv_name) - 5;
	memcpy(buf, &lv_ptr->lv_name[5], len);
	buf[len] = 0;
	return;
}
#endif




/*
 * make request function
 */
static int lvm_make_request_fn(request_queue_t *q,
			       int rw,
			       struct buffer_head *bh)
{
	/* ll_rw_blk contract: return 0 to stop the request (error or
	   deferred by pvmove), non-zero to proceed with the remapped bh */
	return (lvm_map(bh, rw) <= 0) ? 0 : 1;
}


/********************************************************************
 *
 * Character device support functions
 *
 ********************************************************************/
/*
 * character device support function logical volume manager lock
 *
 * Take the global LVM command lock for the current process, sleeping
 * (interruptibly) while another pid holds it.  Re-entrant for the
 * holder.  Returns 0, -EINTR on signal, -EACCES during total reset.
 */
static int lvm_do_lock_lvm(void)
{
lock_try_again:
	spin_lock(&lvm_lock);
	if (lock != 0 && lock != current->pid) {
		P_DEV("lvm_do_lock_lvm: locked by pid %d ...\n", lock);
		spin_unlock(&lvm_lock);
		interruptible_sleep_on(&lvm_wait);
		if (current->sigpending != 0)
			return -EINTR;
#ifdef LVM_TOTAL_RESET
		if (lvm_reset_spindown > 0)
			return -EACCES;
#endif
		goto lock_try_again;
	}
	lock = current->pid;
	P_DEV("lvm_do_lock_lvm: locking LVM for pid %d\n", lock);
	spin_unlock(&lvm_lock);
	return 0;
} /* lvm_do_lock_lvm */


/*
 * character device support function lock/unlock physical extend
 *
 * PE_LOCK_UNLOCK backend used by pvmove: LOCK_PE records the single
 * extent being moved in the global pe_lock_req (writes to it are then
 * deferred by lvm_map), UNLOCK_PE clears it and replays the deferred
 * buffers.
 */
static int lvm_do_pe_lock_unlock(vg_t *vg_ptr, void *arg)
{
	pe_lock_req_t new_lock;
	struct buffer_head *bh;
	uint p;

	if (vg_ptr == NULL) return -ENXIO;
	if (copy_from_user(&new_lock, arg, sizeof(new_lock)) != 0)
		return -EFAULT;

	switch (new_lock.lock) {
	case LOCK_PE:
		/* the PV to be locked must belong to this VG */
		for (p = 0; p < vg_ptr->pv_max; p++) {
			if (vg_ptr->pv[p] != NULL &&
			    new_lock.data.pv_dev == vg_ptr->pv[p]->pv_dev)
				break;
		}
		if (p == vg_ptr->pv_max) return -ENXIO;

		/*
		 * this sync releaves memory pressure to lessen the
		 * likelyhood of pvmove being paged out - resulting in
		 * deadlock.
		 *
		 * This method of doing a pvmove is broken
		 */
		fsync_dev(pe_lock_req.data.lv_dev);

		down_write(&_pe_lock);
		if (pe_lock_req.lock == LOCK_PE) {
			/* only one extent may be locked at a time */
			up_write(&_pe_lock);
			return -EBUSY;
		}

		/* Should we do to_kdev_t() on the pv_dev and lv_dev??? */
		pe_lock_req.lock = LOCK_PE;
		pe_lock_req.data.lv_dev = new_lock.data.lv_dev;
		pe_lock_req.data.pv_dev = new_lock.data.pv_dev;
		pe_lock_req.data.pv_offset = new_lock.data.pv_offset;
		up_write(&_pe_lock);

		/* some requests may have got through since the fsync */
		fsync_dev(pe_lock_req.data.pv_dev);
		break;

	case UNLOCK_PE:
		down_write(&_pe_lock);
		pe_lock_req.lock = UNLOCK_PE;
		pe_lock_req.data.lv_dev = 0;
		pe_lock_req.data.pv_dev = 0;
		pe_lock_req.data.pv_offset = 0;
		bh = _dequeue_io();
		up_write(&_pe_lock);

		/* handle all deferred io for this PE */
		_flush_io(bh);
		break;

	default:
		return -EINVAL;
	}
	return 0;
}


/*
 * character device support function logical extend remap
 *
 * Rewrite one logical extent mapping (old_dev/old_pe -> new_dev/new_pe)
 * of the LV named in the global le_remap_req; used by pvmove after the
 * data has been copied.  Returns 0 on success, -EINVAL when the old
 * mapping is not found, -ENXIO when the LV does not exist.
 */
static int lvm_do_le_remap(vg_t *vg_ptr, void *arg)
{
	uint l, le;
	lv_t *lv_ptr;

	if (vg_ptr == NULL) return -ENXIO;
	if (copy_from_user(&le_remap_req, arg,
			   sizeof(le_remap_req_t)) != 0)
		return -EFAULT;

	for (l = 0; l < vg_ptr->lv_max; l++) {
		lv_ptr = vg_ptr->lv[l];

		if (!lv_ptr)
			continue;

		if (strcmp(lv_ptr->lv_name, le_remap_req.lv_name) == 0) {
			down_write(&lv_ptr->lv_lock);
			for (le = 0; le < lv_ptr->lv_allocated_le; le++) {
				if (lv_ptr->lv_current_pe[le].dev ==
				    le_remap_req.old_dev &&
				    lv_ptr->lv_current_pe[le].pe ==
				    le_remap_req.old_pe) {
					lv_ptr->lv_current_pe[le].dev =
					    le_remap_req.new_dev;
					lv_ptr->lv_current_pe[le].pe =
					    le_remap_req.new_pe;
					/* the new PV may have another sector size */
					__update_hardsectsize(lv_ptr);
					up_write(&lv_ptr->lv_lock);
					return 0;
				}
1512 } 1513 up_write(&lv_ptr->lv_lock); 1514 return -EINVAL; 1515 } 1516 } 1517 return -ENXIO; 1518} /* lvm_do_le_remap() */ 1519 1520 1521/* 1522 * character device support function VGDA create 1523 */ 1524static int lvm_do_vg_create(void *arg, int minor) 1525{ 1526 int ret = 0; 1527 ulong l, ls = 0, p, size; 1528 lv_t lv; 1529 vg_t *vg_ptr; 1530 lv_t **snap_lv_ptr; 1531 1532 if ((vg_ptr = kmalloc(sizeof(vg_t),GFP_KERNEL)) == NULL) { 1533 printk(KERN_CRIT 1534 "%s -- VG_CREATE: kmalloc error VG at line %d\n", 1535 lvm_name, __LINE__); 1536 return -ENOMEM; 1537 } 1538 /* get the volume group structure */ 1539 if (copy_from_user(vg_ptr, arg, sizeof(vg_t)) != 0) { 1540 P_IOCTL("lvm_do_vg_create ERROR: copy VG ptr %p (%d bytes)\n", 1541 arg, sizeof(vg_t)); 1542 kfree(vg_ptr); 1543 return -EFAULT; 1544 } 1545 1546 /* VG_CREATE now uses minor number in VG structure */ 1547 if (minor == -1) 1548 minor = vg_ptr->vg_number; 1549 1550 /* check limits */ 1551 if (minor >= ABS_MAX_VG) 1552 return -EFAULT; 1553 1554 /* Validate it */ 1555 if (vg[VG_CHR(minor)] != NULL) { 1556 P_IOCTL("lvm_do_vg_create ERROR: VG %d in use\n", minor); 1557 kfree(vg_ptr); 1558 return -EPERM; 1559 } 1560 1561 /* we are not that active so far... 
*/ 1562 vg_ptr->vg_status &= ~VG_ACTIVE; 1563 vg_ptr->pe_allocated = 0; 1564 1565 if (vg_ptr->pv_max > ABS_MAX_PV) { 1566 printk(KERN_WARNING 1567 "%s -- Can't activate VG: ABS_MAX_PV too small\n", 1568 lvm_name); 1569 kfree(vg_ptr); 1570 return -EPERM; 1571 } 1572 1573 if (vg_ptr->lv_max > ABS_MAX_LV) { 1574 printk(KERN_WARNING 1575 "%s -- Can't activate VG: ABS_MAX_LV too small for %u\n", 1576 lvm_name, vg_ptr->lv_max); 1577 kfree(vg_ptr); 1578 return -EPERM; 1579 } 1580 1581 /* create devfs and procfs entries */ 1582 lvm_fs_create_vg(vg_ptr); 1583 1584 vg[VG_CHR(minor)] = vg_ptr; 1585 1586 /* get the physical volume structures */ 1587 vg_ptr->pv_act = vg_ptr->pv_cur = 0; 1588 for (p = 0; p < vg_ptr->pv_max; p++) { 1589 pv_t *pvp; 1590 /* user space address */ 1591 if ((pvp = vg_ptr->pv[p]) != NULL) { 1592 ret = lvm_do_pv_create(pvp, vg_ptr, p); 1593 if ( ret != 0) { 1594 lvm_do_vg_remove(minor); 1595 return ret; 1596 } 1597 } 1598 } 1599 1600 size = vg_ptr->lv_max * sizeof(lv_t *); 1601 if ((snap_lv_ptr = vmalloc ( size)) == NULL) { 1602 printk(KERN_CRIT 1603 "%s -- VG_CREATE: vmalloc error snapshot LVs at line %d\n", 1604 lvm_name, __LINE__); 1605 lvm_do_vg_remove(minor); 1606 return -EFAULT; 1607 } 1608 memset(snap_lv_ptr, 0, size); 1609 1610 /* get the logical volume structures */ 1611 vg_ptr->lv_cur = 0; 1612 for (l = 0; l < vg_ptr->lv_max; l++) { 1613 lv_t *lvp; 1614 /* user space address */ 1615 if ((lvp = vg_ptr->lv[l]) != NULL) { 1616 if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0) { 1617 P_IOCTL("ERROR: copying LV ptr %p (%d bytes)\n", 1618 lvp, sizeof(lv_t)); 1619 lvm_do_vg_remove(minor); 1620 return -EFAULT; 1621 } 1622 if ( lv.lv_access & LV_SNAPSHOT) { 1623 snap_lv_ptr[ls] = lvp; 1624 vg_ptr->lv[l] = NULL; 1625 ls++; 1626 continue; 1627 } 1628 vg_ptr->lv[l] = NULL; 1629 /* only create original logical volumes for now */ 1630 if (lvm_do_lv_create(minor, lv.lv_name, &lv) != 0) { 1631 lvm_do_vg_remove(minor); 1632 return -EFAULT; 1633 } 1634 } 1635 } 
1636 1637 /* Second path to correct snapshot logical volumes which are not 1638 in place during first path above */ 1639 for (l = 0; l < ls; l++) { 1640 lv_t *lvp = snap_lv_ptr[l]; 1641 if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0) { 1642 lvm_do_vg_remove(minor); 1643 return -EFAULT; 1644 } 1645 if (lvm_do_lv_create(minor, lv.lv_name, &lv) != 0) { 1646 lvm_do_vg_remove(minor); 1647 return -EFAULT; 1648 } 1649 } 1650 1651 vfree(snap_lv_ptr); 1652 1653 vg_count++; 1654 1655 1656 MOD_INC_USE_COUNT; 1657 1658 /* let's go active */ 1659 vg_ptr->vg_status |= VG_ACTIVE; 1660 1661 return 0; 1662} /* lvm_do_vg_create() */ 1663 1664 1665/* 1666 * character device support function VGDA extend 1667 */ 1668static int lvm_do_vg_extend(vg_t *vg_ptr, void *arg) 1669{ 1670 int ret = 0; 1671 uint p; 1672 pv_t *pv_ptr; 1673 1674 if (vg_ptr == NULL) return -ENXIO; 1675 if (vg_ptr->pv_cur < vg_ptr->pv_max) { 1676 for (p = 0; p < vg_ptr->pv_max; p++) { 1677 if ( ( pv_ptr = vg_ptr->pv[p]) == NULL) { 1678 ret = lvm_do_pv_create(arg, vg_ptr, p); 1679 if ( ret != 0) return ret; 1680 pv_ptr = vg_ptr->pv[p]; 1681 vg_ptr->pe_total += pv_ptr->pe_total; 1682 return 0; 1683 } 1684 } 1685 } 1686 return -EPERM; 1687} /* lvm_do_vg_extend() */ 1688 1689 1690/* 1691 * character device support function VGDA reduce 1692 */ 1693static int lvm_do_vg_reduce(vg_t *vg_ptr, void *arg) { 1694 uint p; 1695 pv_t *pv_ptr; 1696 1697 if (vg_ptr == NULL) return -ENXIO; 1698 if (copy_from_user(pv_name, arg, sizeof(pv_name)) != 0) 1699 return -EFAULT; 1700 1701 for (p = 0; p < vg_ptr->pv_max; p++) { 1702 pv_ptr = vg_ptr->pv[p]; 1703 if (pv_ptr != NULL && 1704 strcmp(pv_ptr->pv_name, 1705 pv_name) == 0) { 1706 if (pv_ptr->lv_cur > 0) return -EPERM; 1707 lvm_do_pv_remove(vg_ptr, p); 1708 /* Make PV pointer array contiguous */ 1709 for (; p < vg_ptr->pv_max - 1; p++) 1710 vg_ptr->pv[p] = vg_ptr->pv[p + 1]; 1711 vg_ptr->pv[p + 1] = NULL; 1712 return 0; 1713 } 1714 } 1715 return -ENXIO; 1716} /* lvm_do_vg_reduce */ 1717 
1718 1719/* 1720 * character device support function VG rename 1721 */ 1722static int lvm_do_vg_rename(vg_t *vg_ptr, void *arg) 1723{ 1724 int l = 0, p = 0, len = 0; 1725 char vg_name[NAME_LEN] = { 0,}; 1726 char lv_name[NAME_LEN] = { 0,}; 1727 char *ptr = NULL; 1728 lv_t *lv_ptr = NULL; 1729 pv_t *pv_ptr = NULL; 1730 1731 /* If the VG doesn't exist in the kernel then just exit */ 1732 if (!vg_ptr) return 0; 1733 1734 if (copy_from_user(vg_name, arg, sizeof(vg_name)) != 0) 1735 return -EFAULT; 1736 1737 lvm_fs_remove_vg(vg_ptr); 1738 1739 strncpy ( vg_ptr->vg_name, vg_name, sizeof ( vg_name)-1); 1740 for ( l = 0; l < vg_ptr->lv_max; l++) 1741 { 1742 if ((lv_ptr = vg_ptr->lv[l]) == NULL) continue; 1743 memset (lv_ptr->vg_name, 0, sizeof (*vg_name)); 1744 strncpy(lv_ptr->vg_name, vg_name, sizeof ( vg_name)); 1745 ptr = strrchr(lv_ptr->lv_name, '/'); 1746 ptr = ptr ? ptr + 1 : lv_ptr->lv_name; 1747 strncpy(lv_name, ptr, sizeof ( lv_name)); 1748 len = sizeof(LVM_DIR_PREFIX); 1749 strcpy(lv_ptr->lv_name, LVM_DIR_PREFIX); 1750 strncat(lv_ptr->lv_name, vg_name, NAME_LEN - len); 1751 strcat (lv_ptr->lv_name, "/"); 1752 len += strlen(vg_name) + 1; 1753 strncat(lv_ptr->lv_name, lv_name, NAME_LEN - len); 1754 } 1755 for ( p = 0; p < vg_ptr->pv_max; p++) 1756 { 1757 if ( (pv_ptr = vg_ptr->pv[p]) == NULL) continue; 1758 strncpy(pv_ptr->vg_name, vg_name, NAME_LEN); 1759 } 1760 1761 lvm_fs_create_vg(vg_ptr); 1762 1763 /* Need to add PV entries */ 1764 for ( p = 0; p < vg_ptr->pv_act; p++) { 1765 pv_t *pv_ptr = vg_ptr->pv[p]; 1766 1767 if (pv_ptr) 1768 lvm_fs_create_pv(vg_ptr, pv_ptr); 1769 } 1770 1771 /* Need to add LV entries */ 1772 for ( l = 0; l < vg_ptr->lv_max; l++) { 1773 lv_t *lv_ptr = vg_ptr->lv[l]; 1774 1775 if (!lv_ptr) 1776 continue; 1777 1778 lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].de = 1779 lvm_fs_create_lv(vg_ptr, lv_ptr); 1780 } 1781 1782 return 0; 1783} /* lvm_do_vg_rename */ 1784 1785 1786/* 1787 * character device support function VGDA remove 1788 */ 1789static 
int lvm_do_vg_remove(int minor)
{
	int i;
	vg_t *vg_ptr = vg[VG_CHR(minor)];
	pv_t *pv_ptr;

	if (vg_ptr == NULL) return -ENXIO;

#ifdef LVM_TOTAL_RESET
	if (vg_ptr->lv_open > 0 && lvm_reset_spindown == 0)
#else
	if (vg_ptr->lv_open > 0)
#endif
		return -EPERM;

	/* let's go inactive */
	vg_ptr->vg_status &= ~VG_ACTIVE;

	/* remove from procfs and devfs */
	lvm_fs_remove_vg(vg_ptr);

	/* free LVs */
	/* first free snapshot logical volumes */
	for (i = 0; i < vg_ptr->lv_max; i++) {
		if (vg_ptr->lv[i] != NULL &&
		    vg_ptr->lv[i]->lv_access & LV_SNAPSHOT) {
			lvm_do_lv_remove(minor, NULL, i);
			/* yield briefly between removals */
			current->state = TASK_UNINTERRUPTIBLE;
			schedule_timeout(1);
		}
	}
	/* then free the rest of the LVs */
	for (i = 0; i < vg_ptr->lv_max; i++) {
		if (vg_ptr->lv[i] != NULL) {
			lvm_do_lv_remove(minor, NULL, i);
			current->state = TASK_UNINTERRUPTIBLE;
			schedule_timeout(1);
		}
	}

	/* free PVs */
	for (i = 0; i < vg_ptr->pv_max; i++) {
		if ((pv_ptr = vg_ptr->pv[i]) != NULL) {
			P_KFREE("%s -- kfree %d\n", lvm_name, __LINE__);
			lvm_do_pv_remove(vg_ptr, i);
		}
	}

	P_KFREE("%s -- kfree %d\n", lvm_name, __LINE__);
	kfree(vg_ptr);
	vg[VG_CHR(minor)] = NULL;

	vg_count--;

	MOD_DEC_USE_COUNT;

	return 0;
} /* lvm_do_vg_remove() */


/*
 * character device support function physical volume create
 *
 * Allocate a kernel-side pv_t, fill it from the user descriptor 'pvp',
 * open the underlying device and install the PV in slot 'p' of the VG.
 * The user-space PE array is deliberately not copied (pv->pe = NULL).
 */
static int lvm_do_pv_create(pv_t *pvp, vg_t *vg_ptr, ulong p) {
	pv_t *pv;
	int err;

	if (!vg_ptr)
		return -ENXIO;

	pv = kmalloc(sizeof(pv_t),GFP_KERNEL);
	if (pv == NULL) {
		printk(KERN_CRIT
		       "%s -- PV_CREATE: kmalloc error PV at line %d\n",
		       lvm_name, __LINE__);
		return -ENOMEM;
	}

	memset(pv, 0, sizeof(*pv));

	if (copy_from_user(pv, pvp, sizeof(pv_t)) != 0) {
		P_IOCTL("lvm_do_pv_create ERROR: copy PV ptr %p (%d bytes)\n",
			pvp, sizeof(pv_t));
		kfree(pv);
		return -EFAULT;
	}

	if ((err = _open_pv(pv))) {
		kfree(pv);
		return err;
	}

	/* We don't need the PE list
	   in kernel space as with LVs pe_t list (see below) */
	pv->pe = NULL;
	pv->pe_allocated = 0;
	pv->pv_status = PV_ACTIVE;
	vg_ptr->pv_act++;
	vg_ptr->pv_cur++;
	lvm_fs_create_pv(vg_ptr, pv);

	vg_ptr->pv[p] = pv;
	return 0;
} /* lvm_do_pv_create() */


/*
 * character device support function physical volume remove
 *
 * Inverse of lvm_do_pv_create(): unregister, subtract the PV's extents
 * from the VG totals, close the device and free the descriptor.
 */
static int lvm_do_pv_remove(vg_t *vg_ptr, ulong p) {
	pv_t *pv = vg_ptr->pv[p];

	lvm_fs_remove_pv(vg_ptr, pv);

	vg_ptr->pe_total -= pv->pe_total;
	vg_ptr->pv_cur--;
	vg_ptr->pv_act--;

	_close_pv(pv);
	kfree(pv);

	vg_ptr->pv[p] = NULL;

	return 0;
}


/*
 * Recompute the hardware sector size recorded for an LV: use the
 * common PV sector size when all PVs agree, otherwise the maximum over
 * the LV's extents (and, for active snapshots, over the exception
 * table's target devices).  Falls back to SECTOR_SIZE.
 */
static void __update_hardsectsize(lv_t *lv)
{
	int max_hardsectsize = 0, hardsectsize = 0;
	int p;

	/* Check PVs first to see if they all have same sector size */
	for (p = 0; p < lv->vg->pv_cur; p++) {
		pv_t *pv = lv->vg->pv[p];
		if (pv && (hardsectsize = lvm_sectsize(pv->pv_dev))) {
			if (max_hardsectsize == 0)
				max_hardsectsize = hardsectsize;
			else if (hardsectsize != max_hardsectsize) {
				P_DEV("%s PV[%d] (%s) sector size %d, not %d\n",
				      lv->lv_name, p, kdevname(pv->pv_dev),
				      hardsectsize, max_hardsectsize);
				break;
			}
		}
	}

	/* PVs have different block size, need to check each LE sector size */
	if (hardsectsize != max_hardsectsize) {
		int le;
		for (le = 0; le < lv->lv_allocated_le; le++) {
			hardsectsize = lvm_sectsize(lv->lv_current_pe[le].dev);
			if (hardsectsize > max_hardsectsize) {
				P_DEV("%s LE[%d] (%s) blocksize %d not %d\n",
				      lv->lv_name, le,
				      kdevname(lv->lv_current_pe[le].dev),
				      hardsectsize, max_hardsectsize);
				max_hardsectsize = hardsectsize;
			}
		}

		/* only perform this operation on active snapshots */
		if ((lv->lv_access & LV_SNAPSHOT) &&
		    (lv->lv_status & LV_ACTIVE)) {
			int e;
			for (e = 0; e < lv->lv_remap_end; e++) {
				hardsectsize = lvm_sectsize(lv->lv_block_exception[e].rdev_new);
				if (hardsectsize > max_hardsectsize)
					max_hardsectsize = hardsectsize;
			}
		}
	}

	if (max_hardsectsize == 0)
		max_hardsectsize = SECTOR_SIZE;
	P_DEV("hardblocksize for LV %s is %d\n",
	      kdevname(lv->lv_dev), max_hardsectsize);
	lvm_hardsectsizes[MINOR(lv->lv_dev)] = max_hardsectsize;
}

/*
 * character device support function logical volume create
 *
 * LV_CREATE backend: allocate a kernel-side lv_t from the preloaded
 * descriptor 'lv', pick a free LV slot (preferring lv->lv_number),
 * copy the PE mapping from user space (or, for snapshots, wire up the
 * exception table and the link to the origin -- continued below).
 */
static int lvm_do_lv_create(int minor, char *lv_name, lv_t *lv)
{
	int e, ret, l, le, l_new, p, size, activate = 1;
	ulong lv_status_save;
	lv_block_exception_t *lvbe = lv->lv_block_exception;
	vg_t *vg_ptr = vg[VG_CHR(minor)];
	lv_t *lv_ptr = NULL;
	pe_t *pep;

	if (!(pep = lv->lv_current_pe))
		return -EINVAL;

	if (_sectors_to_k(lv->lv_chunk_size) > LVM_SNAPSHOT_MAX_CHUNK)
		return -EINVAL;

	/* reject a duplicate LV name within this VG */
	for (l = 0; l < vg_ptr->lv_cur; l++) {
		if (vg_ptr->lv[l] != NULL &&
		    strcmp(vg_ptr->lv[l]->lv_name, lv_name) == 0)
			return -EEXIST;
	}

	/* in case of lv_remove(), lv_create() pair */
	l_new = -1;
	if (vg_ptr->lv[lv->lv_number] == NULL)
		l_new = lv->lv_number;
	else {
		for (l = 0; l < vg_ptr->lv_max; l++) {
			if (vg_ptr->lv[l] == NULL)
				if (l_new == -1) l_new = l;
		}
	}
	if (l_new == -1) return -EPERM;
	else l = l_new;

	if ((lv_ptr = kmalloc(sizeof(lv_t),GFP_KERNEL)) == NULL) {;
		printk(KERN_CRIT "%s -- LV_CREATE: kmalloc error LV at line %d\n",
		       lvm_name, __LINE__);
		return -ENOMEM;
	}
	/* copy preloaded LV */
	memcpy((char *) lv_ptr, (char *) lv, sizeof(lv_t));
	/* start inactive; restore the caller's status bits at the end */
	lv_status_save = lv_ptr->lv_status;
	lv_ptr->lv_status &= ~LV_ACTIVE;
	lv_ptr->lv_snapshot_org = NULL;
	lv_ptr->lv_snapshot_prev = NULL;
	lv_ptr->lv_snapshot_next = NULL;
	lv_ptr->lv_block_exception = NULL;
	lv_ptr->lv_iobuf = NULL;
	lv_ptr->lv_COW_table_iobuf = NULL;
	lv_ptr->lv_snapshot_hash_table = NULL;
	lv_ptr->lv_snapshot_hash_table_size = 0;
	lv_ptr->lv_snapshot_hash_mask = 0;
	init_rwsem(&lv_ptr->lv_lock);

	lv_ptr->lv_snapshot_use_rate = 0;

	vg_ptr->lv[l] = lv_ptr;

	/* get the PE structures from user space if this
	   is not a snapshot logical volume */
	if (!(lv_ptr->lv_access & LV_SNAPSHOT)) {
		size = lv_ptr->lv_allocated_le * sizeof(pe_t);

		if ((lv_ptr->lv_current_pe = vmalloc(size)) == NULL) {
			printk(KERN_CRIT
			       "%s -- LV_CREATE: vmalloc error LV_CURRENT_PE of %d Byte "
			       "at line %d\n",
			       lvm_name, size, __LINE__);
			P_KFREE("%s -- kfree %d\n", lvm_name, __LINE__);
			kfree(lv_ptr);
			vg_ptr->lv[l] = NULL;
			return -ENOMEM;
		}
		if (copy_from_user(lv_ptr->lv_current_pe, pep, size)) {
			P_IOCTL("ERROR: copying PE ptr %p (%d bytes)\n",
				pep, sizeof(size));
			vfree(lv_ptr->lv_current_pe);
			kfree(lv_ptr);
			vg_ptr->lv[l] = NULL;
			return -EFAULT;
		}
		/* correct the PE count in PVs */
		for (le = 0; le < lv_ptr->lv_allocated_le; le++) {
			vg_ptr->pe_allocated++;
			for (p = 0; p < vg_ptr->pv_cur; p++) {
				if (vg_ptr->pv[p]->pv_dev ==
				    lv_ptr->lv_current_pe[le].dev)
					vg_ptr->pv[p]->pe_allocated++;
			}
		}
	} else {
		/* Get snapshot exception data and block list */
		if (lvbe != NULL) {
			lv_ptr->lv_snapshot_org =
			    vg_ptr->lv[LV_BLK(lv_ptr->lv_snapshot_minor)];
			if (lv_ptr->lv_snapshot_org != NULL) {
				size = lv_ptr->lv_remap_end * sizeof(lv_block_exception_t);

				if (!size) {
					printk(KERN_WARNING
					       "%s -- zero length exception table requested\n",
					       lvm_name);
					kfree(lv_ptr);
					return -EINVAL;
				}

				if ((lv_ptr->lv_block_exception = vmalloc(size)) == NULL) {
					printk(KERN_CRIT
					       "%s -- lvm_do_lv_create: vmalloc error LV_BLOCK_EXCEPTION "
					       "of %d byte at line %d\n",
					       lvm_name, size, __LINE__);
					P_KFREE("%s -- kfree %d\n", lvm_name,
						__LINE__);
					kfree(lv_ptr);
					vg_ptr->lv[l] = NULL;
					return -ENOMEM;
				}
				if (copy_from_user(lv_ptr->lv_block_exception, lvbe, size)) {
					vfree(lv_ptr->lv_block_exception);
					kfree(lv_ptr);
					vg_ptr->lv[l] = NULL;
					return -EFAULT;
				}

				/* a dropped-snapshot marker in the first entry
				   means this snapshot must not go active */
				if(lv_ptr->lv_block_exception[0].rsector_org ==
				   LVM_SNAPSHOT_DROPPED_SECTOR)
				{
					printk(KERN_WARNING
					       "%s -- lvm_do_lv_create: snapshot has been dropped and will not be activated\n",
					       lvm_name);
					activate = 0;
				}

				/* point to the original logical volume */
				lv_ptr = lv_ptr->lv_snapshot_org;

				/* NOTE(review): the following writes go to the
				   ORIGIN lv (lv_ptr was just redirected), and
				   org->lv_snapshot_org is set to itself --
				   looks intentional for chain bookkeeping but
				   confirm against the original driver */
				lv_ptr->lv_snapshot_minor = 0;
				lv_ptr->lv_snapshot_org = lv_ptr;
				/* our new one now back points to the previous last in the chain
				   which can be the original logical volume */
				lv_ptr = vg_ptr->lv[l];
				/* now lv_ptr points to our new last snapshot logical volume */
				lv_ptr->lv_current_pe = lv_ptr->lv_snapshot_org->lv_current_pe;
				lv_ptr->lv_allocated_snapshot_le = lv_ptr->lv_allocated_le;
				lv_ptr->lv_allocated_le = lv_ptr->lv_snapshot_org->lv_allocated_le;
				lv_ptr->lv_current_le = lv_ptr->lv_snapshot_org->lv_current_le;
				lv_ptr->lv_size = lv_ptr->lv_snapshot_org->lv_size;
				lv_ptr->lv_stripes = lv_ptr->lv_snapshot_org->lv_stripes;
				lv_ptr->lv_stripesize = lv_ptr->lv_snapshot_org->lv_stripesize;

				/* Update the VG PE(s) used by snapshot reserve space. */
				vg_ptr->pe_allocated += lv_ptr->lv_allocated_snapshot_le;

				if ((ret = lvm_snapshot_alloc(lv_ptr)) != 0)
				{
					vfree(lv_ptr->lv_block_exception);
					kfree(lv_ptr);
					vg_ptr->lv[l] = NULL;
					return ret;
				}
				/* rebuild the snapshot lookup hash from the
				   exception table entries copied above */
				for ( e = 0; e < lv_ptr->lv_remap_ptr; e++)
					lvm_hash_link (lv_ptr->lv_block_exception + e,
						       lv_ptr->lv_block_exception[e].rdev_org,
						       lv_ptr->lv_block_exception[e].rsector_org, lv_ptr);
				/* need to fill the COW exception table data
				   into the page for disk i/o */
				if(lvm_snapshot_fill_COW_page(vg_ptr, lv_ptr)) {
					kfree(lv_ptr);
					vg_ptr->lv[l] = NULL;
					return -EINVAL;
				}
				init_waitqueue_head(&lv_ptr->lv_snapshot_wait);
			} else {
				/* snapshot origin not present in this VG */
				kfree(lv_ptr);
				vg_ptr->lv[l] = NULL;
				return -EFAULT;
			}
		} else {
			/* snapshot LV without an exception table */
			kfree(vg_ptr->lv[l]);
			vg_ptr->lv[l] = NULL;
			return -EINVAL;
		}
	} /* if ( vg[VG_CHR(minor)]->lv[l]->lv_access & LV_SNAPSHOT) */

	/* publish the LV in the gendisk / size / VG-LV mapping tables */
	lv_ptr = vg_ptr->lv[l];
	lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].start_sect = 0;
	lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = lv_ptr->lv_size;
	lvm_size[MINOR(lv_ptr->lv_dev)] = lv_ptr->lv_size >> 1;
	vg_lv_map[MINOR(lv_ptr->lv_dev)].vg_number = vg_ptr->vg_number;
	vg_lv_map[MINOR(lv_ptr->lv_dev)].lv_number = lv_ptr->lv_number;
	LVM_CORRECT_READ_AHEAD(lv_ptr->lv_read_ahead);
	vg_ptr->lv_cur++;
	lv_ptr->lv_status = lv_status_save;
	lv_ptr->vg = vg_ptr;

	__update_hardsectsize(lv_ptr);

	/* optionally add our new snapshot LV */
	if (lv_ptr->lv_access & LV_SNAPSHOT) {
		lv_t *org = lv_ptr->lv_snapshot_org, *last;

		/* sync the original logical volume */
		fsync_dev(org->lv_dev);
#ifdef LVM_VFS_ENHANCEMENT
		/* VFS function call to sync and lock the filesystem */
		fsync_dev_lockfs(org->lv_dev);
#endif

		down_write(&org->lv_lock);
		org->lv_access |= LV_SNAPSHOT_ORG;
		lv_ptr->lv_access &= ~LV_SNAPSHOT_ORG; /* this can only hide an userspace bug */


		/* Link in the list of snapshot volumes */
		for (last = org; last->lv_snapshot_next; last = last->lv_snapshot_next);
		lv_ptr->lv_snapshot_prev = last;
		last->lv_snapshot_next = lv_ptr;
		up_write(&org->lv_lock);
	}

	/* activate the logical volume */
	if(activate)
		lv_ptr->lv_status |= LV_ACTIVE;
	else
		lv_ptr->lv_status &= ~LV_ACTIVE;

	if ( lv_ptr->lv_access & LV_WRITE)
		set_device_ro(lv_ptr->lv_dev, 0);
	else
		set_device_ro(lv_ptr->lv_dev, 1);

#ifdef LVM_VFS_ENHANCEMENT
/* VFS function call to unlock the filesystem */
	if (lv_ptr->lv_access & LV_SNAPSHOT)
		unlockfs(lv_ptr->lv_snapshot_org->lv_dev);
#endif

	lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].de =
	    lvm_fs_create_lv(vg_ptr, lv_ptr);
	return 0;
} /* lvm_do_lv_create() */


/*
 * character device support function logical volume remove
 *
 * LV_REMOVE backend: locate the LV (by name when l == -1, else by
 * index), refuse if it is open or is a snapshot origin with live
 * snapshots, unlink snapshot chains, spin the device down and release
 * its resources.  (Function continues past the end of this chunk.)
 */
static int lvm_do_lv_remove(int minor, char *lv_name, int l)
{
	uint le, p;
	vg_t *vg_ptr = vg[VG_CHR(minor)];
	lv_t *lv_ptr;

	if (!vg_ptr)
		return -ENXIO;

	if (l == -1) {
		/* look the LV up by name */
		for (l = 0; l < vg_ptr->lv_max; l++) {
			if (vg_ptr->lv[l] != NULL &&
			    strcmp(vg_ptr->lv[l]->lv_name, lv_name) == 0) {
				break;
			}
		}
	}
	if (l == vg_ptr->lv_max) return -ENXIO;

	lv_ptr = vg_ptr->lv[l];
#ifdef LVM_TOTAL_RESET
	if (lv_ptr->lv_open > 0 && lvm_reset_spindown == 0)
#else
	if (lv_ptr->lv_open > 0)
#endif
		return -EBUSY;

	/* check for deletion of snapshot source while
	   snapshot volume still exists */
	if ((lv_ptr->lv_access & LV_SNAPSHOT_ORG) &&
	    lv_ptr->lv_snapshot_next != NULL)
		return -EPERM;

	lvm_fs_remove_lv(vg_ptr, lv_ptr);

	if (lv_ptr->lv_access & LV_SNAPSHOT) {
		/*
		 * Atomically make the the snapshot invisible
		 * to the original lv before playing with it.
		 */
		lv_t * org = lv_ptr->lv_snapshot_org;
		down_write(&org->lv_lock);

		/* remove this snapshot logical volume from the chain */
		lv_ptr->lv_snapshot_prev->lv_snapshot_next = lv_ptr->lv_snapshot_next;
		if (lv_ptr->lv_snapshot_next != NULL) {
			lv_ptr->lv_snapshot_next->lv_snapshot_prev =
			    lv_ptr->lv_snapshot_prev;
		}

		/* no more snapshots? */
		if (!org->lv_snapshot_next) {
			org->lv_access &= ~LV_SNAPSHOT_ORG;
		}
		up_write(&org->lv_lock);

		lvm_snapshot_release(lv_ptr);

		/* Update the VG PE(s) used by snapshot reserve space. */
		vg_ptr->pe_allocated -= lv_ptr->lv_allocated_snapshot_le;
	}

	lv_ptr->lv_status |= LV_SPINDOWN;

	/* sync the buffers */
	fsync_dev(lv_ptr->lv_dev);

	lv_ptr->lv_status &= ~LV_ACTIVE;

	/* invalidate the buffers */
	invalidate_buffers(lv_ptr->lv_dev);

	/* reset generic hd */
	lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].start_sect = -1;
	lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = 0;
	lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].de = 0;
	lvm_size[MINOR(lv_ptr->lv_dev)] = 0;

	/* reset VG/LV mapping */
	vg_lv_map[MINOR(lv_ptr->lv_dev)].vg_number = ABS_MAX_VG;
	vg_lv_map[MINOR(lv_ptr->lv_dev)].lv_number = -1;

	/* correct the PE count in PVs if this is not a snapshot
	   logical volume */
	if (!(lv_ptr->lv_access & LV_SNAPSHOT)) {
		/* only if this is no snapshot logical volume because
		   we share the lv_current_pe[] structs with the
		   original logical volume */
		for (le = 0; le < lv_ptr->lv_allocated_le; le++) {
			vg_ptr->pe_allocated--;
			for (p = 0; p < vg_ptr->pv_cur; p++) {
				if (vg_ptr->pv[p]->pv_dev ==
				    lv_ptr->lv_current_pe[le].dev)
					vg_ptr->pv[p]->pe_allocated--;
			}
		}
		vfree(lv_ptr->lv_current_pe);
	}

	P_KFREE("%s -- kfree %d\n", lvm_name, __LINE__);
	kfree(lv_ptr);
	vg_ptr->lv[l] = NULL;
2321 vg_ptr->lv_cur--; 2322 return 0; 2323} /* lvm_do_lv_remove() */ 2324 2325 2326/* 2327 * logical volume extend / reduce 2328 */ 2329static int __extend_reduce_snapshot(vg_t *vg_ptr, lv_t *old_lv, lv_t *new_lv) { 2330 ulong size; 2331 lv_block_exception_t *lvbe; 2332 2333 if (!new_lv->lv_block_exception) 2334 return -ENXIO; 2335 2336 size = new_lv->lv_remap_end * sizeof(lv_block_exception_t); 2337 if ((lvbe = vmalloc(size)) == NULL) { 2338 printk(KERN_CRIT 2339 "%s -- lvm_do_lv_extend_reduce: vmalloc " 2340 "error LV_BLOCK_EXCEPTION of %lu Byte at line %d\n", 2341 lvm_name, size, __LINE__); 2342 return -ENOMEM; 2343 } 2344 2345 if ((new_lv->lv_remap_end > old_lv->lv_remap_end) && 2346 (copy_from_user(lvbe, new_lv->lv_block_exception, size))) { 2347 vfree(lvbe); 2348 return -EFAULT; 2349 } 2350 new_lv->lv_block_exception = lvbe; 2351 2352 if (lvm_snapshot_alloc_hash_table(new_lv)) { 2353 vfree(new_lv->lv_block_exception); 2354 return -ENOMEM; 2355 } 2356 2357 return 0; 2358} 2359 2360static int __extend_reduce(vg_t *vg_ptr, lv_t *old_lv, lv_t *new_lv) { 2361 ulong size, l, p, end; 2362 pe_t *pe; 2363 2364 /* allocate space for new pe structures */ 2365 size = new_lv->lv_current_le * sizeof(pe_t); 2366 if ((pe = vmalloc(size)) == NULL) { 2367 printk(KERN_CRIT 2368 "%s -- lvm_do_lv_extend_reduce: " 2369 "vmalloc error LV_CURRENT_PE of %lu Byte at line %d\n", 2370 lvm_name, size, __LINE__); 2371 return -ENOMEM; 2372 } 2373 2374 /* get the PE structures from user space */ 2375 if (copy_from_user(pe, new_lv->lv_current_pe, size)) { 2376 if(old_lv->lv_access & LV_SNAPSHOT) 2377 vfree(new_lv->lv_snapshot_hash_table); 2378 vfree(pe); 2379 return -EFAULT; 2380 } 2381 2382 new_lv->lv_current_pe = pe; 2383 2384 /* reduce allocation counters on PV(s) */ 2385 for (l = 0; l < old_lv->lv_allocated_le; l++) { 2386 vg_ptr->pe_allocated--; 2387 for (p = 0; p < vg_ptr->pv_cur; p++) { 2388 if (vg_ptr->pv[p]->pv_dev == 2389 old_lv->lv_current_pe[l].dev) { 2390 
vg_ptr->pv[p]->pe_allocated--; 2391 break; 2392 } 2393 } 2394 } 2395 2396 /* extend the PE count in PVs */ 2397 for (l = 0; l < new_lv->lv_allocated_le; l++) { 2398 vg_ptr->pe_allocated++; 2399 for (p = 0; p < vg_ptr->pv_cur; p++) { 2400 if (vg_ptr->pv[p]->pv_dev == 2401 new_lv->lv_current_pe[l].dev) { 2402 vg_ptr->pv[p]->pe_allocated++; 2403 break; 2404 } 2405 } 2406 } 2407 2408 /* save availiable i/o statistic data */ 2409 if (old_lv->lv_stripes < 2) { /* linear logical volume */ 2410 end = min(old_lv->lv_current_le, new_lv->lv_current_le); 2411 for (l = 0; l < end; l++) { 2412 new_lv->lv_current_pe[l].reads += 2413 old_lv->lv_current_pe[l].reads; 2414 2415 new_lv->lv_current_pe[l].writes += 2416 old_lv->lv_current_pe[l].writes; 2417 } 2418 2419 } else { /* striped logical volume */ 2420 uint i, j, source, dest, end, old_stripe_size, new_stripe_size; 2421 2422 old_stripe_size = old_lv->lv_allocated_le / old_lv->lv_stripes; 2423 new_stripe_size = new_lv->lv_allocated_le / new_lv->lv_stripes; 2424 end = min(old_stripe_size, new_stripe_size); 2425 2426 for (i = source = dest = 0; i < new_lv->lv_stripes; i++) { 2427 for (j = 0; j < end; j++) { 2428 new_lv->lv_current_pe[dest + j].reads += 2429 old_lv->lv_current_pe[source + j].reads; 2430 new_lv->lv_current_pe[dest + j].writes += 2431 old_lv->lv_current_pe[source + j].writes; 2432 } 2433 source += old_stripe_size; 2434 dest += new_stripe_size; 2435 } 2436 } 2437 2438 return 0; 2439} 2440 2441static int lvm_do_lv_extend_reduce(int minor, char *lv_name, lv_t *new_lv) 2442{ 2443 int r; 2444 ulong l, e, size; 2445 vg_t *vg_ptr = vg[VG_CHR(minor)]; 2446 lv_t *old_lv; 2447 pe_t *pe; 2448 2449 if (!vg_ptr) 2450 return -ENXIO; 2451 2452 if ((pe = new_lv->lv_current_pe) == NULL) 2453 return -EINVAL; 2454 2455 for (l = 0; l < vg_ptr->lv_max; l++) 2456 if (vg_ptr->lv[l] && !strcmp(vg_ptr->lv[l]->lv_name, lv_name)) 2457 break; 2458 2459 if (l == vg_ptr->lv_max) 2460 return -ENXIO; 2461 2462 old_lv = vg_ptr->lv[l]; 2463 2464 if 
(old_lv->lv_access & LV_SNAPSHOT) { 2465 /* only perform this operation on active snapshots */ 2466 if (old_lv->lv_status & LV_ACTIVE) 2467 r = __extend_reduce_snapshot(vg_ptr, old_lv, new_lv); 2468 else 2469 r = -EPERM; 2470 2471 } else 2472 r = __extend_reduce(vg_ptr, old_lv, new_lv); 2473 2474 if(r) 2475 return r; 2476 2477 /* copy relevent fields */ 2478 down_write(&old_lv->lv_lock); 2479 2480 if(new_lv->lv_access & LV_SNAPSHOT) { 2481 size = (new_lv->lv_remap_end > old_lv->lv_remap_end) ? 2482 old_lv->lv_remap_ptr : new_lv->lv_remap_end; 2483 size *= sizeof(lv_block_exception_t); 2484 memcpy(new_lv->lv_block_exception, 2485 old_lv->lv_block_exception, size); 2486 2487 old_lv->lv_remap_end = new_lv->lv_remap_end; 2488 old_lv->lv_block_exception = new_lv->lv_block_exception; 2489 old_lv->lv_snapshot_hash_table = 2490 new_lv->lv_snapshot_hash_table; 2491 old_lv->lv_snapshot_hash_table_size = 2492 new_lv->lv_snapshot_hash_table_size; 2493 old_lv->lv_snapshot_hash_mask = 2494 new_lv->lv_snapshot_hash_mask; 2495 2496 for (e = 0; e < new_lv->lv_remap_ptr; e++) 2497 lvm_hash_link(new_lv->lv_block_exception + e, 2498 new_lv->lv_block_exception[e].rdev_org, 2499 new_lv->lv_block_exception[e].rsector_org, 2500 new_lv); 2501 2502 } else { 2503 2504 vfree(old_lv->lv_current_pe); 2505 vfree(old_lv->lv_snapshot_hash_table); 2506 2507 old_lv->lv_size = new_lv->lv_size; 2508 old_lv->lv_allocated_le = new_lv->lv_allocated_le; 2509 old_lv->lv_current_le = new_lv->lv_current_le; 2510 old_lv->lv_current_pe = new_lv->lv_current_pe; 2511 lvm_gendisk.part[MINOR(old_lv->lv_dev)].nr_sects = 2512 old_lv->lv_size; 2513 lvm_size[MINOR(old_lv->lv_dev)] = old_lv->lv_size >> 1; 2514 2515 if (old_lv->lv_access & LV_SNAPSHOT_ORG) { 2516 lv_t *snap; 2517 for(snap = old_lv->lv_snapshot_next; snap; 2518 snap = snap->lv_snapshot_next) { 2519 down_write(&snap->lv_lock); 2520 snap->lv_current_pe = old_lv->lv_current_pe; 2521 snap->lv_allocated_le = 2522 old_lv->lv_allocated_le; 2523 
snap->lv_current_le = old_lv->lv_current_le; 2524 snap->lv_size = old_lv->lv_size; 2525 2526 lvm_gendisk.part[MINOR(snap->lv_dev)].nr_sects 2527 = old_lv->lv_size; 2528 lvm_size[MINOR(snap->lv_dev)] = 2529 old_lv->lv_size >> 1; 2530 __update_hardsectsize(snap); 2531 up_write(&snap->lv_lock); 2532 } 2533 } 2534 } 2535 2536 __update_hardsectsize(old_lv); 2537 up_write(&old_lv->lv_lock); 2538 2539 return 0; 2540} /* lvm_do_lv_extend_reduce() */ 2541 2542 2543/* 2544 * character device support function logical volume status by name 2545 */ 2546static int lvm_do_lv_status_byname(vg_t *vg_ptr, void *arg) 2547{ 2548 uint l; 2549 lv_status_byname_req_t lv_status_byname_req; 2550 void *saved_ptr1; 2551 void *saved_ptr2; 2552 lv_t *lv_ptr; 2553 2554 if (vg_ptr == NULL) return -ENXIO; 2555 if (copy_from_user(&lv_status_byname_req, arg, 2556 sizeof(lv_status_byname_req_t)) != 0) 2557 return -EFAULT; 2558 2559 if (lv_status_byname_req.lv == NULL) return -EINVAL; 2560 2561 for (l = 0; l < vg_ptr->lv_max; l++) { 2562 if ((lv_ptr = vg_ptr->lv[l]) != NULL && 2563 strcmp(lv_ptr->lv_name, 2564 lv_status_byname_req.lv_name) == 0) { 2565 /* Save usermode pointers */ 2566 if (copy_from_user(&saved_ptr1, &lv_status_byname_req.lv->lv_current_pe, sizeof(void*)) != 0) 2567 return -EFAULT; 2568 if (copy_from_user(&saved_ptr2, &lv_status_byname_req.lv->lv_block_exception, sizeof(void*)) != 0) 2569 return -EFAULT; 2570 if (copy_to_user(lv_status_byname_req.lv, 2571 lv_ptr, 2572 sizeof(lv_t)) != 0) 2573 return -EFAULT; 2574 if (saved_ptr1 != NULL) { 2575 if (copy_to_user(saved_ptr1, 2576 lv_ptr->lv_current_pe, 2577 lv_ptr->lv_allocated_le * 2578 sizeof(pe_t)) != 0) 2579 return -EFAULT; 2580 } 2581 /* Restore usermode pointers */ 2582 if (copy_to_user(&lv_status_byname_req.lv->lv_current_pe, &saved_ptr1, sizeof(void*)) != 0) 2583 return -EFAULT; 2584 return 0; 2585 } 2586 } 2587 return -ENXIO; 2588} /* lvm_do_lv_status_byname() */ 2589 2590 2591/* 2592 * character device support function logical 
volume status by index 2593 */ 2594static int lvm_do_lv_status_byindex(vg_t *vg_ptr,void *arg) 2595{ 2596 lv_status_byindex_req_t lv_status_byindex_req; 2597 void *saved_ptr1; 2598 void *saved_ptr2; 2599 lv_t *lv_ptr; 2600 2601 if (vg_ptr == NULL) return -ENXIO; 2602 if (copy_from_user(&lv_status_byindex_req, arg, 2603 sizeof(lv_status_byindex_req)) != 0) 2604 return -EFAULT; 2605 2606 if (lv_status_byindex_req.lv == NULL) 2607 return -EINVAL; 2608 if ( ( lv_ptr = vg_ptr->lv[lv_status_byindex_req.lv_index]) == NULL) 2609 return -ENXIO; 2610 2611 /* Save usermode pointers */ 2612 if (copy_from_user(&saved_ptr1, &lv_status_byindex_req.lv->lv_current_pe, sizeof(void*)) != 0) 2613 return -EFAULT; 2614 if (copy_from_user(&saved_ptr2, &lv_status_byindex_req.lv->lv_block_exception, sizeof(void*)) != 0) 2615 return -EFAULT; 2616 2617 if (copy_to_user(lv_status_byindex_req.lv, lv_ptr, sizeof(lv_t)) != 0) 2618 return -EFAULT; 2619 if (saved_ptr1 != NULL) { 2620 if (copy_to_user(saved_ptr1, 2621 lv_ptr->lv_current_pe, 2622 lv_ptr->lv_allocated_le * 2623 sizeof(pe_t)) != 0) 2624 return -EFAULT; 2625 } 2626 2627 /* Restore usermode pointers */ 2628 if (copy_to_user(&lv_status_byindex_req.lv->lv_current_pe, &saved_ptr1, sizeof(void *)) != 0) 2629 return -EFAULT; 2630 2631 return 0; 2632} /* lvm_do_lv_status_byindex() */ 2633 2634 2635/* 2636 * character device support function logical volume status by device number 2637 */ 2638static int lvm_do_lv_status_bydev(vg_t * vg_ptr, void * arg) { 2639 int l; 2640 lv_status_bydev_req_t lv_status_bydev_req; 2641 void *saved_ptr1; 2642 void *saved_ptr2; 2643 lv_t *lv_ptr; 2644 2645 if (vg_ptr == NULL) return -ENXIO; 2646 if (copy_from_user(&lv_status_bydev_req, arg, 2647 sizeof(lv_status_bydev_req)) != 0) 2648 return -EFAULT; 2649 2650 for ( l = 0; l < vg_ptr->lv_max; l++) { 2651 if ( vg_ptr->lv[l] == NULL) continue; 2652 if ( vg_ptr->lv[l]->lv_dev == lv_status_bydev_req.dev) break; 2653 } 2654 2655 if ( l == vg_ptr->lv_max) return -ENXIO; 
2656 lv_ptr = vg_ptr->lv[l]; 2657 2658 /* Save usermode pointers */ 2659 if (copy_from_user(&saved_ptr1, &lv_status_bydev_req.lv->lv_current_pe, sizeof(void*)) != 0) 2660 return -EFAULT; 2661 if (copy_from_user(&saved_ptr2, &lv_status_bydev_req.lv->lv_block_exception, sizeof(void*)) != 0) 2662 return -EFAULT; 2663 2664 if (copy_to_user(lv_status_bydev_req.lv, lv_ptr, sizeof(lv_t)) != 0) 2665 return -EFAULT; 2666 if (saved_ptr1 != NULL) { 2667 if (copy_to_user(saved_ptr1, 2668 lv_ptr->lv_current_pe, 2669 lv_ptr->lv_allocated_le * 2670 sizeof(pe_t)) != 0) 2671 return -EFAULT; 2672 } 2673 /* Restore usermode pointers */ 2674 if (copy_to_user(&lv_status_bydev_req.lv->lv_current_pe, &saved_ptr1, sizeof(void *)) != 0) 2675 return -EFAULT; 2676 2677 return 0; 2678} /* lvm_do_lv_status_bydev() */ 2679 2680 2681/* 2682 * character device support function rename a logical volume 2683 */ 2684static int lvm_do_lv_rename(vg_t *vg_ptr, lv_req_t *lv_req, lv_t *lv) 2685{ 2686 int l = 0; 2687 int ret = 0; 2688 lv_t *lv_ptr = NULL; 2689 2690 if (!vg_ptr) 2691 return -ENXIO; 2692 2693 for (l = 0; l < vg_ptr->lv_max; l++) 2694 { 2695 if ( (lv_ptr = vg_ptr->lv[l]) == NULL) continue; 2696 if (lv_ptr->lv_dev == lv->lv_dev) 2697 { 2698 lvm_fs_remove_lv(vg_ptr, lv_ptr); 2699 strncpy(lv_ptr->lv_name, lv_req->lv_name, NAME_LEN); 2700 lvm_fs_create_lv(vg_ptr, lv_ptr); 2701 break; 2702 } 2703 } 2704 if (l == vg_ptr->lv_max) ret = -ENODEV; 2705 2706 return ret; 2707} /* lvm_do_lv_rename */ 2708 2709 2710/* 2711 * character device support function physical volume change 2712 */ 2713static int lvm_do_pv_change(vg_t *vg_ptr, void *arg) 2714{ 2715 uint p; 2716 pv_t *pv_ptr; 2717 struct block_device *bd; 2718 2719 if (vg_ptr == NULL) return -ENXIO; 2720 if (copy_from_user(&pv_change_req, arg, 2721 sizeof(pv_change_req)) != 0) 2722 return -EFAULT; 2723 2724 for (p = 0; p < vg_ptr->pv_max; p++) { 2725 pv_ptr = vg_ptr->pv[p]; 2726 if (pv_ptr != NULL && 2727 strcmp(pv_ptr->pv_name, 2728 
pv_change_req.pv_name) == 0) { 2729 2730 bd = pv_ptr->bd; 2731 if (copy_from_user(pv_ptr, 2732 pv_change_req.pv, 2733 sizeof(pv_t)) != 0) 2734 return -EFAULT; 2735 pv_ptr->bd = bd; 2736 2737 /* We don't need the PE list 2738 in kernel space as with LVs pe_t list */ 2739 pv_ptr->pe = NULL; 2740 return 0; 2741 } 2742 } 2743 return -ENXIO; 2744} /* lvm_do_pv_change() */ 2745 2746/* 2747 * character device support function get physical volume status 2748 */ 2749static int lvm_do_pv_status(vg_t *vg_ptr, void *arg) 2750{ 2751 uint p; 2752 pv_t *pv_ptr; 2753 2754 if (vg_ptr == NULL) return -ENXIO; 2755 if (copy_from_user(&pv_status_req, arg, 2756 sizeof(pv_status_req)) != 0) 2757 return -EFAULT; 2758 2759 for (p = 0; p < vg_ptr->pv_max; p++) { 2760 pv_ptr = vg_ptr->pv[p]; 2761 if (pv_ptr != NULL && 2762 strcmp(pv_ptr->pv_name, 2763 pv_status_req.pv_name) == 0) { 2764 if (copy_to_user(pv_status_req.pv, 2765 pv_ptr, 2766 sizeof(pv_t)) != 0) 2767 return -EFAULT; 2768 return 0; 2769 } 2770 } 2771 return -ENXIO; 2772} /* lvm_do_pv_status() */ 2773 2774 2775/* 2776 * character device support function flush and invalidate all buffers of a PV 2777 */ 2778static int lvm_do_pv_flush(void *arg) 2779{ 2780 pv_flush_req_t pv_flush_req; 2781 2782 if (copy_from_user(&pv_flush_req, arg, sizeof(pv_flush_req)) != 0) 2783 return -EFAULT; 2784 2785 fsync_dev(pv_flush_req.pv_dev); 2786 invalidate_buffers(pv_flush_req.pv_dev); 2787 2788 return 0; 2789} 2790 2791 2792/* 2793 * support function initialize gendisk variables 2794 */ 2795static void __init lvm_geninit(struct gendisk *lvm_gdisk) 2796{ 2797 int i = 0; 2798 2799#ifdef DEBUG_GENDISK 2800 printk(KERN_DEBUG "%s -- lvm_gendisk\n", lvm_name); 2801#endif 2802 2803 for (i = 0; i < MAX_LV; i++) { 2804 lvm_gendisk.part[i].start_sect = -1; /* avoid partition check */ 2805 lvm_size[i] = lvm_gendisk.part[i].nr_sects = 0; 2806 lvm_blocksizes[i] = BLOCK_SIZE; 2807 } 2808 2809 blk_size[MAJOR_NR] = lvm_size; 2810 blksize_size[MAJOR_NR] = 
lvm_blocksizes; 2811 hardsect_size[MAJOR_NR] = lvm_hardsectsizes; 2812 2813 return; 2814} /* lvm_gen_init() */ 2815 2816 2817 2818/* Must have down_write(_pe_lock) when we enqueue buffers */ 2819static void _queue_io(struct buffer_head *bh, int rw) { 2820 if (bh->b_reqnext) BUG(); 2821 bh->b_reqnext = _pe_requests; 2822 _pe_requests = bh; 2823} 2824 2825/* Must have down_write(_pe_lock) when we dequeue buffers */ 2826static struct buffer_head *_dequeue_io(void) 2827{ 2828 struct buffer_head *bh = _pe_requests; 2829 _pe_requests = NULL; 2830 return bh; 2831} 2832 2833/* 2834 * We do not need to hold _pe_lock to flush buffers. bh should be taken from 2835 * _pe_requests under down_write(_pe_lock), and then _pe_requests can be set 2836 * NULL and we drop _pe_lock. Any new buffers defered at this time will be 2837 * added to a new list, and the old buffers can have their I/O restarted 2838 * asynchronously. 2839 * 2840 * If, for some reason, the same PE is locked again before all of these writes 2841 * have finished, then these buffers will just be re-queued (i.e. no danger). 
2842 */ 2843static void _flush_io(struct buffer_head *bh) 2844{ 2845 while (bh) { 2846 struct buffer_head *next = bh->b_reqnext; 2847 bh->b_reqnext = NULL; 2848 /* resubmit this buffer head */ 2849 generic_make_request(WRITE, bh); 2850 bh = next; 2851 } 2852} 2853 2854 2855/* 2856 * we must open the pv's before we use them 2857 */ 2858static int _open_pv(pv_t *pv) { 2859 int err; 2860 struct block_device *bd; 2861 2862 if (!(bd = bdget(kdev_t_to_nr(pv->pv_dev)))) 2863 return -ENOMEM; 2864 2865 err = blkdev_get(bd, FMODE_READ|FMODE_WRITE, 0, BDEV_FILE); 2866 if (err) 2867 return err; 2868 2869 pv->bd = bd; 2870 return 0; 2871} 2872 2873static void _close_pv(pv_t *pv) { 2874 if (pv) { 2875 struct block_device *bdev = pv->bd; 2876 pv->bd = NULL; 2877 if (bdev) 2878 blkdev_put(bdev, BDEV_FILE); 2879 } 2880} 2881 2882 2883static unsigned long _sectors_to_k(unsigned long sect) 2884{ 2885 if(SECTOR_SIZE > 1024) { 2886 return sect * (SECTOR_SIZE / 1024); 2887 } 2888 2889 return sect / (1024 / SECTOR_SIZE); 2890} 2891 2892MODULE_AUTHOR("Heinz Mauelshagen, Sistina Software"); 2893MODULE_DESCRIPTION("Logical Volume Manager"); 2894#ifdef MODULE_LICENSE 2895MODULE_LICENSE("GPL"); 2896#endif 2897 2898module_init(lvm_init); 2899module_exit(lvm_cleanup); 2900