1/*	$NetBSD: vgreduce.c,v 1.1.1.2 2009/12/02 00:25:57 haad Exp $	*/
2
3/*
4 * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
5 * Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved.
6 *
7 * This file is part of LVM2.
8 *
9 * This copyrighted material is made available to anyone wishing to use,
10 * modify, copy, or redistribute it subject to the terms and conditions
11 * of the GNU Lesser General Public License v.2.1.
12 *
13 * You should have received a copy of the GNU Lesser General Public License
14 * along with this program; if not, write to the Free Software Foundation,
15 * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16 */
17
18#include "tools.h"
19#include "lv_alloc.h"
20
21static int _remove_pv(struct volume_group *vg, struct pv_list *pvl, int silent)
22{
23	char uuid[64] __attribute((aligned(8)));
24
25	if (vg->pv_count == 1) {
26		log_error("Volume Groups must always contain at least one PV");
27		return 0;
28	}
29
30	if (!id_write_format(&pvl->pv->id, uuid, sizeof(uuid)))
31		return_0;
32
33	log_verbose("Removing PV with UUID %s from VG %s", uuid, vg->name);
34
35	if (pvl->pv->pe_alloc_count) {
36		if (!silent)
37			log_error("LVs still present on PV with UUID %s: "
38				  "Can't remove from VG %s", uuid, vg->name);
39		return 0;
40	}
41
42	vg->free_count -= pvl->pv->pe_count;
43	vg->extent_count -= pvl->pv->pe_count;
44	vg->pv_count--;
45
46	dm_list_del(&pvl->list);
47
48	return 1;
49}
50
51static int _remove_lv(struct cmd_context *cmd, struct logical_volume *lv,
52		      int *list_unsafe, struct dm_list *lvs_changed)
53{
54	struct lv_segment *snap_seg;
55	struct dm_list *snh, *snht;
56	struct logical_volume *cow;
57	struct lv_list *lvl;
58	struct lvinfo info;
59	int first = 1;
60
61	log_verbose("%s/%s has missing extents: removing (including "
62		    "dependencies)", lv->vg->name, lv->name);
63
64	/* FIXME Cope properly with stacked devices & snapshots. */
65
66	/* If snapshot device is missing, deactivate origin. */
67	if (lv_is_cow(lv) && (snap_seg = find_cow(lv))) {
68		log_verbose("Deactivating (if active) logical volume %s "
69			    "(origin of %s)", snap_seg->origin->name, lv->name);
70
71		if (!test_mode() && !deactivate_lv(cmd, snap_seg->origin)) {
72			log_error("Failed to deactivate LV %s",
73				  snap_seg->origin->name);
74			return 0;
75		}
76
77		/* Use the origin LV */
78		lv = snap_seg->origin;
79	}
80
81	/* Remove snapshot dependencies */
82	dm_list_iterate_safe(snh, snht, &lv->snapshot_segs) {
83		snap_seg = dm_list_struct_base(snh, struct lv_segment,
84					    origin_list);
85		cow = snap_seg->cow;
86
87		if (first && !test_mode() &&
88		    !deactivate_lv(cmd, snap_seg->origin)) {
89			log_error("Failed to deactivate LV %s",
90				  snap_seg->origin->name);
91			return 0;
92		}
93
94		*list_unsafe = 1;	/* May remove caller's lvht! */
95		if (!vg_remove_snapshot(cow))
96			return_0;
97		log_verbose("Removing LV %s from VG %s", cow->name,
98			    lv->vg->name);
99		if (!lv_remove(cow))
100			return_0;
101
102		first = 0;
103	}
104
105	/*
106	 * If LV is active, replace it with error segment
107	 * and add to list of LVs to be removed later.
108	 * Doesn't apply to snapshots/origins yet - they're already deactivated.
109	 */
110	/*
111	 * If the LV is a part of mirror segment,
112	 * the mirrored LV also should be cleaned up.
113	 * Clean-up is currently done by caller (_make_vg_consistent()).
114	 */
115	if ((lv_info(cmd, lv, &info, 0, 0) && info.exists) ||
116	    find_mirror_seg(first_seg(lv))) {
117		if (!replace_lv_with_error_segment(lv))
118			return_0;
119
120		if (!(lvl = dm_pool_alloc(cmd->mem, sizeof(*lvl)))) {
121			log_error("lv_list alloc failed");
122			return 0;
123		}
124		lvl->lv = lv;
125		dm_list_add(lvs_changed, &lvl->list);
126	} else {
127		/* Remove LV immediately. */
128		log_verbose("Removing LV %s from VG %s", lv->name, lv->vg->name);
129		if (!lv_remove(lv))
130			return_0;
131	}
132
133	return 1;
134}
135
136static int _consolidate_vg(struct cmd_context *cmd, struct volume_group *vg)
137{
138	struct pv_list *pvl;
139	struct lv_list *lvl;
140	int r = 1;
141
142	dm_list_iterate_items(lvl, &vg->lvs)
143		if (lvl->lv->status & PARTIAL_LV) {
144			log_warn("WARNING: Partial LV %s needs to be repaired "
145				 "or removed. ", lvl->lv->name);
146			r = 0;
147		}
148
149	if (!r) {
150		cmd->handles_missing_pvs = 1;
151		log_warn("WARNING: There are still partial LVs in VG %s.", vg->name);
152		log_warn("To remove them unconditionally use: vgreduce --removemissing --force.");
153		log_warn("Proceeding to remove empty missing PVs.");
154	}
155
156	dm_list_iterate_items(pvl, &vg->pvs) {
157		if (pvl->pv->dev && !(pvl->pv->status & MISSING_PV))
158			continue;
159		if (r && !_remove_pv(vg, pvl, 0))
160			return_0;
161	}
162
163	return r;
164}
165
166static int _make_vg_consistent(struct cmd_context *cmd, struct volume_group *vg)
167{
168	struct dm_list *pvh, *pvht;
169	struct dm_list *lvh, *lvht;
170	struct pv_list *pvl;
171	struct lv_list *lvl, *lvl2, *lvlt;
172	struct logical_volume *lv;
173	struct physical_volume *pv;
174	struct lv_segment *seg, *mirrored_seg;
175	unsigned s;
176	uint32_t mimages, remove_log;
177	int list_unsafe, only_mirror_images_found;
178	DM_LIST_INIT(lvs_changed);
179	only_mirror_images_found = 1;
180
181	/* Deactivate & remove necessary LVs */
182      restart_loop:
183	list_unsafe = 0;	/* Set if we delete a different list-member */
184
185	dm_list_iterate_safe(lvh, lvht, &vg->lvs) {
186		lv = dm_list_item(lvh, struct lv_list)->lv;
187
188		/* Are any segments of this LV on missing PVs? */
189		dm_list_iterate_items(seg, &lv->segments) {
190			for (s = 0; s < seg->area_count; s++) {
191				if (seg_type(seg, s) != AREA_PV)
192					continue;
193
194				/* FIXME Also check for segs on deleted LVs (incl pvmove) */
195
196				pv = seg_pv(seg, s);
197				if (!pv || !pv_dev(pv) ||
198				    (pv->status & MISSING_PV)) {
199					if (arg_count(cmd, mirrorsonly_ARG) &&
200					    !(lv->status & MIRROR_IMAGE)) {
201						log_error("Non-mirror-image LV %s found: can't remove.", lv->name);
202						only_mirror_images_found = 0;
203						continue;
204					}
205					if (!_remove_lv(cmd, lv, &list_unsafe, &lvs_changed))
206						return_0;
207					if (list_unsafe)
208						goto restart_loop;
209				}
210			}
211		}
212	}
213
214	if (!only_mirror_images_found) {
215		log_error("Aborting because --mirrorsonly was specified.");
216		return 0;
217	}
218
219	/*
220	 * Remove missing PVs. FIXME: This duplicates _consolidate_vg above,
221	 * but we cannot use that right now, since the LV removal code in this
222	 * function leaves the VG in a "somewhat inconsistent" state and
223	 * _consolidate_vg doesn't like that -- specifically, mirrors are fixed
224	 * up *after* the PVs are removed. All this should be gradually
225	 * superseded by lvconvert --repair.
226	 */
227	dm_list_iterate_safe(pvh, pvht, &vg->pvs) {
228		pvl = dm_list_item(pvh, struct pv_list);
229		if (pvl->pv->dev)
230			continue;
231		if (!_remove_pv(vg, pvl, 0))
232			return_0;
233	}
234
235	/* FIXME Recovery.  For now people must clean up by hand. */
236
237	if (!dm_list_empty(&lvs_changed)) {
238		if (!vg_write(vg)) {
239			log_error("Failed to write out a consistent VG for %s",
240				  vg->name);
241			return 0;
242		}
243
244		if (!test_mode()) {
245			/* Suspend lvs_changed */
246			if (!suspend_lvs(cmd, &lvs_changed)) {
247				stack;
248				vg_revert(vg);
249				return 0;
250			}
251		}
252
253		if (!vg_commit(vg)) {
254			log_error("Failed to commit consistent VG for %s",
255				  vg->name);
256			vg_revert(vg);
257			return 0;
258		}
259
260		if (!test_mode()) {
261			if (!resume_lvs(cmd, &lvs_changed)) {
262				log_error("Failed to resume LVs using error segments.");
263				return 0;
264			}
265		}
266
267  lvs_changed_altered:
268		/* Remove lost mirror images from mirrors */
269		dm_list_iterate_items(lvl, &vg->lvs) {
270  mirrored_seg_altered:
271			mirrored_seg = first_seg(lvl->lv);
272			if (!seg_is_mirrored(mirrored_seg))
273				continue;
274
275			mimages = mirrored_seg->area_count;
276			remove_log = 0;
277
278			for (s = 0; s < mirrored_seg->area_count; s++) {
279				dm_list_iterate_items_safe(lvl2, lvlt, &lvs_changed) {
280					if (seg_type(mirrored_seg, s) != AREA_LV ||
281					    lvl2->lv != seg_lv(mirrored_seg, s))
282						continue;
283					dm_list_del(&lvl2->list);
284					if (!shift_mirror_images(mirrored_seg, s))
285						return_0;
286					mimages--;	/* FIXME Assumes uniqueness */
287				}
288			}
289
290			if (mirrored_seg->log_lv) {
291				dm_list_iterate_items(seg, &mirrored_seg->log_lv->segments) {
292					/* FIXME: The second test shouldn't be required */
293					if ((seg->segtype ==
294					     get_segtype_from_string(vg->cmd, "error"))) {
295						log_print("The log device for %s/%s has failed.",
296							  vg->name, mirrored_seg->lv->name);
297						remove_log = 1;
298						break;
299					}
300					if (!strcmp(seg->segtype->name, "error")) {
301						log_print("Log device for %s/%s has failed.",
302							  vg->name, mirrored_seg->lv->name);
303						remove_log = 1;
304						break;
305					}
306				}
307			}
308
309			if ((mimages != mirrored_seg->area_count) || remove_log){
310				if (!reconfigure_mirror_images(mirrored_seg, mimages,
311							       NULL, remove_log))
312					return_0;
313
314				if (!vg_write(vg)) {
315					log_error("Failed to write out updated "
316						  "VG for %s", vg->name);
317					return 0;
318				}
319
320				if (!vg_commit(vg)) {
321					log_error("Failed to commit updated VG "
322						  "for %s", vg->name);
323					vg_revert(vg);
324					return 0;
325				}
326
327				/* mirrored LV no longer has valid mimages.
328				 * So add it to lvs_changed for removal.
329				 * For this LV may be an area of other mirror,
330				 * restart the loop. */
331				if (!mimages) {
332					if (!_remove_lv(cmd, lvl->lv,
333						 &list_unsafe, &lvs_changed))
334						return_0;
335					goto lvs_changed_altered;
336				}
337
338				/* As a result of reconfigure_mirror_images(),
339				 * first_seg(lv) may now be different seg.
340				 * e.g. a temporary layer might be removed.
341				 * So check the mirrored_seg again. */
342				goto mirrored_seg_altered;
343			}
344		}
345
346		/* Deactivate error LVs */
347		if (!test_mode()) {
348			dm_list_iterate_items_safe(lvl, lvlt, &lvs_changed) {
349				log_verbose("Deactivating (if active) logical volume %s",
350					    lvl->lv->name);
351
352				if (!deactivate_lv(cmd, lvl->lv)) {
353					log_error("Failed to deactivate LV %s",
354						  lvl->lv->name);
355					/*
356					 * We failed to deactivate.
357					 * Probably because this was a mirror log.
358					 * Don't try to lv_remove it.
359					 * Continue work on others.
360					 */
361					dm_list_del(&lvl->list);
362				}
363			}
364		}
365
366		/* Remove remaining LVs */
367		dm_list_iterate_items(lvl, &lvs_changed) {
368			log_verbose("Removing LV %s from VG %s", lvl->lv->name,
369				    lvl->lv->vg->name);
370				/* Skip LVs already removed by mirror code */
371				if (find_lv_in_vg(vg, lvl->lv->name) &&
372				    !lv_remove(lvl->lv))
373					return_0;
374		}
375	}
376
377	return 1;
378}
379
380/* Or take pv_name instead? */
381static int _vgreduce_single(struct cmd_context *cmd, struct volume_group *vg,
382			    struct physical_volume *pv,
383			    void *handle __attribute((unused)))
384{
385	struct pv_list *pvl;
386	struct volume_group *orphan_vg = NULL;
387	int r = ECMD_FAILED;
388	const char *name = pv_dev_name(pv);
389
390	if (pv_pe_alloc_count(pv)) {
391		log_error("Physical volume \"%s\" still in use", name);
392		return ECMD_FAILED;
393	}
394
395	if (vg->pv_count == 1) {
396		log_error("Can't remove final physical volume \"%s\" from "
397			  "volume group \"%s\"", name, vg->name);
398		return ECMD_FAILED;
399	}
400
401	if (!lock_vol(cmd, VG_ORPHANS, LCK_VG_WRITE)) {
402		log_error("Can't get lock for orphan PVs");
403		return ECMD_FAILED;
404	}
405
406	pvl = find_pv_in_vg(vg, name);
407
408	if (!archive(vg))
409		goto_bad;
410
411	log_verbose("Removing \"%s\" from volume group \"%s\"", name, vg->name);
412
413	if (pvl)
414		dm_list_del(&pvl->list);
415
416	pv->vg_name = vg->fid->fmt->orphan_vg_name;
417	pv->status = ALLOCATABLE_PV;
418
419	if (!dev_get_size(pv_dev(pv), &pv->size)) {
420		log_error("%s: Couldn't get size.", pv_dev_name(pv));
421		goto bad;
422	}
423
424	vg->pv_count--;
425	vg->free_count -= pv_pe_count(pv) - pv_pe_alloc_count(pv);
426	vg->extent_count -= pv_pe_count(pv);
427
428	orphan_vg = vg_read_for_update(cmd, vg->fid->fmt->orphan_vg_name,
429				       NULL, 0);
430
431	if (vg_read_error(orphan_vg))
432		goto bad;
433
434	if (!vg_split_mdas(cmd, vg, orphan_vg) || !vg->pv_count) {
435		log_error("Cannot remove final metadata area on \"%s\" from \"%s\"",
436			  name, vg->name);
437		goto bad;
438	}
439
440	if (!vg_write(vg) || !vg_commit(vg)) {
441		log_error("Removal of physical volume \"%s\" from "
442			  "\"%s\" failed", name, vg->name);
443		goto bad;
444	}
445
446	if (!pv_write(cmd, pv, NULL, INT64_C(-1))) {
447		log_error("Failed to clear metadata from physical "
448			  "volume \"%s\" "
449			  "after removal from \"%s\"", name, vg->name);
450		goto bad;
451	}
452
453	backup(vg);
454
455	log_print("Removed \"%s\" from volume group \"%s\"", name, vg->name);
456	r = ECMD_PROCESSED;
457bad:
458	unlock_and_release_vg(cmd, orphan_vg, VG_ORPHANS);
459	return r;
460}
461
462int vgreduce(struct cmd_context *cmd, int argc, char **argv)
463{
464	struct volume_group *vg;
465	char *vg_name;
466	int ret = ECMD_FAILED;
467	int fixed = 1;
468	int repairing = arg_count(cmd, removemissing_ARG);
469	int saved_ignore_suspended_devices = ignore_suspended_devices();
470
471	if (!argc && !repairing) {
472		log_error("Please give volume group name and "
473			  "physical volume paths");
474		return EINVALID_CMD_LINE;
475	}
476
477	if (!argc && repairing) {
478		log_error("Please give volume group name");
479		return EINVALID_CMD_LINE;
480	}
481
482	if (arg_count(cmd, mirrorsonly_ARG) && !repairing) {
483		log_error("--mirrorsonly requires --removemissing");
484		return EINVALID_CMD_LINE;
485	}
486
487	if (argc == 1 && !arg_count(cmd, all_ARG) && !repairing) {
488		log_error("Please enter physical volume paths or option -a");
489		return EINVALID_CMD_LINE;
490	}
491
492	if (argc > 1 && arg_count(cmd, all_ARG)) {
493		log_error("Option -a and physical volume paths mutually "
494			  "exclusive");
495		return EINVALID_CMD_LINE;
496	}
497
498	if (argc > 1 && repairing) {
499		log_error("Please only specify the volume group");
500		return EINVALID_CMD_LINE;
501	}
502
503	vg_name = skip_dev_dir(cmd, argv[0], NULL);
504	argv++;
505	argc--;
506
507	log_verbose("Finding volume group \"%s\"", vg_name);
508
509	if (repairing) {
510		init_ignore_suspended_devices(1);
511		cmd->handles_missing_pvs = 1;
512	}
513
514	vg = vg_read_for_update(cmd, vg_name, NULL, READ_ALLOW_EXPORTED);
515	if (vg_read_error(vg) == FAILED_ALLOCATION ||
516	    vg_read_error(vg) == FAILED_NOTFOUND)
517		goto_out;
518
519	/* FIXME We want to allow read-only VGs to be changed here? */
520	if (vg_read_error(vg) && vg_read_error(vg) != FAILED_READ_ONLY
521	    && !arg_count(cmd, removemissing_ARG))
522		goto_out;
523
524	if (repairing) {
525		if (!vg_read_error(vg) && !vg_missing_pv_count(vg)) {
526			log_error("Volume group \"%s\" is already consistent",
527				  vg_name);
528			ret = ECMD_PROCESSED;
529			goto out;
530		}
531
532		vg_release(vg);
533		log_verbose("Trying to open VG %s for recovery...", vg_name);
534
535		vg = vg_read_for_update(cmd, vg_name, NULL,
536					READ_ALLOW_INCONSISTENT
537					| READ_ALLOW_EXPORTED);
538
539		if (vg_read_error(vg) && vg_read_error(vg) != FAILED_READ_ONLY
540		    && vg_read_error(vg) != FAILED_INCONSISTENT)
541			goto_out;
542
543		if (!archive(vg))
544			goto_out;
545
546		if (arg_count(cmd, force_ARG)) {
547			if (!_make_vg_consistent(cmd, vg))
548				goto_out;
549		} else
550			fixed = _consolidate_vg(cmd, vg);
551
552		if (!vg_write(vg) || !vg_commit(vg)) {
553			log_error("Failed to write out a consistent VG for %s",
554				  vg_name);
555			goto out;
556		}
557		backup(vg);
558
559		if (fixed) {
560			log_print("Wrote out consistent volume group %s",
561				  vg_name);
562			ret = ECMD_PROCESSED;
563		} else
564			ret = ECMD_FAILED;
565
566	} else {
567		if (!vg_check_status(vg, EXPORTED_VG | LVM_WRITE | RESIZEABLE_VG))
568			goto_out;
569
570		/* FIXME: Pass private struct through to all these functions */
571		/* and update in batch here? */
572		ret = process_each_pv(cmd, argc, argv, vg, READ_FOR_UPDATE, 0, NULL,
573				      _vgreduce_single);
574
575	}
576out:
577	init_ignore_suspended_devices(saved_ignore_suspended_devices);
578	unlock_and_release_vg(cmd, vg, vg_name);
579
580	return ret;
581
582/******* FIXME
583	log_error ("no empty physical volumes found in volume group \"%s\"", vg_name);
584
585	log_verbose
586	    ("volume group \"%s\" will be reduced by %d physical volume%s",
587	     vg_name, np, np > 1 ? "s" : "");
588	log_verbose ("reducing volume group \"%s\" by physical volume \"%s\"",
589		     vg_name, pv_names[p]);
590
591	log_print
592	    ("volume group \"%s\" %ssuccessfully reduced by physical volume%s:",
593	     vg_name, error > 0 ? "NOT " : "", p > 1 ? "s" : "");
594		log_print("%s", pv_this[p]->pv_name);
595********/
596
597}
598