1/*	$NetBSD$	*/
2
3/*
4 * Copyright (C) 2003-2004 Sistina Software, Inc. All rights reserved.
5 * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
6 *
7 * This file is part of LVM2.
8 *
9 * This copyrighted material is made available to anyone wishing to use,
10 * modify, copy, or redistribute it subject to the terms and conditions
11 * of the GNU Lesser General Public License v.2.1.
12 *
13 * You should have received a copy of the GNU Lesser General Public License
14 * along with this program; if not, write to the Free Software Foundation,
15 * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16 */
17
18#include "tools.h"
19#include "polldaemon.h"
20#include "display.h"
21
/*
 * Flag bit for the 'flags' argument of _update_metadata().
 * When set, the temporary pvmove mirror is activated; when clear, it is
 * suspended before the metadata commit and resumed afterwards.
 */
#define PVMOVE_FIRST_TIME   0x00000001      /* Called for first time */
23
24static int _pvmove_target_present(struct cmd_context *cmd, int clustered)
25{
26	const struct segment_type *segtype;
27	unsigned attr = 0;
28	int found = 1;
29	static int _clustered_found = -1;
30
31	if (clustered && _clustered_found >= 0)
32		return _clustered_found;
33
34	if (!(segtype = get_segtype_from_string(cmd, "mirror")))
35		return_0;
36
37	if (activation() && segtype->ops->target_present &&
38	    !segtype->ops->target_present(cmd, NULL, clustered ? &attr : NULL))
39		found = 0;
40
41	if (activation() && clustered) {
42		if (found && (attr & MIRROR_LOG_CLUSTERED))
43			_clustered_found = found = 1;
44		else
45			_clustered_found = found = 0;
46	}
47
48	return found;
49}
50
/*
 * Decide whether pvmove must use exclusive activation in this VG.
 * Returns 1 only for a clustered VG for which the clustered mirror target
 * check fails; returns 0 otherwise.
 */
static unsigned _pvmove_is_exclusive(struct cmd_context *cmd,
				     struct volume_group *vg)
{
	if (!vg_is_clustered(vg))
		return 0;

	return _pvmove_target_present(cmd, 1) ? 0 : 1;
}
60
/*
 * Extract the bare LV name from a --name argument.
 * Accepted forms: /dev/vgname/lvname, vgname/lvname or lvname.
 *
 * An argument without any '/' is returned unchanged.  Otherwise the device
 * directory prefix is skipped, the VG component must equal 'vgname', and a
 * pointer into 'arg' just past the VG component and its slashes is returned.
 *
 * Returns NULL, after logging an error, if the argument is malformed or
 * names a different VG.
 */
static const char *_extract_lvname(struct cmd_context *cmd, const char *vgname,
				   const char *arg)
{
	const char *p;
	size_t vg_len;

	/* Is an lvname supplied directly? */
	if (strchr(arg, '/') == NULL)
		return arg;

	p = skip_dev_dir(cmd, arg, NULL);
	for (; *p == '/'; p++)
		;

	if (strchr(p, '/') == NULL) {
		log_error("--name takes a logical volume name");
		return NULL;
	}

	/* The VG component must match exactly and be followed by '/'. */
	vg_len = strlen(vgname);
	if (strncmp(vgname, p, vg_len) != 0 || p[vg_len] != '/') {
		log_error("Named LV and old PV must be in the same VG");
		return NULL;
	}

	p += vg_len;
	for (; *p == '/'; p++)
		;

	if (*p == '\0') {
		log_error("Incomplete LV name supplied with --name");
		return NULL;
	}

	return p;
}
91
92static struct volume_group *_get_vg(struct cmd_context *cmd, const char *vgname)
93{
94	dev_close_all();
95
96	return vg_read_for_update(cmd, vgname, NULL, 0);
97}
98
99/* Create list of PVs for allocation of replacement extents */
100static struct dm_list *_get_allocatable_pvs(struct cmd_context *cmd, int argc,
101					 char **argv, struct volume_group *vg,
102					 struct physical_volume *pv,
103					 alloc_policy_t alloc)
104{
105	struct dm_list *allocatable_pvs, *pvht, *pvh;
106	struct pv_list *pvl;
107
108	if (argc)
109		allocatable_pvs = create_pv_list(cmd->mem, vg, argc, argv, 1);
110	else
111		allocatable_pvs = clone_pv_list(cmd->mem, &vg->pvs);
112
113	if (!allocatable_pvs)
114		return_NULL;
115
116	dm_list_iterate_safe(pvh, pvht, allocatable_pvs) {
117		pvl = dm_list_item(pvh, struct pv_list);
118
119		/* Don't allocate onto the PV we're clearing! */
120		if ((alloc != ALLOC_ANYWHERE) && (pvl->pv->dev == pv_dev(pv))) {
121			dm_list_del(&pvl->list);
122			continue;
123		}
124
125		/* Remove PV if full */
126		if ((pvl->pv->pe_count == pvl->pv->pe_alloc_count))
127			dm_list_del(&pvl->list);
128	}
129
130	if (dm_list_empty(allocatable_pvs)) {
131		log_error("No extents available for allocation");
132		return NULL;
133	}
134
135	return allocatable_pvs;
136}
137
138/*
139 * Replace any LV segments on given PV with temporary mirror.
140 * Returns list of LVs changed.
141 */
142static int _insert_pvmove_mirrors(struct cmd_context *cmd,
143				  struct logical_volume *lv_mirr,
144				  struct dm_list *source_pvl,
145				  struct logical_volume *lv,
146				  struct dm_list *lvs_changed)
147
148{
149	struct pv_list *pvl;
150	uint32_t prev_le_count;
151
152	/* Only 1 PV may feature in source_pvl */
153	pvl = dm_list_item(source_pvl->n, struct pv_list);
154
155	prev_le_count = lv_mirr->le_count;
156	if (!insert_layer_for_segments_on_pv(cmd, lv, lv_mirr, PVMOVE,
157					     pvl, lvs_changed))
158		return_0;
159
160	/* check if layer was inserted */
161	if (lv_mirr->le_count - prev_le_count) {
162		lv->status |= LOCKED;
163
164		log_verbose("Moving %u extents of logical volume %s/%s",
165			    lv_mirr->le_count - prev_le_count,
166			    lv->vg->name, lv->name);
167	}
168
169	return 1;
170}
171
172/* Create new LV with mirror segments for the required copies */
173static struct logical_volume *_set_up_pvmove_lv(struct cmd_context *cmd,
174						struct volume_group *vg,
175						struct dm_list *source_pvl,
176						const char *lv_name,
177						struct dm_list *allocatable_pvs,
178						alloc_policy_t alloc,
179						struct dm_list **lvs_changed)
180{
181	struct logical_volume *lv_mirr, *lv;
182	struct lv_list *lvl;
183	uint32_t log_count = 0;
184	int lv_found = 0;
185
186	/* FIXME Cope with non-contiguous => splitting existing segments */
187	if (!(lv_mirr = lv_create_empty("pvmove%d", NULL,
188					LVM_READ | LVM_WRITE,
189					ALLOC_CONTIGUOUS, vg))) {
190		log_error("Creation of temporary pvmove LV failed");
191		return NULL;
192	}
193
194	lv_mirr->status |= (PVMOVE | LOCKED);
195
196	if (!(*lvs_changed = dm_pool_alloc(cmd->mem, sizeof(**lvs_changed)))) {
197		log_error("lvs_changed list struct allocation failed");
198		return NULL;
199	}
200
201	dm_list_init(*lvs_changed);
202
203	/* Find segments to be moved and set up mirrors */
204	dm_list_iterate_items(lvl, &vg->lvs) {
205		lv = lvl->lv;
206		if ((lv == lv_mirr))
207			continue;
208		if (lv_name) {
209			if (strcmp(lv->name, lv_name))
210				continue;
211			lv_found = 1;
212		}
213		if (lv_is_origin(lv) || lv_is_cow(lv)) {
214			log_print("Skipping snapshot-related LV %s", lv->name);
215			continue;
216		}
217		if (lv->status & MIRRORED) {
218			log_print("Skipping mirror LV %s", lv->name);
219			continue;
220		}
221		if (lv->status & MIRROR_LOG) {
222			log_print("Skipping mirror log LV %s", lv->name);
223			continue;
224		}
225		if (lv->status & MIRROR_IMAGE) {
226			log_print("Skipping mirror image LV %s", lv->name);
227			continue;
228		}
229		if (lv->status & LOCKED) {
230			log_print("Skipping locked LV %s", lv->name);
231			continue;
232		}
233		if (!_insert_pvmove_mirrors(cmd, lv_mirr, source_pvl, lv,
234					    *lvs_changed))
235			return_NULL;
236	}
237
238	if (lv_name && !lv_found) {
239		log_error("Logical volume %s not found.", lv_name);
240		return NULL;
241	}
242
243	/* Is temporary mirror empty? */
244	if (!lv_mirr->le_count) {
245		log_error("No data to move for %s", vg->name);
246		return NULL;
247	}
248
249	if (!lv_add_mirrors(cmd, lv_mirr, 1, 1, 0, log_count,
250			    allocatable_pvs, alloc, MIRROR_BY_SEG)) {
251		log_error("Failed to convert pvmove LV to mirrored");
252		return_NULL;
253	}
254
255	if (!split_parent_segments_for_layer(cmd, lv_mirr)) {
256		log_error("Failed to split segments being moved");
257		return_NULL;
258	}
259
260	return lv_mirr;
261}
262
/*
 * Activate the temporary mirror LV, exclusively if 'exclusive' is non-zero.
 * Returns the result of the activation call that was used.
 */
static int _activate_lv(struct cmd_context *cmd, struct logical_volume *lv_mirr,
			unsigned exclusive)
{
	return exclusive ? activate_lv_excl(cmd, lv_mirr)
			 : activate_lv(cmd, lv_mirr);
}
271
272static int _finish_pvmove(struct cmd_context *cmd, struct volume_group *vg,
273			  struct logical_volume *lv_mirr,
274			  struct dm_list *lvs_changed);
275
276static int _update_metadata(struct cmd_context *cmd, struct volume_group *vg,
277			    struct logical_volume *lv_mirr,
278			    struct dm_list *lvs_changed, unsigned flags)
279{
280	unsigned exclusive = _pvmove_is_exclusive(cmd, vg);
281	unsigned first_time = (flags & PVMOVE_FIRST_TIME) ? 1 : 0;
282	int r = 0;
283
284	log_verbose("Updating volume group metadata");
285	if (!vg_write(vg)) {
286		log_error("ABORTING: Volume group metadata update failed.");
287		return 0;
288	}
289
290	/* Suspend lvs_changed */
291	if (!suspend_lvs(cmd, lvs_changed))
292		goto_out;
293
294	/* Suspend mirrors on subsequent calls */
295	if (!first_time) {
296		if (!suspend_lv(cmd, lv_mirr)) {
297			resume_lvs(cmd, lvs_changed);
298			vg_revert(vg);
299			goto_out;
300		}
301	}
302
303	/* Commit on-disk metadata */
304	if (!vg_commit(vg)) {
305		log_error("ABORTING: Volume group metadata update failed.");
306		if (!first_time)
307			resume_lv(cmd, lv_mirr);
308		resume_lvs(cmd, lvs_changed);
309		goto out;
310	}
311
312	/* Activate the temporary mirror LV */
313	/* Only the first mirror segment gets activated as a mirror */
314	/* FIXME: Add option to use a log */
315	if (first_time) {
316		if (!_activate_lv(cmd, lv_mirr, exclusive)) {
317			if (test_mode())
318				goto out;
319
320			/*
321			 * Nothing changed yet, try to revert pvmove.
322			 */
323			log_error("Temporary pvmove mirror activation failed.");
324			if (!_finish_pvmove(cmd, vg, lv_mirr, lvs_changed))
325				log_error("ABORTING: Restoring original configuration "
326					  "before pvmove failed. Run pvmove --abort.");
327			goto out;
328		}
329	} else if (!resume_lv(cmd, lv_mirr)) {
330		log_error("Unable to reactivate logical volume \"%s\"",
331			  lv_mirr->name);
332		resume_lvs(cmd, lvs_changed);
333		goto out;
334	}
335
336	/* Unsuspend LVs */
337	if (!resume_lvs(cmd, lvs_changed)) {
338		log_error("Unable to resume logical volumes");
339		goto out;
340	}
341
342	r = 1;
343out:
344	backup(vg);
345	return r;
346}
347
348static int _set_up_pvmove(struct cmd_context *cmd, const char *pv_name,
349			  int argc, char **argv)
350{
351	const char *lv_name = NULL;
352	char *pv_name_arg;
353	struct volume_group *vg;
354	struct dm_list *source_pvl;
355	struct dm_list *allocatable_pvs;
356	alloc_policy_t alloc;
357	struct dm_list *lvs_changed;
358	struct physical_volume *pv;
359	struct logical_volume *lv_mirr;
360	unsigned first_time = 1;
361	unsigned exclusive;
362	int r = ECMD_FAILED;
363
364	pv_name_arg = argv[0];
365	argc--;
366	argv++;
367
368	/* Find PV (in VG) */
369	if (!(pv = find_pv_by_name(cmd, pv_name))) {
370		stack;
371		return EINVALID_CMD_LINE;
372	}
373
374	if (arg_count(cmd, name_ARG)) {
375		if (!(lv_name = _extract_lvname(cmd, pv_vg_name(pv),
376						arg_value(cmd, name_ARG)))) {
377			stack;
378			return EINVALID_CMD_LINE;
379		}
380
381		if (!validate_name(lv_name)) {
382			log_error("Logical volume name %s is invalid", lv_name);
383			return EINVALID_CMD_LINE;
384		}
385	}
386
387	/* Read VG */
388	log_verbose("Finding volume group \"%s\"", pv_vg_name(pv));
389
390	vg = _get_vg(cmd, pv_vg_name(pv));
391	if (vg_read_error(vg)) {
392		vg_release(vg);
393		stack;
394		return ECMD_FAILED;
395	}
396
397	exclusive = _pvmove_is_exclusive(cmd, vg);
398
399	if ((lv_mirr = find_pvmove_lv(vg, pv_dev(pv), PVMOVE))) {
400		log_print("Detected pvmove in progress for %s", pv_name);
401		if (argc || lv_name)
402			log_error("Ignoring remaining command line arguments");
403
404		if (!(lvs_changed = lvs_using_lv(cmd, vg, lv_mirr))) {
405			log_error("ABORTING: Failed to generate list of moving LVs");
406			goto out;
407		}
408
409		/* Ensure mirror LV is active */
410		if (!_activate_lv(cmd, lv_mirr, exclusive)) {
411			log_error("ABORTING: Temporary mirror activation failed.");
412			goto out;
413		}
414
415		first_time = 0;
416	} else {
417		/* Determine PE ranges to be moved */
418		if (!(source_pvl = create_pv_list(cmd->mem, vg, 1,
419						  &pv_name_arg, 0)))
420			goto_out;
421
422		alloc = arg_uint_value(cmd, alloc_ARG, ALLOC_INHERIT);
423		if (alloc == ALLOC_INHERIT)
424			alloc = vg->alloc;
425
426		/* Get PVs we can use for allocation */
427		if (!(allocatable_pvs = _get_allocatable_pvs(cmd, argc, argv,
428							     vg, pv, alloc)))
429			goto_out;
430
431		if (!archive(vg))
432			goto_out;
433
434		if (!(lv_mirr = _set_up_pvmove_lv(cmd, vg, source_pvl, lv_name,
435						  allocatable_pvs, alloc,
436						  &lvs_changed)))
437			goto_out;
438	}
439
440	/* Lock lvs_changed and activate (with old metadata) */
441	if (!activate_lvs(cmd, lvs_changed, exclusive))
442		goto_out;
443
444	/* FIXME Presence of a mirror once set PVMOVE - now remove associated logic */
445	/* init_pvmove(1); */
446	/* vg->status |= PVMOVE; */
447
448	if (first_time) {
449		if (!_update_metadata
450		    (cmd, vg, lv_mirr, lvs_changed, PVMOVE_FIRST_TIME))
451			goto_out;
452	}
453
454	/* LVs are all in status LOCKED */
455	r = ECMD_PROCESSED;
456out:
457	unlock_and_release_vg(cmd, vg, pv_vg_name(pv));
458	return r;
459}
460
461static int _finish_pvmove(struct cmd_context *cmd, struct volume_group *vg,
462			  struct logical_volume *lv_mirr,
463			  struct dm_list *lvs_changed)
464{
465	int r = 1;
466	struct dm_list lvs_completed;
467	struct lv_list *lvl;
468
469	/* Update metadata to remove mirror segments and break dependencies */
470	dm_list_init(&lvs_completed);
471	if (!lv_remove_mirrors(cmd, lv_mirr, 1, 0, NULL, PVMOVE) ||
472	    !remove_layers_for_segments_all(cmd, lv_mirr, PVMOVE,
473					    &lvs_completed)) {
474		log_error("ABORTING: Removal of temporary mirror failed");
475		return 0;
476	}
477
478	dm_list_iterate_items(lvl, &lvs_completed)
479		/* FIXME Assumes only one pvmove at a time! */
480		lvl->lv->status &= ~LOCKED;
481
482	/* Store metadata without dependencies on mirror segments */
483	if (!vg_write(vg)) {
484		log_error("ABORTING: Failed to write new data locations "
485			  "to disk.");
486		return 0;
487	}
488
489	/* Suspend LVs changed */
490	if (!suspend_lvs(cmd, lvs_changed)) {
491		log_error("Locking LVs to remove temporary mirror failed");
492		r = 0;
493	}
494
495	/* Suspend mirror LV to flush pending I/O */
496	if (!suspend_lv(cmd, lv_mirr)) {
497		log_error("Suspension of temporary mirror LV failed");
498		r = 0;
499	}
500
501	/* Store metadata without dependencies on mirror segments */
502	if (!vg_commit(vg)) {
503		log_error("ABORTING: Failed to write new data locations "
504			  "to disk.");
505		vg_revert(vg);
506		resume_lv(cmd, lv_mirr);
507		resume_lvs(cmd, lvs_changed);
508		return 0;
509	}
510
511	/* Release mirror LV.  (No pending I/O because it's been suspended.) */
512	if (!resume_lv(cmd, lv_mirr)) {
513		log_error("Unable to reactivate logical volume \"%s\"",
514			  lv_mirr->name);
515		r = 0;
516	}
517
518	/* Unsuspend LVs */
519	resume_lvs(cmd, lvs_changed);
520
521	/* Deactivate mirror LV */
522	if (!deactivate_lv(cmd, lv_mirr)) {
523		log_error("ABORTING: Unable to deactivate temporary logical "
524			  "volume \"%s\"", lv_mirr->name);
525		r = 0;
526	}
527
528	log_verbose("Removing temporary pvmove LV");
529	if (!lv_remove(lv_mirr)) {
530		log_error("ABORTING: Removal of temporary pvmove LV failed");
531		return 0;
532	}
533
534	/* Store it on disks */
535	log_verbose("Writing out final volume group after pvmove");
536	if (!vg_write(vg) || !vg_commit(vg)) {
537		log_error("ABORTING: Failed to write new data locations "
538			  "to disk.");
539		return 0;
540	}
541
542	/* FIXME backup positioning */
543	backup(vg);
544
545	return r;
546}
547
548static struct volume_group *_get_move_vg(struct cmd_context *cmd,
549					 const char *name, const char *uuid)
550{
551	struct physical_volume *pv;
552
553	/* Reread all metadata in case it got changed */
554	if (!(pv = find_pv_by_name(cmd, name))) {
555		log_error("ABORTING: Can't reread PV %s", name);
556		/* What more could we do here? */
557		return NULL;
558	}
559
560	return _get_vg(cmd, pv_vg_name(pv));
561}
562
563static struct poll_functions _pvmove_fns = {
564	.get_copy_name_from_lv = get_pvmove_pvname_from_lv_mirr,
565	.get_copy_vg = _get_move_vg,
566	.get_copy_lv = find_pvmove_lv_from_pvname,
567	.poll_progress = poll_mirror_progress,
568	.update_metadata = _update_metadata,
569	.finish_copy = _finish_pvmove,
570};
571
572int pvmove_poll(struct cmd_context *cmd, const char *pv_name,
573		unsigned background)
574{
575	return poll_daemon(cmd, pv_name, NULL, background, PVMOVE, &_pvmove_fns,
576			   "Moved");
577}
578
579int pvmove(struct cmd_context *cmd, int argc, char **argv)
580{
581	char *pv_name = NULL;
582	char *colon;
583	int ret;
584
585	/* dm raid1 target must be present in every case */
586	if (!_pvmove_target_present(cmd, 0)) {
587		log_error("Required device-mapper target(s) not "
588			  "detected in your kernel");
589		return ECMD_FAILED;
590	}
591
592	if (argc) {
593		pv_name = argv[0];
594
595		/* Drop any PE lists from PV name */
596		if ((colon = strchr(pv_name, ':'))) {
597			if (!(pv_name = dm_pool_strndup(cmd->mem, pv_name,
598						     (unsigned) (colon -
599								 pv_name)))) {
600				log_error("Failed to clone PV name");
601				return ECMD_FAILED;
602			}
603		}
604
605		if (!arg_count(cmd, abort_ARG) &&
606		    (ret = _set_up_pvmove(cmd, pv_name, argc, argv)) !=
607		    ECMD_PROCESSED) {
608			stack;
609			return ret;
610		}
611	}
612
613	return pvmove_poll(cmd, pv_name, arg_is_set(cmd, background_ARG));
614}
615