1/*	$NetBSD: disk-rep.c,v 1.1.1.2 2009/12/02 00:26:48 haad Exp $	*/
2
3/*
4 * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
5 * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
6 *
7 * This file is part of LVM2.
8 *
9 * This copyrighted material is made available to anyone wishing to use,
10 * modify, copy, or redistribute it subject to the terms and conditions
11 * of the GNU Lesser General Public License v.2.1.
12 *
13 * You should have received a copy of the GNU Lesser General Public License
14 * along with this program; if not, write to the Free Software Foundation,
15 * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16 */
17
18#include "lib.h"
19#include "disk-rep.h"
20#include "xlate.h"
21#include "filter.h"
22#include "lvmcache.h"
23
24#include <fcntl.h>
25
/*
 * Field translation helpers.  Each rewrites member v of the structure
 * pointed to by a variable named 'disk' (which must be in scope at the
 * point of use) with the result of the matching xlateNN() conversion.
 *
 * The expansion is a single parenthesised expression so that the macros
 * remain correct when used inside a larger expression.
 */
#define xx16(v) (disk->v = xlate16(disk->v))
#define xx32(v) (disk->v = xlate32(disk->v))
#define xx64(v) (disk->v = xlate64(disk->v))
29
30/*
31 * Functions to perform the endian conversion
32 * between disk and core.  The same code works
33 * both ways of course.
34 */
35static void _xlate_pvd(struct pv_disk *disk)
36{
37	xx16(version);
38
39	xx32(pv_on_disk.base);
40	xx32(pv_on_disk.size);
41	xx32(vg_on_disk.base);
42	xx32(vg_on_disk.size);
43	xx32(pv_uuidlist_on_disk.base);
44	xx32(pv_uuidlist_on_disk.size);
45	xx32(lv_on_disk.base);
46	xx32(lv_on_disk.size);
47	xx32(pe_on_disk.base);
48	xx32(pe_on_disk.size);
49
50	xx32(pv_major);
51	xx32(pv_number);
52	xx32(pv_status);
53	xx32(pv_allocatable);
54	xx32(pv_size);
55	xx32(lv_cur);
56	xx32(pe_size);
57	xx32(pe_total);
58	xx32(pe_allocated);
59	xx32(pe_start);
60}
61
62static void _xlate_lvd(struct lv_disk *disk)
63{
64	xx32(lv_access);
65	xx32(lv_status);
66	xx32(lv_open);
67	xx32(lv_dev);
68	xx32(lv_number);
69	xx32(lv_mirror_copies);
70	xx32(lv_recovery);
71	xx32(lv_schedule);
72	xx32(lv_size);
73	xx32(lv_snapshot_minor);
74	xx16(lv_chunk_size);
75	xx16(dummy);
76	xx32(lv_allocated_le);
77	xx32(lv_stripes);
78	xx32(lv_stripesize);
79	xx32(lv_badblock);
80	xx32(lv_allocation);
81	xx32(lv_io_timeout);
82	xx32(lv_read_ahead);
83}
84
85static void _xlate_vgd(struct vg_disk *disk)
86{
87	xx32(vg_number);
88	xx32(vg_access);
89	xx32(vg_status);
90	xx32(lv_max);
91	xx32(lv_cur);
92	xx32(lv_open);
93	xx32(pv_max);
94	xx32(pv_cur);
95	xx32(pv_act);
96	xx32(dummy);
97	xx32(vgda);
98	xx32(pe_size);
99	xx32(pe_total);
100	xx32(pe_allocated);
101	xx32(pvg_total);
102}
103
104static void _xlate_extents(struct pe_disk *extents, uint32_t count)
105{
106	unsigned i;
107
108	for (i = 0; i < count; i++) {
109		extents[i].lv_num = xlate16(extents[i].lv_num);
110		extents[i].le_num = xlate16(extents[i].le_num);
111	}
112}
113
114/*
115 * Handle both minor metadata formats.
116 */
117static int _munge_formats(struct pv_disk *pvd)
118{
119	uint32_t pe_start;
120	unsigned b, e;
121
122	switch (pvd->version) {
123	case 1:
124		pvd->pe_start = ((pvd->pe_on_disk.base +
125				  pvd->pe_on_disk.size) >> SECTOR_SHIFT);
126		break;
127
128	case 2:
129		pvd->version = 1;
130		pe_start = pvd->pe_start << SECTOR_SHIFT;
131		pvd->pe_on_disk.size = pe_start - pvd->pe_on_disk.base;
132		break;
133
134	default:
135		return 0;
136	}
137
138	/* UUID too long? */
139	if (pvd->pv_uuid[ID_LEN]) {
140		/* Retain ID_LEN chars from end */
141		for (e = ID_LEN; e < sizeof(pvd->pv_uuid); e++) {
142			if (!pvd->pv_uuid[e]) {
143				e--;
144				break;
145			}
146		}
147		for (b = 0; b < ID_LEN; b++) {
148			pvd->pv_uuid[b] = pvd->pv_uuid[++e - ID_LEN];
149			/* FIXME Remove all invalid chars */
150			if (pvd->pv_uuid[b] == '/')
151				pvd->pv_uuid[b] = '#';
152		}
153		memset(&pvd->pv_uuid[ID_LEN], 0, sizeof(pvd->pv_uuid) - ID_LEN);
154	}
155
156	/* If UUID is missing, create one */
157	if (pvd->pv_uuid[0] == '\0') {
158		uuid_from_num((char *)pvd->pv_uuid, pvd->pv_number);
159		pvd->pv_uuid[ID_LEN] = '\0';
160	}
161
162	return 1;
163}
164
165/*
166 * If exported, remove "PV_EXP" from end of VG name
167 */
168static void _munge_exported_vg(struct pv_disk *pvd)
169{
170	int l;
171	size_t s;
172
173	/* Return if PV not in a VG */
174	if ((!*pvd->vg_name))
175		return;
176	/* FIXME also check vgd->status & VG_EXPORTED? */
177
178	l = strlen((char *)pvd->vg_name);
179	s = sizeof(EXPORTED_TAG);
180	if (!strncmp((char *)pvd->vg_name + l - s + 1, EXPORTED_TAG, s)) {
181		pvd->vg_name[l - s + 1] = '\0';
182		pvd->pv_status |= VG_EXPORTED;
183	}
184}
185
186int munge_pvd(struct device *dev, struct pv_disk *pvd)
187{
188	_xlate_pvd(pvd);
189
190	if (pvd->id[0] != 'H' || pvd->id[1] != 'M') {
191		log_very_verbose("%s does not have a valid LVM1 PV identifier",
192				 dev_name(dev));
193		return 0;
194	}
195
196	if (!_munge_formats(pvd)) {
197		log_very_verbose("format1: Unknown metadata version %d "
198				 "found on %s", pvd->version, dev_name(dev));
199		return 0;
200	}
201
202	/* If VG is exported, set VG name back to the real name */
203	_munge_exported_vg(pvd);
204
205	return 1;
206}
207
208static int _read_pvd(struct device *dev, struct pv_disk *pvd)
209{
210	if (!dev_read(dev, UINT64_C(0), sizeof(*pvd), pvd)) {
211		log_very_verbose("Failed to read PV data from %s",
212				 dev_name(dev));
213		return 0;
214	}
215
216	return munge_pvd(dev, pvd);
217}
218
219static int _read_lvd(struct device *dev, uint64_t pos, struct lv_disk *disk)
220{
221	if (!dev_read(dev, pos, sizeof(*disk), disk))
222		return_0;
223
224	_xlate_lvd(disk);
225
226	return 1;
227}
228
229int read_vgd(struct device *dev, struct vg_disk *vgd, struct pv_disk *pvd)
230{
231	uint64_t pos = pvd->vg_on_disk.base;
232
233	if (!dev_read(dev, pos, sizeof(*vgd), vgd))
234		return_0;
235
236	_xlate_vgd(vgd);
237
238	if ((vgd->lv_max > MAX_LV) || (vgd->pv_max > MAX_PV))
239		return_0;
240
241	/* If UUID is missing, create one */
242	if (vgd->vg_uuid[0] == '\0')
243		uuid_from_num((char *)vgd->vg_uuid, vgd->vg_number);
244
245	return 1;
246}
247
248static int _read_uuids(struct disk_list *data)
249{
250	unsigned num_read = 0;
251	struct uuid_list *ul;
252	char buffer[NAME_LEN] __attribute((aligned(8)));
253	uint64_t pos = data->pvd.pv_uuidlist_on_disk.base;
254	uint64_t end = pos + data->pvd.pv_uuidlist_on_disk.size;
255
256	while (pos < end && num_read < data->vgd.pv_cur) {
257		if (!dev_read(data->dev, pos, sizeof(buffer), buffer))
258			return_0;
259
260		if (!(ul = dm_pool_alloc(data->mem, sizeof(*ul))))
261			return_0;
262
263		memcpy(ul->uuid, buffer, NAME_LEN);
264		ul->uuid[NAME_LEN - 1] = '\0';
265
266		dm_list_add(&data->uuids, &ul->list);
267
268		pos += NAME_LEN;
269		num_read++;
270	}
271
272	return 1;
273}
274
275static int _check_lvd(struct lv_disk *lvd)
276{
277	return !(lvd->lv_name[0] == '\0');
278}
279
280static int _read_lvs(struct disk_list *data)
281{
282	unsigned int i, lvs_read = 0;
283	uint64_t pos;
284	struct lvd_list *ll;
285	struct vg_disk *vgd = &data->vgd;
286
287	for (i = 0; (i < vgd->lv_max) && (lvs_read < vgd->lv_cur); i++) {
288		pos = data->pvd.lv_on_disk.base + (i * sizeof(struct lv_disk));
289		ll = dm_pool_alloc(data->mem, sizeof(*ll));
290
291		if (!ll)
292			return_0;
293
294		if (!_read_lvd(data->dev, pos, &ll->lvd))
295			return_0;
296
297		if (!_check_lvd(&ll->lvd))
298			continue;
299
300		lvs_read++;
301		dm_list_add(&data->lvds, &ll->list);
302	}
303
304	return 1;
305}
306
307static int _read_extents(struct disk_list *data)
308{
309	size_t len = sizeof(struct pe_disk) * data->pvd.pe_total;
310	struct pe_disk *extents = dm_pool_alloc(data->mem, len);
311	uint64_t pos = data->pvd.pe_on_disk.base;
312
313	if (!extents)
314		return_0;
315
316	if (!dev_read(data->dev, pos, len, extents))
317		return_0;
318
319	_xlate_extents(extents, data->pvd.pe_total);
320	data->extents = extents;
321
322	return 1;
323}
324
325static void __update_lvmcache(const struct format_type *fmt,
326			      struct disk_list *dl,
327			      struct device *dev, const char *vgid,
328			      unsigned exported)
329{
330	struct lvmcache_info *info;
331	const char *vgname = *((char *)dl->pvd.vg_name) ?
332			     (char *)dl->pvd.vg_name : fmt->orphan_vg_name;
333
334	if (!(info = lvmcache_add(fmt->labeller, (char *)dl->pvd.pv_uuid, dev,
335				  vgname, vgid, exported ? EXPORTED_VG : 0))) {
336		stack;
337		return;
338	}
339
340	info->device_size = xlate32(dl->pvd.pv_size) << SECTOR_SHIFT;
341	dm_list_init(&info->mdas);
342	info->status &= ~CACHE_INVALID;
343}
344
345static struct disk_list *__read_disk(const struct format_type *fmt,
346				     struct device *dev, struct dm_pool *mem,
347				     const char *vg_name)
348{
349	struct disk_list *dl = dm_pool_zalloc(mem, sizeof(*dl));
350	const char *name = dev_name(dev);
351
352	if (!dl)
353		return_NULL;
354
355	dl->dev = dev;
356	dl->mem = mem;
357	dm_list_init(&dl->uuids);
358	dm_list_init(&dl->lvds);
359
360	if (!_read_pvd(dev, &dl->pvd))
361		goto_bad;
362
363	/*
364	 * is it an orphan ?
365	 */
366	if (!*dl->pvd.vg_name) {
367		log_very_verbose("%s is not a member of any format1 VG", name);
368
369		__update_lvmcache(fmt, dl, dev, fmt->orphan_vg_name, 0);
370		return (vg_name) ? NULL : dl;
371	}
372
373	if (!read_vgd(dl->dev, &dl->vgd, &dl->pvd)) {
374		log_error("Failed to read VG data from PV (%s)", name);
375		__update_lvmcache(fmt, dl, dev, fmt->orphan_vg_name, 0);
376		goto bad;
377	}
378
379	if (vg_name && strcmp(vg_name, (char *)dl->pvd.vg_name)) {
380		log_very_verbose("%s is not a member of the VG %s",
381				 name, vg_name);
382		__update_lvmcache(fmt, dl, dev, fmt->orphan_vg_name, 0);
383		goto bad;
384	}
385
386	__update_lvmcache(fmt, dl, dev, (char *)dl->vgd.vg_uuid,
387			  dl->vgd.vg_status & VG_EXPORTED);
388
389	if (!_read_uuids(dl)) {
390		log_error("Failed to read PV uuid list from %s", name);
391		goto bad;
392	}
393
394	if (!_read_lvs(dl)) {
395		log_error("Failed to read LV's from %s", name);
396		goto bad;
397	}
398
399	if (!_read_extents(dl)) {
400		log_error("Failed to read extents from %s", name);
401		goto bad;
402	}
403
404	log_very_verbose("Found %s in %sVG %s", name,
405			 (dl->vgd.vg_status & VG_EXPORTED) ? "exported " : "",
406			 dl->pvd.vg_name);
407
408	return dl;
409
410      bad:
411	dm_pool_free(dl->mem, dl);
412	return NULL;
413}
414
415struct disk_list *read_disk(const struct format_type *fmt, struct device *dev,
416			    struct dm_pool *mem, const char *vg_name)
417{
418	struct disk_list *dl;
419
420	if (!dev_open(dev))
421		return_NULL;
422
423	dl = __read_disk(fmt, dev, mem, vg_name);
424
425	if (!dev_close(dev))
426		stack;
427
428	return dl;
429}
430
431static void _add_pv_to_list(struct dm_list *head, struct disk_list *data)
432{
433	struct pv_disk *pvd;
434	struct disk_list *diskl;
435
436	dm_list_iterate_items(diskl, head) {
437		pvd = &diskl->pvd;
438		if (!strncmp((char *)data->pvd.pv_uuid, (char *)pvd->pv_uuid,
439			     sizeof(pvd->pv_uuid))) {
440			if (!dev_subsystem_part_major(data->dev)) {
441				log_very_verbose("Ignoring duplicate PV %s on "
442						 "%s", pvd->pv_uuid,
443						 dev_name(data->dev));
444				return;
445			}
446			log_very_verbose("Duplicate PV %s - using %s %s",
447					 pvd->pv_uuid, dev_subsystem_name(data->dev),
448					 dev_name(data->dev));
449			dm_list_del(&diskl->list);
450			break;
451		}
452	}
453	dm_list_add(head, &data->list);
454}
455
456/*
457 * Build a list of pv_d's structures, allocated from mem.
458 * We keep track of the first object allocated from the pool
459 * so we can free off all the memory if something goes wrong.
460 */
461int read_pvs_in_vg(const struct format_type *fmt, const char *vg_name,
462		   struct dev_filter *filter, struct dm_pool *mem,
463		   struct dm_list *head)
464{
465	struct dev_iter *iter;
466	struct device *dev;
467	struct disk_list *data = NULL;
468	struct lvmcache_vginfo *vginfo;
469	struct lvmcache_info *info;
470
471	/* Fast path if we already saw this VG and cached the list of PVs */
472	if (vg_name && (vginfo = vginfo_from_vgname(vg_name, NULL)) &&
473	    vginfo->infos.n) {
474		dm_list_iterate_items(info, &vginfo->infos) {
475			dev = info->dev;
476			if (dev && !(data = read_disk(fmt, dev, mem, vg_name)))
477				break;
478			_add_pv_to_list(head, data);
479		}
480
481		/* Did we find the whole VG? */
482		if (!vg_name || is_orphan_vg(vg_name) ||
483		    (data && *data->pvd.vg_name &&
484		     dm_list_size(head) == data->vgd.pv_cur))
485			return 1;
486
487		/* Failed */
488		dm_list_init(head);
489		/* vgcache_del(vg_name); */
490	}
491
492	if (!(iter = dev_iter_create(filter, 1))) {
493		log_error("read_pvs_in_vg: dev_iter_create failed");
494		return 0;
495	}
496
497	/* Otherwise do a complete scan */
498	for (dev = dev_iter_get(iter); dev; dev = dev_iter_get(iter)) {
499		if ((data = read_disk(fmt, dev, mem, vg_name))) {
500			_add_pv_to_list(head, data);
501		}
502	}
503	dev_iter_destroy(iter);
504
505	if (dm_list_empty(head))
506		return 0;
507
508	return 1;
509}
510
511static int _write_vgd(struct disk_list *data)
512{
513	struct vg_disk *vgd = &data->vgd;
514	uint64_t pos = data->pvd.vg_on_disk.base;
515
516	log_debug("Writing %s VG metadata to %s at %" PRIu64 " len %" PRIsize_t,
517		  data->pvd.vg_name, dev_name(data->dev), pos, sizeof(*vgd));
518
519	_xlate_vgd(vgd);
520	if (!dev_write(data->dev, pos, sizeof(*vgd), vgd))
521		return_0;
522
523	_xlate_vgd(vgd);
524
525	return 1;
526}
527
528static int _write_uuids(struct disk_list *data)
529{
530	struct uuid_list *ul;
531	uint64_t pos = data->pvd.pv_uuidlist_on_disk.base;
532	uint64_t end = pos + data->pvd.pv_uuidlist_on_disk.size;
533
534	dm_list_iterate_items(ul, &data->uuids) {
535		if (pos >= end) {
536			log_error("Too many uuids to fit on %s",
537				  dev_name(data->dev));
538			return 0;
539		}
540
541		log_debug("Writing %s uuidlist to %s at %" PRIu64 " len %d",
542			  data->pvd.vg_name, dev_name(data->dev),
543			  pos, NAME_LEN);
544
545		if (!dev_write(data->dev, pos, NAME_LEN, ul->uuid))
546			return_0;
547
548		pos += NAME_LEN;
549	}
550
551	return 1;
552}
553
554static int _write_lvd(struct device *dev, uint64_t pos, struct lv_disk *disk)
555{
556	log_debug("Writing %s LV %s metadata to %s at %" PRIu64 " len %"
557		  PRIsize_t, disk->vg_name, disk->lv_name, dev_name(dev),
558		  pos, sizeof(*disk));
559
560	_xlate_lvd(disk);
561	if (!dev_write(dev, pos, sizeof(*disk), disk))
562		return_0;
563
564	_xlate_lvd(disk);
565
566	return 1;
567}
568
569static int _write_lvs(struct disk_list *data)
570{
571	struct lvd_list *ll;
572	uint64_t pos, offset;
573
574	pos = data->pvd.lv_on_disk.base;
575
576	if (!dev_set(data->dev, pos, data->pvd.lv_on_disk.size, 0)) {
577		log_error("Couldn't zero lv area on device '%s'",
578			  dev_name(data->dev));
579		return 0;
580	}
581
582	dm_list_iterate_items(ll, &data->lvds) {
583		offset = sizeof(struct lv_disk) * ll->lvd.lv_number;
584		if (offset + sizeof(struct lv_disk) > data->pvd.lv_on_disk.size) {
585			log_error("lv_number %d too large", ll->lvd.lv_number);
586			return 0;
587		}
588
589		if (!_write_lvd(data->dev, pos + offset, &ll->lvd))
590			return_0;
591	}
592
593	return 1;
594}
595
596static int _write_extents(struct disk_list *data)
597{
598	size_t len = sizeof(struct pe_disk) * data->pvd.pe_total;
599	struct pe_disk *extents = data->extents;
600	uint64_t pos = data->pvd.pe_on_disk.base;
601
602	log_debug("Writing %s extents metadata to %s at %" PRIu64 " len %"
603		  PRIsize_t, data->pvd.vg_name, dev_name(data->dev),
604		  pos, len);
605
606	_xlate_extents(extents, data->pvd.pe_total);
607	if (!dev_write(data->dev, pos, len, extents))
608		return_0;
609
610	_xlate_extents(extents, data->pvd.pe_total);
611
612	return 1;
613}
614
615static int _write_pvd(struct disk_list *data)
616{
617	char *buf;
618	uint64_t pos = data->pvd.pv_on_disk.base;
619	size_t size = data->pvd.pv_on_disk.size;
620
621	if (size < sizeof(struct pv_disk)) {
622		log_error("Invalid PV structure size.");
623		return 0;
624	}
625
626	/* Make sure that the gap between the PV structure and
627	   the next one is zeroed in order to make non LVM tools
628	   happy (idea from AED) */
629	buf = dm_malloc(size);
630	if (!buf) {
631		log_error("Couldn't allocate temporary PV buffer.");
632		return 0;
633	}
634
635	memset(buf, 0, size);
636	memcpy(buf, &data->pvd, sizeof(struct pv_disk));
637
638	log_debug("Writing %s PV metadata to %s at %" PRIu64 " len %"
639		  PRIsize_t, data->pvd.vg_name, dev_name(data->dev),
640		  pos, size);
641
642	_xlate_pvd((struct pv_disk *) buf);
643	if (!dev_write(data->dev, pos, size, buf)) {
644		dm_free(buf);
645		return_0;
646	}
647
648	dm_free(buf);
649	return 1;
650}
651
652/*
653 * assumes the device has been opened.
654 */
655static int __write_all_pvd(const struct format_type *fmt __attribute((unused)),
656			   struct disk_list *data)
657{
658	const char *pv_name = dev_name(data->dev);
659
660	if (!_write_pvd(data)) {
661		log_error("Failed to write PV structure onto %s", pv_name);
662		return 0;
663	}
664
665	/* vgcache_add(data->pvd.vg_name, data->vgd.vg_uuid, data->dev, fmt); */
666	/*
667	 * Stop here for orphan pv's.
668	 */
669	if (data->pvd.vg_name[0] == '\0') {
670		/* if (!test_mode())
671		   vgcache_add(data->pvd.vg_name, NULL, data->dev, fmt); */
672		return 1;
673	}
674
675	/* if (!test_mode())
676	   vgcache_add(data->pvd.vg_name, data->vgd.vg_uuid, data->dev,
677	   fmt); */
678
679	if (!_write_vgd(data)) {
680		log_error("Failed to write VG data to %s", pv_name);
681		return 0;
682	}
683
684	if (!_write_uuids(data)) {
685		log_error("Failed to write PV uuid list to %s", pv_name);
686		return 0;
687	}
688
689	if (!_write_lvs(data)) {
690		log_error("Failed to write LV's to %s", pv_name);
691		return 0;
692	}
693
694	if (!_write_extents(data)) {
695		log_error("Failed to write extents to %s", pv_name);
696		return 0;
697	}
698
699	return 1;
700}
701
702/*
703 * opens the device and hands to the above fn.
704 */
705static int _write_all_pvd(const struct format_type *fmt, struct disk_list *data)
706{
707	int r;
708
709	if (!dev_open(data->dev))
710		return_0;
711
712	r = __write_all_pvd(fmt, data);
713
714	if (!dev_close(data->dev))
715		stack;
716
717	return r;
718}
719
720/*
721 * Writes all the given pv's to disk.  Does very
722 * little sanity checking, so make sure correct
723 * data is passed to here.
724 */
725int write_disks(const struct format_type *fmt, struct dm_list *pvs)
726{
727	struct disk_list *dl;
728
729	dm_list_iterate_items(dl, pvs) {
730		if (!(_write_all_pvd(fmt, dl)))
731			return_0;
732
733		log_very_verbose("Successfully wrote data to %s",
734				 dev_name(dl->dev));
735	}
736
737	return 1;
738}
739