/* geom_virstor.c revision 210058 */
/*-
 * Copyright (c) 2005 Ivan Voras <ivoras@freebsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
25
26#include <sys/cdefs.h>
27__FBSDID("$FreeBSD: head/sbin/geom/class/virstor/geom_virstor.c 210058 2010-07-14 15:14:00Z mav $");
28
29#include <sys/param.h>
30#include <errno.h>
31#include <paths.h>
32#include <stdio.h>
33#include <stdlib.h>
34#include <stdint.h>
35#include <string.h>
36#include <strings.h>
37#include <fcntl.h>
38#include <unistd.h>
39#include <libgeom.h>
40#include <err.h>
41#include <assert.h>
42
43#include <core/geom.h>
44#include <misc/subr.h>
45
46#include <geom/virstor/g_virstor_md.h>
47#include <geom/virstor/g_virstor.h>
48
49uint32_t lib_version = G_LIB_VERSION;
50uint32_t version = G_VIRSTOR_VERSION;
51static intmax_t chunk_size = 4 * 1024 * 1024; /* in kB (default: 4 MB) */
52static intmax_t vir_size = 2ULL << 40; /* in MB (default: 2 TB) */
53
54#if G_LIB_VERSION == 1
55/* Support RELENG_6 */
56#define G_TYPE_BOOL G_TYPE_NONE
57#endif
58
59/*
60 * virstor_main gets called by the geom(8) utility
61 */
62static void virstor_main(struct gctl_req *req, unsigned flags);
63
64struct g_command class_commands[] = {
65	{"clear", G_FLAG_VERBOSE, virstor_main, G_NULL_OPTS, NULL,
66		"[-v] prov ..."
67	},
68	{"dump", 0, virstor_main, G_NULL_OPTS, NULL,
69		"prov ..."
70	},
71	{"label", G_FLAG_VERBOSE | G_FLAG_LOADKLD, virstor_main,
72		{
73			{'h', "hardcode", NULL, G_TYPE_BOOL},
74			{'m', "chunk_size", &chunk_size, G_TYPE_NUMBER},
75			{'s', "vir_size", &vir_size, G_TYPE_NUMBER},
76			G_OPT_SENTINEL
77		},
78		NULL, "[-h] [-v] [-m chunk_size] [-s vir_size] name provider0 [provider1 ...]"
79	},
80	{"destroy", G_FLAG_VERBOSE, NULL,
81		{
82			{'f', "force", NULL, G_TYPE_BOOL},
83			G_OPT_SENTINEL
84		},
85		NULL, "[-fv] name ..."
86	},
87	{"stop", G_FLAG_VERBOSE, NULL,
88		{
89			{'f', "force", NULL, G_TYPE_BOOL},
90			G_OPT_SENTINEL
91		},
92		NULL, "[-fv] name ... (alias for \"destroy\")"
93	},
94	{"add", G_FLAG_VERBOSE, NULL,
95		{
96			{'h', "hardcode", NULL, G_TYPE_BOOL},
97			G_OPT_SENTINEL
98		},
99		NULL, "[-vh] name prov [prov ...]"
100	},
101	{"remove", G_FLAG_VERBOSE, NULL, G_NULL_OPTS, NULL,
102		"[-v] name ..."
103	},
104	G_CMD_SENTINEL
105};
106
/*
 * Verbosity flag: set to 1 by virstor_main() when G_FLAG_VERBOSE is passed,
 * consulted by the worker functions before printing progress messages.
 * (File-scope statics start out as zero.)
 */
static int verbose;

/* Per-verb workers, defined further down and dispatched by virstor_main(). */
static void virstor_label(struct gctl_req *req);
static void virstor_clear(struct gctl_req *req);
static void virstor_dump(struct gctl_req *req);
113
114/* Dispatcher function (no real work done here, only verbose flag recorder) */
115static void
116virstor_main(struct gctl_req *req, unsigned flags)
117{
118	const char *name;
119
120	if ((flags & G_FLAG_VERBOSE) != 0)
121		verbose = 1;
122
123	name = gctl_get_ascii(req, "verb");
124	if (name == NULL) {
125		gctl_error(req, "No '%s' argument.", "verb");
126		return;
127	}
128	if (strcmp(name, "label") == 0)
129		virstor_label(req);
130	else if (strcmp(name, "clear") == 0)
131		virstor_clear(req);
132	else if (strcmp(name, "dump") == 0)
133		virstor_dump(req);
134	else
135		gctl_error(req, "%s: Unknown command: %s.", __func__, name);
136
137	/* No CTASSERT in userland
138	CTASSERT(VIRSTOR_MAP_BLOCK_ENTRIES*VIRSTOR_MAP_ENTRY_SIZE == MAXPHYS);
139	*/
140}
141
/*
 * Build the device node path for a provider name.
 *
 * name: provider name, with or without the _PATH_DEV prefix.
 * path: output buffer; receives name, prefixed with _PATH_DEV if it did not
 *       already start with it.  The result is truncated to fit and is
 *       NUL-terminated whenever size is non-zero.
 * size: capacity of path in bytes.
 */
static void
pathgen(const char *name, char *path, size_t size)
{
	const char *prefix;

	/* Only prepend the device directory when it is not there already. */
	if (strncmp(name, _PATH_DEV, strlen(_PATH_DEV)) == 0)
		prefix = "";
	else
		prefix = _PATH_DEV;

	snprintf(path, size, "%s%s", prefix, name);
}
151
152static int
153my_g_metadata_store(const char *name, u_char *md, size_t size)
154{
155	char path[MAXPATHLEN];
156	unsigned sectorsize;
157	off_t mediasize;
158	u_char *sector;
159	int error, fd;
160
161	pathgen(name, path, sizeof(path));
162	sector = NULL;
163	error = 0;
164
165	fd = open(path, O_RDWR);
166	if (fd == -1)
167		return (errno);
168	mediasize = g_get_mediasize(name);
169	if (mediasize == 0) {
170		error = errno;
171		goto out;
172	}
173	sectorsize = g_get_sectorsize(name);
174	if (sectorsize == 0) {
175		error = errno;
176		goto out;
177	}
178	assert(sectorsize >= size);
179	sector = malloc(sectorsize);
180	if (sector == NULL) {
181		error = ENOMEM;
182		goto out;
183	}
184	bcopy(md, sector, size);
185	if (pwrite(fd, sector, sectorsize, mediasize - sectorsize) !=
186	    (ssize_t)sectorsize) {
187		error = errno;
188		goto out;
189	}
190out:
191	if (sector != NULL)
192		free(sector);
193	close(fd);
194	return (error);
195}
196
197/*
198 * Labels a new geom Meaning: parses and checks the parameters, calculates &
199 * writes metadata to the relevant providers so when the next round of
200 * "tasting" comes (which will be just after the provider(s) are closed) geom
201 * can be instantiated with the tasted metadata.
202 */
203static void
204virstor_label(struct gctl_req *req)
205{
206	struct g_virstor_metadata md;
207	off_t msize;
208	unsigned char *sect;
209	unsigned int i;
210	size_t ssize, secsize;
211	const char *name;
212	char param[32];
213	int hardcode, nargs, error;
214	struct virstor_map_entry *map;
215	size_t total_chunks;	/* We'll run out of memory if
216				   this needs to be bigger. */
217	unsigned int map_chunks; /* Chunks needed by the map (map size). */
218	size_t map_size;	/* In bytes. */
219	ssize_t written;
220	int fd;
221
222	nargs = gctl_get_int(req, "nargs");
223	if (nargs < 2) {
224		gctl_error(req, "Too few arguments (%d): expecting: name "
225		    "provider0 [provider1 ...]", nargs);
226		return;
227	}
228
229	hardcode = gctl_get_int(req, "hardcode");
230
231	/*
232	 * Initialize constant parts of metadata: magic signature, version,
233	 * name.
234	 */
235	bzero(&md, sizeof(md));
236	strlcpy(md.md_magic, G_VIRSTOR_MAGIC, sizeof(md.md_magic));
237	md.md_version = G_VIRSTOR_VERSION;
238	name = gctl_get_ascii(req, "arg0");
239	if (name == NULL) {
240		gctl_error(req, "No 'arg%u' argument.", 0);
241		return;
242	}
243	strlcpy(md.md_name, name, sizeof(md.md_name));
244
245	md.md_virsize = (off_t)gctl_get_intmax(req, "vir_size");
246	md.md_chunk_size = gctl_get_intmax(req, "chunk_size");
247	md.md_count = nargs - 1;
248
249	if (md.md_virsize == 0 || md.md_chunk_size == 0) {
250		gctl_error(req, "Virtual size and chunk size must be non-zero");
251		return;
252	}
253
254	if (md.md_chunk_size % MAXPHYS != 0) {
255		/* XXX: This is not strictly needed, but it's convenient to
256		 * impose some limitations on it, so why not MAXPHYS. */
257		size_t new_size = (md.md_chunk_size / MAXPHYS) * MAXPHYS;
258		if (new_size < md.md_chunk_size)
259			new_size += MAXPHYS;
260		fprintf(stderr, "Resizing chunk size to be a multiple of "
261		    "MAXPHYS (%d kB).\n", MAXPHYS / 1024);
262		fprintf(stderr, "New chunk size: %zu kB\n", new_size / 1024);
263		md.md_chunk_size = new_size;
264	}
265
266	if (md.md_virsize % md.md_chunk_size != 0) {
267		off_t chunk_count = md.md_virsize / md.md_chunk_size;
268		md.md_virsize = chunk_count * md.md_chunk_size;
269		fprintf(stderr, "Resizing virtual size to be a multiple of "
270		    "chunk size.\n");
271		fprintf(stderr, "New virtual size: %zu MB\n",
272		    (size_t)(md.md_virsize/(1024 * 1024)));
273	}
274
275	msize = secsize = 0;
276	for (i = 1; i < (unsigned)nargs; i++) {
277		snprintf(param, sizeof(param), "arg%u", i);
278		name = gctl_get_ascii(req, param);
279		ssize = g_get_sectorsize(name);
280		if (ssize == 0)
281			fprintf(stderr, "%s for %s\n", strerror(errno), name);
282		msize += g_get_mediasize(name);
283		if (secsize == 0)
284			secsize = ssize;
285		else if (secsize != ssize) {
286			gctl_error(req, "Devices need to have same sector size "
287			    "(%u on %s needs to be %u).",
288			    (u_int)ssize, name, (u_int)secsize);
289			return;
290		}
291	}
292
293	if (secsize == 0) {
294		gctl_error(req, "Device not specified");
295		return;
296	}
297
298	if (md.md_chunk_size % secsize != 0) {
299		fprintf(stderr, "Error: chunk size is not a multiple of sector "
300		    "size.");
301		gctl_error(req, "Chunk size (in bytes) must be multiple of %u.",
302		    (unsigned int)secsize);
303		return;
304	}
305
306	total_chunks = md.md_virsize / md.md_chunk_size;
307	map_size = total_chunks * sizeof(*map);
308	assert(md.md_virsize % md.md_chunk_size == 0);
309
310	ssize = map_size % secsize;
311	if (ssize != 0) {
312		size_t add_chunks = (secsize - ssize) / sizeof(*map);
313		total_chunks += add_chunks;
314		md.md_virsize = (off_t)total_chunks * (off_t)md.md_chunk_size;
315		map_size = total_chunks * sizeof(*map);
316		fprintf(stderr, "Resizing virtual size to fit virstor "
317		    "structures.\n");
318		fprintf(stderr, "New virtual size: %ju MB (%zu new chunks)\n",
319		    (uintmax_t)(md.md_virsize / (1024 * 1024)), add_chunks);
320	}
321
322	if (verbose)
323		printf("Total virtual chunks: %zu (%zu MB each), %ju MB total "
324		    "virtual size.\n",
325		    total_chunks, (size_t)(md.md_chunk_size / (1024 * 1024)),
326		    md.md_virsize/(1024 * 1024));
327
328	if ((off_t)md.md_virsize < msize)
329		fprintf(stderr, "WARNING: Virtual storage size < Physical "
330		    "available storage (%ju < %ju)\n", md.md_virsize, msize);
331
332	/* Clear last sector first to spoil all components if device exists. */
333	if (verbose)
334		printf("Clearing metadata on");
335
336	for (i = 1; i < (unsigned)nargs; i++) {
337		snprintf(param, sizeof(param), "arg%u", i);
338		name = gctl_get_ascii(req, param);
339
340		if (verbose)
341			printf(" %s", name);
342
343		msize = g_get_mediasize(name);
344		ssize = g_get_sectorsize(name);
345		if (msize == 0 || ssize == 0) {
346			gctl_error(req, "Can't retrieve information about "
347			    "%s: %s.", name, strerror(errno));
348			return;
349		}
350		if (msize < (off_t) MAX(md.md_chunk_size*4, map_size))
351			gctl_error(req, "Device %s is too small", name);
352		error = g_metadata_clear(name, NULL);
353		if (error != 0) {
354			gctl_error(req, "Can't clear metadata on %s: %s.", name,
355			    strerror(error));
356			return;
357		}
358	}
359
360
361	/* Write allocation table to the first provider - this needs to be done
362	 * before metadata is written because when kernel tastes it it's too
363	 * late */
364	name = gctl_get_ascii(req, "arg1"); /* device with metadata */
365	if (verbose)
366		printf(".\nWriting allocation table to %s...", name);
367
368	/* How many chunks does the map occupy? */
369	map_chunks = map_size/md.md_chunk_size;
370	if (map_size % md.md_chunk_size != 0)
371		map_chunks++;
372	if (verbose) {
373		printf(" (%zu MB, %d chunks) ", map_size/(1024*1024), map_chunks);
374		fflush(stdout);
375	}
376
377	if (strncmp(name, _PATH_DEV, strlen(_PATH_DEV)) == 0)
378		fd = open(name, O_RDWR);
379	else {
380		sprintf(param, "%s%s", _PATH_DEV, name);
381		fd = open(param, O_RDWR);
382	}
383	if (fd < 0)
384		gctl_error(req, "Cannot open provider %s to write map", name);
385
386	/* Do it with calloc because there might be a need to set up chunk flags
387	 * in the future */
388	map = calloc(total_chunks, sizeof(*map));
389	if (map == NULL) {
390		gctl_error(req,
391		    "Out of memory (need %zu bytes for allocation map)",
392		    map_size);
393	}
394
395	written = pwrite(fd, map, map_size, 0);
396	free(map);
397	if ((size_t)written != map_size) {
398		if (verbose) {
399			fprintf(stderr, "\nTried to write %zu, written %zd (%s)\n",
400			    map_size, written, strerror(errno));
401		}
402		gctl_error(req, "Error writing out allocation map!");
403		return;
404	}
405	close (fd);
406
407	if (verbose)
408		printf("\nStoring metadata on ");
409
410	/*
411	 * ID is randomly generated, unique for a geom. This is used to
412	 * recognize all providers belonging to one geom.
413	 */
414	md.md_id = arc4random();
415
416	/* Ok, store metadata. */
417	for (i = 1; i < (unsigned)nargs; i++) {
418		snprintf(param, sizeof(param), "arg%u", i);
419		name = gctl_get_ascii(req, param);
420
421		msize = g_get_mediasize(name);
422		ssize = g_get_sectorsize(name);
423
424		if (verbose)
425			printf("%s ", name);
426
427		/* this provider's position/type in geom */
428		md.no = i - 1;
429		/* this provider's size */
430		md.provsize = msize;
431		/* chunk allocation info */
432		md.chunk_count = md.provsize / md.md_chunk_size;
433		if (verbose)
434			printf("(%u chunks) ", md.chunk_count);
435		/* Check to make sure last sector is unused */
436		if ((off_t)(md.chunk_count * md.md_chunk_size) > msize-ssize)
437		    md.chunk_count--;
438		md.chunk_next = 0;
439		if (i != 1) {
440			md.chunk_reserved = 0;
441			md.flags = 0;
442		} else {
443			md.chunk_reserved = map_chunks * 2;
444			md.flags = VIRSTOR_PROVIDER_ALLOCATED |
445			    VIRSTOR_PROVIDER_CURRENT;
446			md.chunk_next = md.chunk_reserved;
447			if (verbose)
448				printf("(%u reserved) ", md.chunk_reserved);
449		}
450
451		if (!hardcode)
452			bzero(md.provider, sizeof(md.provider));
453		else {
454			/* convert "/dev/something" to "something" */
455			if (strncmp(name, _PATH_DEV, strlen(_PATH_DEV)) == 0) {
456				strlcpy(md.provider, name + strlen(_PATH_DEV),
457				    sizeof(md.provider));
458			} else
459				strlcpy(md.provider, name, sizeof(md.provider));
460		}
461		sect = malloc(ssize);
462		if (sect == NULL)
463			err(1, "Cannot allocate sector of %zu bytes", ssize);
464		bzero(sect, ssize);
465		virstor_metadata_encode(&md, sect);
466		error = my_g_metadata_store(name, sect, ssize);
467		free(sect);
468		if (error != 0) {
469			if (verbose)
470				printf("\n");
471			fprintf(stderr, "Can't store metadata on %s: %s.\n",
472			    name, strerror(error));
473			gctl_error(req,
474			    "Not fully done (error storing metadata).");
475			return;
476		}
477	}
478#if 0
479	if (verbose)
480		printf("\n");
481#endif
482}
483
484/* Clears metadata on given provider(s) IF it's owned by us */
485static void
486virstor_clear(struct gctl_req *req)
487{
488	const char *name;
489	char param[32];
490	unsigned i;
491	int nargs, error;
492	int fd;
493
494	nargs = gctl_get_int(req, "nargs");
495	if (nargs < 1) {
496		gctl_error(req, "Too few arguments.");
497		return;
498	}
499	for (i = 0; i < (unsigned)nargs; i++) {
500		snprintf(param, sizeof(param), "arg%u", i);
501		name = gctl_get_ascii(req, param);
502
503		error = g_metadata_clear(name, G_VIRSTOR_MAGIC);
504		if (error != 0) {
505			fprintf(stderr, "Can't clear metadata on %s: %s "
506			    "(do I own it?)\n", name, strerror(error));
507			gctl_error(req,
508			    "Not fully done (can't clear metadata).");
509			continue;
510		}
511		if (strncmp(name, _PATH_DEV, strlen(_PATH_DEV)) == 0)
512			fd = open(name, O_RDWR);
513		else {
514			sprintf(param, "%s%s", _PATH_DEV, name);
515			fd = open(param, O_RDWR);
516		}
517		if (fd < 0) {
518			gctl_error(req, "Cannot clear header sector for %s",
519			    name);
520			continue;
521		}
522		if (verbose)
523			printf("Metadata cleared on %s.\n", name);
524	}
525}
526
527/* Print some metadata information */
528static void
529virstor_metadata_dump(const struct g_virstor_metadata *md)
530{
531	printf("          Magic string: %s\n", md->md_magic);
532	printf("      Metadata version: %u\n", (u_int) md->md_version);
533	printf("           Device name: %s\n", md->md_name);
534	printf("             Device ID: %u\n", (u_int) md->md_id);
535	printf("        Provider index: %u\n", (u_int) md->no);
536	printf("      Active providers: %u\n", (u_int) md->md_count);
537	printf("    Hardcoded provider: %s\n",
538	    md->provider[0] != '\0' ? md->provider : "(not hardcoded)");
539	printf("          Virtual size: %u MB\n",
540	    (unsigned int)(md->md_virsize/(1024 * 1024)));
541	printf("            Chunk size: %u kB\n", md->md_chunk_size / 1024);
542	printf("    Chunks on provider: %u\n", md->chunk_count);
543	printf("           Chunks free: %u\n", md->chunk_count - md->chunk_next);
544	printf("       Reserved chunks: %u\n", md->chunk_reserved);
545}
546
547/* Called by geom(8) via gvirstor_main() to dump metadata information */
548static void
549virstor_dump(struct gctl_req *req)
550{
551	struct g_virstor_metadata md;
552	u_char tmpmd[512];	/* temporary buffer */
553	const char *name;
554	char param[16];
555	int nargs, error, i;
556
557	assert(sizeof(tmpmd) >= sizeof(md));
558
559	nargs = gctl_get_int(req, "nargs");
560	if (nargs < 1) {
561		gctl_error(req, "Too few arguments.");
562		return;
563	}
564	for (i = 0; i < nargs; i++) {
565		snprintf(param, sizeof(param), "arg%u", i);
566		name = gctl_get_ascii(req, param);
567
568		error = g_metadata_read(name, (u_char *) & tmpmd, sizeof(tmpmd),
569		    G_VIRSTOR_MAGIC);
570		if (error != 0) {
571			fprintf(stderr, "Can't read metadata from %s: %s.\n",
572			    name, strerror(error));
573			gctl_error(req,
574			    "Not fully done (error reading metadata).");
575			continue;
576		}
577		virstor_metadata_decode((u_char *) & tmpmd, &md);
578		printf("Metadata on %s:\n", name);
579		virstor_metadata_dump(&md);
580		printf("\n");
581	}
582}
583