zfs.c revision 241530
1/*-
2 * Copyright (c) 2007 Doug Rabson
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 *	$FreeBSD: stable/9/sys/boot/zfs/zfs.c 241530 2012-10-14 07:45:40Z avg $
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/9/sys/boot/zfs/zfs.c 241530 2012-10-14 07:45:40Z avg $");
31
32/*
33 *	Stand-alone file reading package.
34 */
35
36#include <sys/param.h>
37#include <sys/disklabel.h>
38#include <sys/time.h>
39#include <sys/queue.h>
40#include <stddef.h>
41#include <stdarg.h>
42#include <string.h>
43#include <stand.h>
44#include <bootstrap.h>
45
46#include "libzfs.h"
47
48#include "zfsimpl.c"
49
50static int	zfs_open(const char *path, struct open_file *f);
51static int	zfs_write(struct open_file *f, void *buf, size_t size, size_t *resid);
52static int	zfs_close(struct open_file *f);
53static int	zfs_read(struct open_file *f, void *buf, size_t size, size_t *resid);
54static off_t	zfs_seek(struct open_file *f, off_t offset, int where);
55static int	zfs_stat(struct open_file *f, struct stat *sb);
56static int	zfs_readdir(struct open_file *f, struct dirent *d);
57
58struct devsw zfs_dev;
59
60struct fs_ops zfs_fsops = {
61	"zfs",
62	zfs_open,
63	zfs_close,
64	zfs_read,
65	zfs_write,
66	zfs_seek,
67	zfs_stat,
68	zfs_readdir
69};
70
71/*
72 * In-core open file.
73 */
74struct file {
75	off_t		f_seekp;	/* seek pointer */
76	dnode_phys_t	f_dnode;
77	uint64_t	f_zap_type;	/* zap type for readdir */
78	uint64_t	f_num_leafs;	/* number of fzap leaf blocks */
79	zap_leaf_phys_t	*f_zap_leaf;	/* zap leaf buffer */
80};
81
82/*
83 * Open a file.
84 */
85static int
86zfs_open(const char *upath, struct open_file *f)
87{
88	struct zfsmount *mount = (struct zfsmount *)f->f_devdata;
89	struct file *fp;
90	int rc;
91
92	if (f->f_dev != &zfs_dev)
93		return (EINVAL);
94
95	/* allocate file system specific data structure */
96	fp = malloc(sizeof(struct file));
97	bzero(fp, sizeof(struct file));
98	f->f_fsdata = (void *)fp;
99
100	rc = zfs_lookup(mount, upath, &fp->f_dnode);
101	fp->f_seekp = 0;
102	if (rc) {
103		f->f_fsdata = NULL;
104		free(fp);
105	}
106	return (rc);
107}
108
109static int
110zfs_close(struct open_file *f)
111{
112	struct file *fp = (struct file *)f->f_fsdata;
113
114	dnode_cache_obj = 0;
115	f->f_fsdata = (void *)0;
116	if (fp == (struct file *)0)
117		return (0);
118
119	free(fp);
120	return (0);
121}
122
123/*
124 * Copy a portion of a file into kernel memory.
125 * Cross block boundaries when necessary.
126 */
127static int
128zfs_read(struct open_file *f, void *start, size_t size, size_t *resid	/* out */)
129{
130	const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa;
131	struct file *fp = (struct file *)f->f_fsdata;
132	struct stat sb;
133	size_t n;
134	int rc;
135
136	rc = zfs_stat(f, &sb);
137	if (rc)
138		return (rc);
139	n = size;
140	if (fp->f_seekp + n > sb.st_size)
141		n = sb.st_size - fp->f_seekp;
142
143	rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, start, n);
144	if (rc)
145		return (rc);
146
147	if (0) {
148	    int i;
149	    for (i = 0; i < n; i++)
150		putchar(((char*) start)[i]);
151	}
152	fp->f_seekp += n;
153	if (resid)
154		*resid = size - n;
155
156	return (0);
157}
158
/*
 * Writing is not supported: the boot code only ever reads from ZFS.
 * Every argument is ignored and the call always fails with EROFS.
 */
static int
zfs_write(struct open_file *f, void *start, size_t size, size_t *resid	/* out */)
{
	(void)f;
	(void)start;
	(void)size;
	(void)resid;

	return (EROFS);
}
168
169static off_t
170zfs_seek(struct open_file *f, off_t offset, int where)
171{
172	struct file *fp = (struct file *)f->f_fsdata;
173
174	switch (where) {
175	case SEEK_SET:
176		fp->f_seekp = offset;
177		break;
178	case SEEK_CUR:
179		fp->f_seekp += offset;
180		break;
181	case SEEK_END:
182	    {
183		struct stat sb;
184		int error;
185
186		error = zfs_stat(f, &sb);
187		if (error != 0) {
188			errno = error;
189			return (-1);
190		}
191		fp->f_seekp = sb.st_size - offset;
192		break;
193	    }
194	default:
195		errno = EINVAL;
196		return (-1);
197	}
198	return (fp->f_seekp);
199}
200
201static int
202zfs_stat(struct open_file *f, struct stat *sb)
203{
204	const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa;
205	struct file *fp = (struct file *)f->f_fsdata;
206
207	return (zfs_dnode_stat(spa, &fp->f_dnode, sb));
208}
209
210static int
211zfs_readdir(struct open_file *f, struct dirent *d)
212{
213	const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa;
214	struct file *fp = (struct file *)f->f_fsdata;
215	mzap_ent_phys_t mze;
216	struct stat sb;
217	size_t bsize = fp->f_dnode.dn_datablkszsec << SPA_MINBLOCKSHIFT;
218	int rc;
219
220	rc = zfs_stat(f, &sb);
221	if (rc)
222		return (rc);
223	if (!S_ISDIR(sb.st_mode))
224		return (ENOTDIR);
225
226	/*
227	 * If this is the first read, get the zap type.
228	 */
229	if (fp->f_seekp == 0) {
230		rc = dnode_read(spa, &fp->f_dnode,
231				0, &fp->f_zap_type, sizeof(fp->f_zap_type));
232		if (rc)
233			return (rc);
234
235		if (fp->f_zap_type == ZBT_MICRO) {
236			fp->f_seekp = offsetof(mzap_phys_t, mz_chunk);
237		} else {
238			rc = dnode_read(spa, &fp->f_dnode,
239					offsetof(zap_phys_t, zap_num_leafs),
240					&fp->f_num_leafs,
241					sizeof(fp->f_num_leafs));
242			if (rc)
243				return (rc);
244
245			fp->f_seekp = bsize;
246			fp->f_zap_leaf = (zap_leaf_phys_t *)malloc(bsize);
247			rc = dnode_read(spa, &fp->f_dnode,
248					fp->f_seekp,
249					fp->f_zap_leaf,
250					bsize);
251			if (rc)
252				return (rc);
253		}
254	}
255
256	if (fp->f_zap_type == ZBT_MICRO) {
257	mzap_next:
258		if (fp->f_seekp >= bsize)
259			return (ENOENT);
260
261		rc = dnode_read(spa, &fp->f_dnode,
262				fp->f_seekp, &mze, sizeof(mze));
263		if (rc)
264			return (rc);
265		fp->f_seekp += sizeof(mze);
266
267		if (!mze.mze_name[0])
268			goto mzap_next;
269
270		d->d_fileno = ZFS_DIRENT_OBJ(mze.mze_value);
271		d->d_type = ZFS_DIRENT_TYPE(mze.mze_value);
272		strcpy(d->d_name, mze.mze_name);
273		d->d_namlen = strlen(d->d_name);
274		return (0);
275	} else {
276		zap_leaf_t zl;
277		zap_leaf_chunk_t *zc, *nc;
278		int chunk;
279		size_t namelen;
280		char *p;
281		uint64_t value;
282
283		/*
284		 * Initialise this so we can use the ZAP size
285		 * calculating macros.
286		 */
287		zl.l_bs = ilog2(bsize);
288		zl.l_phys = fp->f_zap_leaf;
289
290		/*
291		 * Figure out which chunk we are currently looking at
292		 * and consider seeking to the next leaf. We use the
293		 * low bits of f_seekp as a simple chunk index.
294		 */
295	fzap_next:
296		chunk = fp->f_seekp & (bsize - 1);
297		if (chunk == ZAP_LEAF_NUMCHUNKS(&zl)) {
298			fp->f_seekp = (fp->f_seekp & ~(bsize - 1)) + bsize;
299			chunk = 0;
300
301			/*
302			 * Check for EOF and read the new leaf.
303			 */
304			if (fp->f_seekp >= bsize * fp->f_num_leafs)
305				return (ENOENT);
306
307			rc = dnode_read(spa, &fp->f_dnode,
308					fp->f_seekp,
309					fp->f_zap_leaf,
310					bsize);
311			if (rc)
312				return (rc);
313		}
314
315		zc = &ZAP_LEAF_CHUNK(&zl, chunk);
316		fp->f_seekp++;
317		if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY)
318			goto fzap_next;
319
320		namelen = zc->l_entry.le_name_numints;
321		if (namelen > sizeof(d->d_name))
322			namelen = sizeof(d->d_name);
323
324		/*
325		 * Paste the name back together.
326		 */
327		nc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_name_chunk);
328		p = d->d_name;
329		while (namelen > 0) {
330			int len;
331			len = namelen;
332			if (len > ZAP_LEAF_ARRAY_BYTES)
333				len = ZAP_LEAF_ARRAY_BYTES;
334			memcpy(p, nc->l_array.la_array, len);
335			p += len;
336			namelen -= len;
337			nc = &ZAP_LEAF_CHUNK(&zl, nc->l_array.la_next);
338		}
339		d->d_name[sizeof(d->d_name) - 1] = 0;
340
341		/*
342		 * Assume the first eight bytes of the value are
343		 * a uint64_t.
344		 */
345		value = fzap_leaf_value(&zl, zc);
346
347		d->d_fileno = ZFS_DIRENT_OBJ(value);
348		d->d_type = ZFS_DIRENT_TYPE(value);
349		d->d_namlen = strlen(d->d_name);
350
351		return (0);
352	}
353}
354
355static int
356vdev_read(vdev_t *vdev, void *priv, off_t offset, void *buf, size_t size)
357{
358	int fd;
359
360	fd = (uintptr_t) priv;
361	lseek(fd, offset, SEEK_SET);
362	if (read(fd, buf, size) == size) {
363		return 0;
364	} else {
365		return (EIO);
366	}
367}
368
369static int
370zfs_dev_init(void)
371{
372	zfs_init();
373	if (archsw.arch_zfs_probe == NULL)
374		return (ENXIO);
375	archsw.arch_zfs_probe();
376	return (0);
377}
378
379int
380zfs_probe_dev(const char *devname, uint64_t *pool_guid)
381{
382	spa_t *spa;
383	int fd;
384	int ret;
385
386	fd = open(devname, O_RDONLY);
387	if (fd == -1)
388		return (ENXIO);
389	ret = vdev_probe(vdev_read, (void *)(uintptr_t)fd, &spa);
390	if (ret != 0)
391		close(fd);
392	else if (pool_guid != NULL)
393		*pool_guid = spa->spa_guid;
394	return (0);
395}
396
397/*
398 * Print information about ZFS pools
399 */
400static void
401zfs_dev_print(int verbose)
402{
403	spa_t *spa;
404	char line[80];
405
406	if (verbose) {
407		spa_all_status();
408		return;
409	}
410	STAILQ_FOREACH(spa, &zfs_pools, spa_link) {
411		sprintf(line, "    zfs:%s\n", spa->spa_name);
412		pager_output(line);
413	}
414}
415
416/*
417 * Attempt to open the pool described by (dev) for use by (f).
418 */
419static int
420zfs_dev_open(struct open_file *f, ...)
421{
422	va_list		args;
423	struct zfs_devdesc	*dev;
424	struct zfsmount	*mount;
425	spa_t		*spa;
426	int		rv;
427
428	va_start(args, f);
429	dev = va_arg(args, struct zfs_devdesc *);
430	va_end(args);
431
432	spa = spa_find_by_guid(dev->pool_guid);
433	if (!spa)
434		return (ENXIO);
435	rv = zfs_spa_init(spa);
436	if (rv != 0)
437		return (rv);
438	mount = malloc(sizeof(*mount));
439	rv = zfs_mount(spa, dev->root_guid, mount);
440	if (rv != 0) {
441		free(mount);
442		return (rv);
443	}
444	if (mount->objset.os_type != DMU_OST_ZFS) {
445		printf("Unexpected object set type %ju\n",
446		    (uintmax_t)mount->objset.os_type);
447		free(mount);
448		return (EIO);
449	}
450	f->f_devdata = mount;
451	free(dev);
452	return (0);
453}
454
455static int
456zfs_dev_close(struct open_file *f)
457{
458
459	free(f->f_devdata);
460	f->f_devdata = NULL;
461	return (0);
462}
463
464static int
465zfs_dev_strategy(void *devdata, int rw, daddr_t dblk, size_t size, char *buf, size_t *rsize)
466{
467
468	return (ENOSYS);
469}
470
471struct devsw zfs_dev = {
472	.dv_name = "zfs",
473	.dv_type = DEVT_ZFS,
474	.dv_init = zfs_dev_init,
475	.dv_strategy = zfs_dev_strategy,
476	.dv_open = zfs_dev_open,
477	.dv_close = zfs_dev_close,
478	.dv_ioctl = noioctl,
479	.dv_print = zfs_dev_print,
480	.dv_cleanup = NULL
481};
482
483int
484zfs_parsedev(struct zfs_devdesc *dev, const char *devspec, const char **path)
485{
486	static char	rootname[ZFS_MAXNAMELEN];
487	static char	poolname[ZFS_MAXNAMELEN];
488	spa_t		*spa;
489	const char	*end;
490	const char	*np;
491	const char	*sep;
492	int		rv;
493
494	np = devspec;
495	if (*np != ':')
496		return (EINVAL);
497	np++;
498	end = strchr(np, ':');
499	if (end == NULL)
500		return (EINVAL);
501	sep = strchr(np, '/');
502	if (sep == NULL || sep >= end)
503		sep = end;
504	memcpy(poolname, np, sep - np);
505	poolname[sep - np] = '\0';
506	if (sep < end) {
507		sep++;
508		memcpy(rootname, sep, end - sep);
509		rootname[end - sep] = '\0';
510	}
511	else
512		rootname[0] = '\0';
513
514	spa = spa_find_by_name(poolname);
515	if (!spa)
516		return (ENXIO);
517	rv = zfs_spa_init(spa);
518	if (rv != 0)
519		return (rv);
520	dev->pool_guid = spa->spa_guid;
521	if (rootname[0] != '\0') {
522		rv = zfs_lookup_dataset(spa, rootname, &dev->root_guid);
523		if (rv != 0)
524			return (rv);
525	} else
526		dev->root_guid = 0;
527	if (path != NULL)
528		*path = (*end == '\0') ? end : end + 1;
529	dev->d_dev = &zfs_dev;
530	dev->d_type = zfs_dev.dv_type;
531	return (0);
532}
533
534char *
535zfs_fmtdev(void *vdev)
536{
537	static char		rootname[ZFS_MAXNAMELEN];
538	static char		buf[2 * ZFS_MAXNAMELEN + 8];
539	struct zfs_devdesc	*dev = (struct zfs_devdesc *)vdev;
540	spa_t			*spa;
541
542	buf[0] = '\0';
543	if (dev->d_type != DEVT_ZFS)
544		return (buf);
545
546	spa = spa_find_by_guid(dev->pool_guid);
547	if (spa == NULL) {
548		printf("ZFS: can't find pool by guid\n");
549		return (buf);
550	}
551	if (zfs_spa_init(spa) != 0) {
552		printf("ZFS: can't init pool\n");
553		return (buf);
554	}
555	if (dev->root_guid == 0 && zfs_get_root(spa, &dev->root_guid)) {
556		printf("ZFS: can't find root filesystem\n");
557		return (buf);
558	}
559	if (zfs_rlookup(spa, dev->root_guid, rootname)) {
560		printf("ZFS: can't find filesystem by guid\n");
561		return (buf);
562	}
563
564	if (rootname[0] == '\0')
565		sprintf(buf, "%s:%s:", dev->d_dev->dv_name, spa->spa_name);
566	else
567		sprintf(buf, "%s:%s/%s:", dev->d_dev->dv_name, spa->spa_name,
568		    rootname);
569	return (buf);
570}
571