zfs.c revision 239292
1/*-
2 * Copyright (c) 2007 Doug Rabson
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 *	$FreeBSD: head/sys/boot/zfs/zfs.c 239292 2012-08-15 09:18:49Z ae $
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/boot/zfs/zfs.c 239292 2012-08-15 09:18:49Z ae $");
31
32/*
33 *	Stand-alone file reading package.
34 */
35
36#include <sys/disk.h>
37#include <sys/param.h>
38#include <sys/time.h>
39#include <sys/queue.h>
40#include <part.h>
41#include <stddef.h>
42#include <stdarg.h>
43#include <string.h>
44#include <stand.h>
45#include <bootstrap.h>
46
47#include "libzfs.h"
48
49#include "zfsimpl.c"
50
51static int	zfs_open(const char *path, struct open_file *f);
52static int	zfs_write(struct open_file *f, void *buf, size_t size, size_t *resid);
53static int	zfs_close(struct open_file *f);
54static int	zfs_read(struct open_file *f, void *buf, size_t size, size_t *resid);
55static off_t	zfs_seek(struct open_file *f, off_t offset, int where);
56static int	zfs_stat(struct open_file *f, struct stat *sb);
57static int	zfs_readdir(struct open_file *f, struct dirent *d);
58
59struct devsw zfs_dev;
60
61struct fs_ops zfs_fsops = {
62	"zfs",
63	zfs_open,
64	zfs_close,
65	zfs_read,
66	zfs_write,
67	zfs_seek,
68	zfs_stat,
69	zfs_readdir
70};
71
72/*
73 * In-core open file.
74 */
75struct file {
76	off_t		f_seekp;	/* seek pointer */
77	dnode_phys_t	f_dnode;
78	uint64_t	f_zap_type;	/* zap type for readdir */
79	uint64_t	f_num_leafs;	/* number of fzap leaf blocks */
80	zap_leaf_phys_t	*f_zap_leaf;	/* zap leaf buffer */
81};
82
83/*
84 * Open a file.
85 */
86static int
87zfs_open(const char *upath, struct open_file *f)
88{
89	struct zfsmount *mount = (struct zfsmount *)f->f_devdata;
90	struct file *fp;
91	int rc;
92
93	if (f->f_dev != &zfs_dev)
94		return (EINVAL);
95
96	/* allocate file system specific data structure */
97	fp = malloc(sizeof(struct file));
98	bzero(fp, sizeof(struct file));
99	f->f_fsdata = (void *)fp;
100
101	rc = zfs_lookup(mount, upath, &fp->f_dnode);
102	fp->f_seekp = 0;
103	if (rc) {
104		f->f_fsdata = NULL;
105		free(fp);
106	}
107	return (rc);
108}
109
110static int
111zfs_close(struct open_file *f)
112{
113	struct file *fp = (struct file *)f->f_fsdata;
114
115	dnode_cache_obj = 0;
116	f->f_fsdata = (void *)0;
117	if (fp == (struct file *)0)
118		return (0);
119
120	free(fp);
121	return (0);
122}
123
124/*
125 * Copy a portion of a file into kernel memory.
126 * Cross block boundaries when necessary.
127 */
128static int
129zfs_read(struct open_file *f, void *start, size_t size, size_t *resid	/* out */)
130{
131	const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa;
132	struct file *fp = (struct file *)f->f_fsdata;
133	struct stat sb;
134	size_t n;
135	int rc;
136
137	rc = zfs_stat(f, &sb);
138	if (rc)
139		return (rc);
140	n = size;
141	if (fp->f_seekp + n > sb.st_size)
142		n = sb.st_size - fp->f_seekp;
143
144	rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, start, n);
145	if (rc)
146		return (rc);
147
148	if (0) {
149	    int i;
150	    for (i = 0; i < n; i++)
151		putchar(((char*) start)[i]);
152	}
153	fp->f_seekp += n;
154	if (resid)
155		*resid = size - n;
156
157	return (0);
158}
159
/*
 * The bootstrap has no business writing anything: every write request is
 * refused with EROFS and none of the arguments are examined.
 */
static int
zfs_write(struct open_file *f, void *start, size_t size, size_t *resid	/* out */)
{
	(void)f;
	(void)start;
	(void)size;
	(void)resid;

	return (EROFS);
}
169
170static off_t
171zfs_seek(struct open_file *f, off_t offset, int where)
172{
173	struct file *fp = (struct file *)f->f_fsdata;
174
175	switch (where) {
176	case SEEK_SET:
177		fp->f_seekp = offset;
178		break;
179	case SEEK_CUR:
180		fp->f_seekp += offset;
181		break;
182	case SEEK_END:
183	    {
184		struct stat sb;
185		int error;
186
187		error = zfs_stat(f, &sb);
188		if (error != 0) {
189			errno = error;
190			return (-1);
191		}
192		fp->f_seekp = sb.st_size - offset;
193		break;
194	    }
195	default:
196		errno = EINVAL;
197		return (-1);
198	}
199	return (fp->f_seekp);
200}
201
202static int
203zfs_stat(struct open_file *f, struct stat *sb)
204{
205	const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa;
206	struct file *fp = (struct file *)f->f_fsdata;
207
208	return (zfs_dnode_stat(spa, &fp->f_dnode, sb));
209}
210
211static int
212zfs_readdir(struct open_file *f, struct dirent *d)
213{
214	const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa;
215	struct file *fp = (struct file *)f->f_fsdata;
216	mzap_ent_phys_t mze;
217	struct stat sb;
218	size_t bsize = fp->f_dnode.dn_datablkszsec << SPA_MINBLOCKSHIFT;
219	int rc;
220
221	rc = zfs_stat(f, &sb);
222	if (rc)
223		return (rc);
224	if (!S_ISDIR(sb.st_mode))
225		return (ENOTDIR);
226
227	/*
228	 * If this is the first read, get the zap type.
229	 */
230	if (fp->f_seekp == 0) {
231		rc = dnode_read(spa, &fp->f_dnode,
232				0, &fp->f_zap_type, sizeof(fp->f_zap_type));
233		if (rc)
234			return (rc);
235
236		if (fp->f_zap_type == ZBT_MICRO) {
237			fp->f_seekp = offsetof(mzap_phys_t, mz_chunk);
238		} else {
239			rc = dnode_read(spa, &fp->f_dnode,
240					offsetof(zap_phys_t, zap_num_leafs),
241					&fp->f_num_leafs,
242					sizeof(fp->f_num_leafs));
243			if (rc)
244				return (rc);
245
246			fp->f_seekp = bsize;
247			fp->f_zap_leaf = (zap_leaf_phys_t *)malloc(bsize);
248			rc = dnode_read(spa, &fp->f_dnode,
249					fp->f_seekp,
250					fp->f_zap_leaf,
251					bsize);
252			if (rc)
253				return (rc);
254		}
255	}
256
257	if (fp->f_zap_type == ZBT_MICRO) {
258	mzap_next:
259		if (fp->f_seekp >= bsize)
260			return (ENOENT);
261
262		rc = dnode_read(spa, &fp->f_dnode,
263				fp->f_seekp, &mze, sizeof(mze));
264		if (rc)
265			return (rc);
266		fp->f_seekp += sizeof(mze);
267
268		if (!mze.mze_name[0])
269			goto mzap_next;
270
271		d->d_fileno = ZFS_DIRENT_OBJ(mze.mze_value);
272		d->d_type = ZFS_DIRENT_TYPE(mze.mze_value);
273		strcpy(d->d_name, mze.mze_name);
274		d->d_namlen = strlen(d->d_name);
275		return (0);
276	} else {
277		zap_leaf_t zl;
278		zap_leaf_chunk_t *zc, *nc;
279		int chunk;
280		size_t namelen;
281		char *p;
282		uint64_t value;
283
284		/*
285		 * Initialise this so we can use the ZAP size
286		 * calculating macros.
287		 */
288		zl.l_bs = ilog2(bsize);
289		zl.l_phys = fp->f_zap_leaf;
290
291		/*
292		 * Figure out which chunk we are currently looking at
293		 * and consider seeking to the next leaf. We use the
294		 * low bits of f_seekp as a simple chunk index.
295		 */
296	fzap_next:
297		chunk = fp->f_seekp & (bsize - 1);
298		if (chunk == ZAP_LEAF_NUMCHUNKS(&zl)) {
299			fp->f_seekp = (fp->f_seekp & ~(bsize - 1)) + bsize;
300			chunk = 0;
301
302			/*
303			 * Check for EOF and read the new leaf.
304			 */
305			if (fp->f_seekp >= bsize * fp->f_num_leafs)
306				return (ENOENT);
307
308			rc = dnode_read(spa, &fp->f_dnode,
309					fp->f_seekp,
310					fp->f_zap_leaf,
311					bsize);
312			if (rc)
313				return (rc);
314		}
315
316		zc = &ZAP_LEAF_CHUNK(&zl, chunk);
317		fp->f_seekp++;
318		if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY)
319			goto fzap_next;
320
321		namelen = zc->l_entry.le_name_length;
322		if (namelen > sizeof(d->d_name))
323			namelen = sizeof(d->d_name);
324
325		/*
326		 * Paste the name back together.
327		 */
328		nc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_name_chunk);
329		p = d->d_name;
330		while (namelen > 0) {
331			int len;
332			len = namelen;
333			if (len > ZAP_LEAF_ARRAY_BYTES)
334				len = ZAP_LEAF_ARRAY_BYTES;
335			memcpy(p, nc->l_array.la_array, len);
336			p += len;
337			namelen -= len;
338			nc = &ZAP_LEAF_CHUNK(&zl, nc->l_array.la_next);
339		}
340		d->d_name[sizeof(d->d_name) - 1] = 0;
341
342		/*
343		 * Assume the first eight bytes of the value are
344		 * a uint64_t.
345		 */
346		value = fzap_leaf_value(&zl, zc);
347
348		d->d_fileno = ZFS_DIRENT_OBJ(value);
349		d->d_type = ZFS_DIRENT_TYPE(value);
350		d->d_namlen = strlen(d->d_name);
351
352		return (0);
353	}
354}
355
356static int
357vdev_read(vdev_t *vdev, void *priv, off_t offset, void *buf, size_t size)
358{
359	int fd;
360
361	fd = (uintptr_t) priv;
362	lseek(fd, offset, SEEK_SET);
363	if (read(fd, buf, size) == size) {
364		return 0;
365	} else {
366		return (EIO);
367	}
368}
369
370static int
371zfs_dev_init(void)
372{
373	zfs_init();
374	if (archsw.arch_zfs_probe == NULL)
375		return (ENXIO);
376	archsw.arch_zfs_probe();
377	return (0);
378}
379
/*
 * Context passed through ptable_open()/ptable_iterate() while probing a
 * disk and its partitions for ZFS pools.
 */
struct zfs_probe_args {
	int		fd;		/* open descriptor of the device being scanned */
	const char	*devname;	/* name the descriptor was opened with */
	uint64_t	*pool_guid;	/* optional out: guid of a pool that was found */
	/*
	 * Sector size in bytes.  zfs_probe_dev() passes the address of
	 * this member to the DIOCGSECTORSIZE ioctl, which stores an
	 * unsigned int, so the member must be at least that wide.
	 */
	unsigned int	secsz;
};
386
387static int
388zfs_diskread(void *arg, void *buf, size_t blocks, off_t offset)
389{
390	struct zfs_probe_args *ppa;
391
392	ppa = (struct zfs_probe_args *)arg;
393	return (vdev_read(NULL, (void *)(uintptr_t)ppa->fd,
394	    offset * ppa->secsz, buf, blocks * ppa->secsz));
395}
396
397static int
398zfs_probe(int fd, uint64_t *pool_guid)
399{
400	spa_t *spa;
401	int ret;
402
403	ret = vdev_probe(vdev_read, (void *)(uintptr_t)fd, &spa);
404	if (ret == 0 && pool_guid != NULL)
405		*pool_guid = spa->spa_guid;
406	return (ret);
407}
408
409static void
410zfs_probe_partition(void *arg, const char *partname,
411    const struct ptable_entry *part)
412{
413	struct zfs_probe_args *ppa, pa;
414	struct ptable *table;
415	char devname[32];
416	int ret;
417
418	/* Probe only freebsd-zfs and freebsd partitions */
419	if (part->type != PART_FREEBSD &&
420	    part->type != PART_FREEBSD_ZFS)
421		return;
422
423	ppa = (struct zfs_probe_args *)arg;
424	strncpy(devname, ppa->devname, strlen(ppa->devname) - 1);
425	devname[strlen(ppa->devname) - 1] = '\0';
426	sprintf(devname, "%s%s:", devname, partname);
427	pa.fd = open(devname, O_RDONLY);
428	if (pa.fd == -1)
429		return;
430	ret = zfs_probe(pa.fd, ppa->pool_guid);
431	if (ret == 0)
432		return;
433	/* Do we have BSD label here? */
434	if (part->type == PART_FREEBSD) {
435		pa.devname = devname;
436		pa.pool_guid = ppa->pool_guid;
437		pa.secsz = ppa->secsz;
438		table = ptable_open(&pa, part->end - part->start + 1,
439		    ppa->secsz, zfs_diskread);
440		if (table != NULL) {
441			ptable_iterate(table, &pa, zfs_probe_partition);
442			ptable_close(table);
443		}
444	}
445	close(pa.fd);
446}
447
448int
449zfs_probe_dev(const char *devname, uint64_t *pool_guid)
450{
451	struct ptable *table;
452	struct zfs_probe_args pa;
453	off_t mediasz;
454	int ret;
455
456	pa.fd = open(devname, O_RDONLY);
457	if (pa.fd == -1)
458		return (ENXIO);
459	/* Probe the whole disk */
460	ret = zfs_probe(pa.fd, pool_guid);
461	if (ret == 0)
462		return (0);
463	/* Probe each partition */
464	ret = ioctl(pa.fd, DIOCGMEDIASIZE, &mediasz);
465	if (ret == 0)
466		ret = ioctl(pa.fd, DIOCGSECTORSIZE, &pa.secsz);
467	if (ret == 0) {
468		pa.devname = devname;
469		pa.pool_guid = pool_guid;
470		table = ptable_open(&pa, mediasz / pa.secsz, pa.secsz,
471		    zfs_diskread);
472		if (table != NULL) {
473			ptable_iterate(table, &pa, zfs_probe_partition);
474			ptable_close(table);
475		}
476	}
477	close(pa.fd);
478	return (0);
479}
480
481/*
482 * Print information about ZFS pools
483 */
484static void
485zfs_dev_print(int verbose)
486{
487	spa_t *spa;
488	char line[80];
489
490	if (verbose) {
491		spa_all_status();
492		return;
493	}
494	STAILQ_FOREACH(spa, &zfs_pools, spa_link) {
495		sprintf(line, "    zfs:%s\n", spa->spa_name);
496		pager_output(line);
497	}
498}
499
500/*
501 * Attempt to open the pool described by (dev) for use by (f).
502 */
503static int
504zfs_dev_open(struct open_file *f, ...)
505{
506	va_list		args;
507	struct zfs_devdesc	*dev;
508	struct zfsmount	*mount;
509	spa_t		*spa;
510	int		rv;
511
512	va_start(args, f);
513	dev = va_arg(args, struct zfs_devdesc *);
514	va_end(args);
515
516	spa = spa_find_by_guid(dev->pool_guid);
517	if (!spa)
518		return (ENXIO);
519	rv = zfs_spa_init(spa);
520	if (rv != 0)
521		return (rv);
522	mount = malloc(sizeof(*mount));
523	rv = zfs_mount(spa, dev->root_guid, mount);
524	if (rv != 0) {
525		free(mount);
526		return (rv);
527	}
528	if (mount->objset.os_type != DMU_OST_ZFS) {
529		printf("Unexpected object set type %ju\n",
530		    (uintmax_t)mount->objset.os_type);
531		free(mount);
532		return (EIO);
533	}
534	f->f_devdata = mount;
535	free(dev);
536	return (0);
537}
538
539static int
540zfs_dev_close(struct open_file *f)
541{
542
543	free(f->f_devdata);
544	f->f_devdata = NULL;
545	return (0);
546}
547
548static int
549zfs_dev_strategy(void *devdata, int rw, daddr_t dblk, size_t size, char *buf, size_t *rsize)
550{
551
552	return (ENOSYS);
553}
554
555struct devsw zfs_dev = {
556	.dv_name = "zfs",
557	.dv_type = DEVT_ZFS,
558	.dv_init = zfs_dev_init,
559	.dv_strategy = zfs_dev_strategy,
560	.dv_open = zfs_dev_open,
561	.dv_close = zfs_dev_close,
562	.dv_ioctl = noioctl,
563	.dv_print = zfs_dev_print,
564	.dv_cleanup = NULL
565};
566
567int
568zfs_parsedev(struct zfs_devdesc *dev, const char *devspec, const char **path)
569{
570	static char	rootname[ZFS_MAXNAMELEN];
571	static char	poolname[ZFS_MAXNAMELEN];
572	spa_t		*spa;
573	const char	*end;
574	const char	*np;
575	const char	*sep;
576	int		rv;
577
578	np = devspec;
579	if (*np != ':')
580		return (EINVAL);
581	np++;
582	end = strchr(np, ':');
583	if (end == NULL)
584		return (EINVAL);
585	sep = strchr(np, '/');
586	if (sep == NULL || sep >= end)
587		sep = end;
588	memcpy(poolname, np, sep - np);
589	poolname[sep - np] = '\0';
590	if (sep < end) {
591		sep++;
592		memcpy(rootname, sep, end - sep);
593		rootname[end - sep] = '\0';
594	}
595	else
596		rootname[0] = '\0';
597
598	spa = spa_find_by_name(poolname);
599	if (!spa)
600		return (ENXIO);
601	rv = zfs_spa_init(spa);
602	if (rv != 0)
603		return (rv);
604	dev->pool_guid = spa->spa_guid;
605	if (rootname[0] != '\0') {
606		rv = zfs_lookup_dataset(spa, rootname, &dev->root_guid);
607		if (rv != 0)
608			return (rv);
609	} else
610		dev->root_guid = 0;
611	if (path != NULL)
612		*path = (*end == '\0') ? end : end + 1;
613	dev->d_dev = &zfs_dev;
614	dev->d_type = zfs_dev.dv_type;
615	return (0);
616}
617
618char *
619zfs_fmtdev(void *vdev)
620{
621	static char		rootname[ZFS_MAXNAMELEN];
622	static char		buf[2 * ZFS_MAXNAMELEN + 8];
623	struct zfs_devdesc	*dev = (struct zfs_devdesc *)vdev;
624	spa_t			*spa;
625
626	buf[0] = '\0';
627	if (dev->d_type != DEVT_ZFS)
628		return (buf);
629
630	spa = spa_find_by_guid(dev->pool_guid);
631	if (spa == NULL) {
632		printf("ZFS: can't find pool by guid\n");
633		return (buf);
634	}
635	if (zfs_spa_init(spa) != 0) {
636		printf("ZFS: can't init pool\n");
637		return (buf);
638	}
639	if (dev->root_guid == 0 && zfs_get_root(spa, &dev->root_guid)) {
640		printf("ZFS: can't find root filesystem\n");
641		return (buf);
642	}
643	if (zfs_rlookup(spa, dev->root_guid, rootname)) {
644		printf("ZFS: can't find filesystem by guid\n");
645		return (buf);
646	}
647
648	if (rootname[0] == '\0')
649		sprintf(buf, "%s:%s:", dev->d_dev->dv_name, spa->spa_name);
650	else
651		sprintf(buf, "%s:%s/%s:", dev->d_dev->dv_name, spa->spa_name,
652		    rootname);
653	return (buf);
654}
655