1/*-
2 * Copyright (c) 2007 Doug Rabson
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 *	$FreeBSD$
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD$");
31
32/*
33 *	Stand-alone file reading package.
34 */
35
36#include <sys/disk.h>
37#include <sys/param.h>
38#include <sys/time.h>
39#include <sys/queue.h>
40#include <part.h>
41#include <stddef.h>
42#include <stdarg.h>
43#include <string.h>
44#include <stand.h>
45#include <bootstrap.h>
46
47#include "libzfs.h"
48
49#include "zfsimpl.c"
50
51static int	zfs_open(const char *path, struct open_file *f);
52static int	zfs_write(struct open_file *f, void *buf, size_t size, size_t *resid);
53static int	zfs_close(struct open_file *f);
54static int	zfs_read(struct open_file *f, void *buf, size_t size, size_t *resid);
55static off_t	zfs_seek(struct open_file *f, off_t offset, int where);
56static int	zfs_stat(struct open_file *f, struct stat *sb);
57static int	zfs_readdir(struct open_file *f, struct dirent *d);
58
59struct devsw zfs_dev;
60
/*
 * File system switch entry for ZFS.  The initialisers are positional:
 * the name string first, then the open, close, read, write, seek, stat
 * and readdir handlers defined in this file, in that order.
 */
struct fs_ops zfs_fsops = {
	"zfs",
	zfs_open,
	zfs_close,
	zfs_read,
	zfs_write,
	zfs_seek,
	zfs_stat,
	zfs_readdir
};
71
/*
 * In-core open file.
 *
 * One of these hangs off open_file.f_fsdata for every file opened
 * through zfs_open().  For regular reads f_seekp is a byte offset into
 * the object; zfs_readdir() reuses it as its own directory cursor and
 * keeps the remaining f_zap_* / f_num_leafs fields as iteration state.
 */
struct file {
	off_t		f_seekp;	/* seek pointer */
	dnode_phys_t	f_dnode;	/* dnode filled in by zfs_lookup() at open */
	uint64_t	f_zap_type;	/* zap type for readdir */
	uint64_t	f_num_leafs;	/* number of fzap leaf blocks */
	zap_leaf_phys_t	*f_zap_leaf;	/* zap leaf buffer */
};
82
83/*
84 * Open a file.
85 */
86static int
87zfs_open(const char *upath, struct open_file *f)
88{
89	struct zfsmount *mount = (struct zfsmount *)f->f_devdata;
90	struct file *fp;
91	int rc;
92
93	if (f->f_dev != &zfs_dev)
94		return (EINVAL);
95
96	/* allocate file system specific data structure */
97	fp = malloc(sizeof(struct file));
98	bzero(fp, sizeof(struct file));
99	f->f_fsdata = (void *)fp;
100
101	rc = zfs_lookup(mount, upath, &fp->f_dnode);
102	fp->f_seekp = 0;
103	if (rc) {
104		f->f_fsdata = NULL;
105		free(fp);
106	}
107	return (rc);
108}
109
110static int
111zfs_close(struct open_file *f)
112{
113	struct file *fp = (struct file *)f->f_fsdata;
114
115	dnode_cache_obj = 0;
116	f->f_fsdata = (void *)0;
117	if (fp == (struct file *)0)
118		return (0);
119
120	free(fp);
121	return (0);
122}
123
124/*
125 * Copy a portion of a file into kernel memory.
126 * Cross block boundaries when necessary.
127 */
128static int
129zfs_read(struct open_file *f, void *start, size_t size, size_t *resid	/* out */)
130{
131	const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa;
132	struct file *fp = (struct file *)f->f_fsdata;
133	struct stat sb;
134	size_t n;
135	int rc;
136
137	rc = zfs_stat(f, &sb);
138	if (rc)
139		return (rc);
140	n = size;
141	if (fp->f_seekp + n > sb.st_size)
142		n = sb.st_size - fp->f_seekp;
143
144	rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, start, n);
145	if (rc)
146		return (rc);
147
148	if (0) {
149	    int i;
150	    for (i = 0; i < n; i++)
151		putchar(((char*) start)[i]);
152	}
153	fp->f_seekp += n;
154	if (resid)
155		*resid = size - n;
156
157	return (0);
158}
159
/*
 * Writing is not supported: the boot code only ever reads from ZFS.
 * None of the arguments are examined and EROFS is always returned.
 */
static int
zfs_write(struct open_file *f, void *start, size_t size, size_t *resid	/* out */)
{
	(void)f;
	(void)start;
	(void)size;
	(void)resid;

	return (EROFS);
}
169
170static off_t
171zfs_seek(struct open_file *f, off_t offset, int where)
172{
173	struct file *fp = (struct file *)f->f_fsdata;
174
175	switch (where) {
176	case SEEK_SET:
177		fp->f_seekp = offset;
178		break;
179	case SEEK_CUR:
180		fp->f_seekp += offset;
181		break;
182	case SEEK_END:
183	    {
184		struct stat sb;
185		int error;
186
187		error = zfs_stat(f, &sb);
188		if (error != 0) {
189			errno = error;
190			return (-1);
191		}
192		fp->f_seekp = sb.st_size - offset;
193		break;
194	    }
195	default:
196		errno = EINVAL;
197		return (-1);
198	}
199	return (fp->f_seekp);
200}
201
202static int
203zfs_stat(struct open_file *f, struct stat *sb)
204{
205	const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa;
206	struct file *fp = (struct file *)f->f_fsdata;
207
208	return (zfs_dnode_stat(spa, &fp->f_dnode, sb));
209}
210
211static int
212zfs_readdir(struct open_file *f, struct dirent *d)
213{
214	const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa;
215	struct file *fp = (struct file *)f->f_fsdata;
216	mzap_ent_phys_t mze;
217	struct stat sb;
218	size_t bsize = fp->f_dnode.dn_datablkszsec << SPA_MINBLOCKSHIFT;
219	int rc;
220
221	rc = zfs_stat(f, &sb);
222	if (rc)
223		return (rc);
224	if (!S_ISDIR(sb.st_mode))
225		return (ENOTDIR);
226
227	/*
228	 * If this is the first read, get the zap type.
229	 */
230	if (fp->f_seekp == 0) {
231		rc = dnode_read(spa, &fp->f_dnode,
232				0, &fp->f_zap_type, sizeof(fp->f_zap_type));
233		if (rc)
234			return (rc);
235
236		if (fp->f_zap_type == ZBT_MICRO) {
237			fp->f_seekp = offsetof(mzap_phys_t, mz_chunk);
238		} else {
239			rc = dnode_read(spa, &fp->f_dnode,
240					offsetof(zap_phys_t, zap_num_leafs),
241					&fp->f_num_leafs,
242					sizeof(fp->f_num_leafs));
243			if (rc)
244				return (rc);
245
246			fp->f_seekp = bsize;
247			fp->f_zap_leaf = (zap_leaf_phys_t *)malloc(bsize);
248			rc = dnode_read(spa, &fp->f_dnode,
249					fp->f_seekp,
250					fp->f_zap_leaf,
251					bsize);
252			if (rc)
253				return (rc);
254		}
255	}
256
257	if (fp->f_zap_type == ZBT_MICRO) {
258	mzap_next:
259		if (fp->f_seekp >= bsize)
260			return (ENOENT);
261
262		rc = dnode_read(spa, &fp->f_dnode,
263				fp->f_seekp, &mze, sizeof(mze));
264		if (rc)
265			return (rc);
266		fp->f_seekp += sizeof(mze);
267
268		if (!mze.mze_name[0])
269			goto mzap_next;
270
271		d->d_fileno = ZFS_DIRENT_OBJ(mze.mze_value);
272		d->d_type = ZFS_DIRENT_TYPE(mze.mze_value);
273		strcpy(d->d_name, mze.mze_name);
274		d->d_namlen = strlen(d->d_name);
275		return (0);
276	} else {
277		zap_leaf_t zl;
278		zap_leaf_chunk_t *zc, *nc;
279		int chunk;
280		size_t namelen;
281		char *p;
282		uint64_t value;
283
284		/*
285		 * Initialise this so we can use the ZAP size
286		 * calculating macros.
287		 */
288		zl.l_bs = ilog2(bsize);
289		zl.l_phys = fp->f_zap_leaf;
290
291		/*
292		 * Figure out which chunk we are currently looking at
293		 * and consider seeking to the next leaf. We use the
294		 * low bits of f_seekp as a simple chunk index.
295		 */
296	fzap_next:
297		chunk = fp->f_seekp & (bsize - 1);
298		if (chunk == ZAP_LEAF_NUMCHUNKS(&zl)) {
299			fp->f_seekp = (fp->f_seekp & ~(bsize - 1)) + bsize;
300			chunk = 0;
301
302			/*
303			 * Check for EOF and read the new leaf.
304			 */
305			if (fp->f_seekp >= bsize * fp->f_num_leafs)
306				return (ENOENT);
307
308			rc = dnode_read(spa, &fp->f_dnode,
309					fp->f_seekp,
310					fp->f_zap_leaf,
311					bsize);
312			if (rc)
313				return (rc);
314		}
315
316		zc = &ZAP_LEAF_CHUNK(&zl, chunk);
317		fp->f_seekp++;
318		if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY)
319			goto fzap_next;
320
321		namelen = zc->l_entry.le_name_numints;
322		if (namelen > sizeof(d->d_name))
323			namelen = sizeof(d->d_name);
324
325		/*
326		 * Paste the name back together.
327		 */
328		nc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_name_chunk);
329		p = d->d_name;
330		while (namelen > 0) {
331			int len;
332			len = namelen;
333			if (len > ZAP_LEAF_ARRAY_BYTES)
334				len = ZAP_LEAF_ARRAY_BYTES;
335			memcpy(p, nc->l_array.la_array, len);
336			p += len;
337			namelen -= len;
338			nc = &ZAP_LEAF_CHUNK(&zl, nc->l_array.la_next);
339		}
340		d->d_name[sizeof(d->d_name) - 1] = 0;
341
342		/*
343		 * Assume the first eight bytes of the value are
344		 * a uint64_t.
345		 */
346		value = fzap_leaf_value(&zl, zc);
347
348		d->d_fileno = ZFS_DIRENT_OBJ(value);
349		d->d_type = ZFS_DIRENT_TYPE(value);
350		d->d_namlen = strlen(d->d_name);
351
352		return (0);
353	}
354}
355
356static int
357vdev_read(vdev_t *vdev, void *priv, off_t offset, void *buf, size_t size)
358{
359	int fd;
360
361	fd = (uintptr_t) priv;
362	lseek(fd, offset, SEEK_SET);
363	if (read(fd, buf, size) == size) {
364		return 0;
365	} else {
366		return (EIO);
367	}
368}
369
370static int
371zfs_dev_init(void)
372{
373	spa_t *spa;
374	spa_t *next;
375	spa_t *prev;
376
377	zfs_init();
378	if (archsw.arch_zfs_probe == NULL)
379		return (ENXIO);
380	archsw.arch_zfs_probe();
381
382	prev = NULL;
383	spa = STAILQ_FIRST(&zfs_pools);
384	while (spa != NULL) {
385		next = STAILQ_NEXT(spa, spa_link);
386		if (zfs_spa_init(spa)) {
387			if (prev == NULL)
388				STAILQ_REMOVE_HEAD(&zfs_pools, spa_link);
389			else
390				STAILQ_REMOVE_AFTER(&zfs_pools, prev, spa_link);
391		} else
392			prev = spa;
393		spa = next;
394	}
395	return (0);
396}
397
/*
 * Context handed through ptable_open()/ptable_iterate() to
 * zfs_diskread() and zfs_probe_partition().
 */
struct zfs_probe_args {
	int		fd;		/* descriptor of the device being probed */
	const char	*devname;	/* name that fd was opened with */
	uint64_t	*pool_guid;	/* optional out: guid of a pool found */
	/*
	 * Sector size in bytes.  zfs_probe_dev() passes this field's
	 * address to the DIOCGSECTORSIZE ioctl, which stores an unsigned
	 * int, so the field must be at least that wide.
	 */
	unsigned int	secsz;
};
404
405static int
406zfs_diskread(void *arg, void *buf, size_t blocks, off_t offset)
407{
408	struct zfs_probe_args *ppa;
409
410	ppa = (struct zfs_probe_args *)arg;
411	return (vdev_read(NULL, (void *)(uintptr_t)ppa->fd,
412	    offset * ppa->secsz, buf, blocks * ppa->secsz));
413}
414
415static int
416zfs_probe(int fd, uint64_t *pool_guid)
417{
418	spa_t *spa;
419	int ret;
420
421	ret = vdev_probe(vdev_read, (void *)(uintptr_t)fd, &spa);
422	if (ret == 0 && pool_guid != NULL)
423		*pool_guid = spa->spa_guid;
424	return (ret);
425}
426
427static void
428zfs_probe_partition(void *arg, const char *partname,
429    const struct ptable_entry *part)
430{
431	struct zfs_probe_args *ppa, pa;
432	struct ptable *table;
433	char devname[32];
434	int ret;
435
436	/* Probe only freebsd-zfs and freebsd partitions */
437	if (part->type != PART_FREEBSD &&
438	    part->type != PART_FREEBSD_ZFS)
439		return;
440
441	ppa = (struct zfs_probe_args *)arg;
442	strncpy(devname, ppa->devname, strlen(ppa->devname) - 1);
443	devname[strlen(ppa->devname) - 1] = '\0';
444	sprintf(devname, "%s%s:", devname, partname);
445	pa.fd = open(devname, O_RDONLY);
446	if (pa.fd == -1)
447		return;
448	ret = zfs_probe(pa.fd, ppa->pool_guid);
449	if (ret == 0)
450		return;
451	/* Do we have BSD label here? */
452	if (part->type == PART_FREEBSD) {
453		pa.devname = devname;
454		pa.pool_guid = ppa->pool_guid;
455		pa.secsz = ppa->secsz;
456		table = ptable_open(&pa, part->end - part->start + 1,
457		    ppa->secsz, zfs_diskread);
458		if (table != NULL) {
459			ptable_iterate(table, &pa, zfs_probe_partition);
460			ptable_close(table);
461		}
462	}
463	close(pa.fd);
464}
465
466int
467zfs_probe_dev(const char *devname, uint64_t *pool_guid)
468{
469	struct ptable *table;
470	struct zfs_probe_args pa;
471	off_t mediasz;
472	int ret;
473
474	pa.fd = open(devname, O_RDONLY);
475	if (pa.fd == -1)
476		return (ENXIO);
477	/* Probe the whole disk */
478	ret = zfs_probe(pa.fd, pool_guid);
479	if (ret == 0)
480		return (0);
481	/* Probe each partition */
482	ret = ioctl(pa.fd, DIOCGMEDIASIZE, &mediasz);
483	if (ret == 0)
484		ret = ioctl(pa.fd, DIOCGSECTORSIZE, &pa.secsz);
485	if (ret == 0) {
486		pa.devname = devname;
487		pa.pool_guid = pool_guid;
488		table = ptable_open(&pa, mediasz / pa.secsz, pa.secsz,
489		    zfs_diskread);
490		if (table != NULL) {
491			ptable_iterate(table, &pa, zfs_probe_partition);
492			ptable_close(table);
493		}
494	}
495	close(pa.fd);
496	return (0);
497}
498
499/*
500 * Print information about ZFS pools
501 */
502static void
503zfs_dev_print(int verbose)
504{
505	spa_t *spa;
506	char line[80];
507
508	if (verbose) {
509		spa_all_status();
510		return;
511	}
512	STAILQ_FOREACH(spa, &zfs_pools, spa_link) {
513		sprintf(line, "    zfs:%s\n", spa->spa_name);
514		pager_output(line);
515	}
516}
517
518/*
519 * Attempt to open the pool described by (dev) for use by (f).
520 */
521static int
522zfs_dev_open(struct open_file *f, ...)
523{
524	va_list		args;
525	struct zfs_devdesc	*dev;
526	struct zfsmount	*mount;
527	spa_t		*spa;
528	int		rv;
529
530	va_start(args, f);
531	dev = va_arg(args, struct zfs_devdesc *);
532	va_end(args);
533
534	if (dev->pool_guid == 0)
535		spa = STAILQ_FIRST(&zfs_pools);
536	else
537		spa = spa_find_by_guid(dev->pool_guid);
538	if (!spa)
539		return (ENXIO);
540	mount = malloc(sizeof(*mount));
541	rv = zfs_mount(spa, dev->root_guid, mount);
542	if (rv != 0) {
543		free(mount);
544		return (rv);
545	}
546	if (mount->objset.os_type != DMU_OST_ZFS) {
547		printf("Unexpected object set type %ju\n",
548		    (uintmax_t)mount->objset.os_type);
549		free(mount);
550		return (EIO);
551	}
552	f->f_devdata = mount;
553	free(dev);
554	return (0);
555}
556
557static int
558zfs_dev_close(struct open_file *f)
559{
560
561	free(f->f_devdata);
562	f->f_devdata = NULL;
563	return (0);
564}
565
566static int
567zfs_dev_strategy(void *devdata, int rw, daddr_t dblk, size_t size, char *buf, size_t *rsize)
568{
569
570	return (ENOSYS);
571}
572
/*
 * Device switch entry for ZFS.  Wires the init, strategy, open, close
 * and print hooks to the functions defined in this file; ioctl requests
 * go to noioctl and no cleanup hook is installed.
 */
struct devsw zfs_dev = {
	.dv_name = "zfs",
	.dv_type = DEVT_ZFS,
	.dv_init = zfs_dev_init,
	.dv_strategy = zfs_dev_strategy,
	.dv_open = zfs_dev_open,
	.dv_close = zfs_dev_close,
	.dv_ioctl = noioctl,
	.dv_print = zfs_dev_print,
	.dv_cleanup = NULL
};
584
585int
586zfs_parsedev(struct zfs_devdesc *dev, const char *devspec, const char **path)
587{
588	static char	rootname[ZFS_MAXNAMELEN];
589	static char	poolname[ZFS_MAXNAMELEN];
590	spa_t		*spa;
591	const char	*end;
592	const char	*np;
593	const char	*sep;
594	int		rv;
595
596	np = devspec;
597	if (*np != ':')
598		return (EINVAL);
599	np++;
600	end = strchr(np, ':');
601	if (end == NULL)
602		return (EINVAL);
603	sep = strchr(np, '/');
604	if (sep == NULL || sep >= end)
605		sep = end;
606	memcpy(poolname, np, sep - np);
607	poolname[sep - np] = '\0';
608	if (sep < end) {
609		sep++;
610		memcpy(rootname, sep, end - sep);
611		rootname[end - sep] = '\0';
612	}
613	else
614		rootname[0] = '\0';
615
616	spa = spa_find_by_name(poolname);
617	if (!spa)
618		return (ENXIO);
619	dev->pool_guid = spa->spa_guid;
620	rv = zfs_lookup_dataset(spa, rootname, &dev->root_guid);
621	if (rv != 0)
622		return (rv);
623	if (path != NULL)
624		*path = (*end == '\0') ? end : end + 1;
625	dev->d_dev = &zfs_dev;
626	dev->d_type = zfs_dev.dv_type;
627	return (0);
628}
629
630char *
631zfs_fmtdev(void *vdev)
632{
633	static char		rootname[ZFS_MAXNAMELEN];
634	static char		buf[2 * ZFS_MAXNAMELEN + 8];
635	struct zfs_devdesc	*dev = (struct zfs_devdesc *)vdev;
636	spa_t			*spa;
637
638	buf[0] = '\0';
639	if (dev->d_type != DEVT_ZFS)
640		return (buf);
641
642	if (dev->pool_guid == 0) {
643		spa = STAILQ_FIRST(&zfs_pools);
644		dev->pool_guid = spa->spa_guid;
645	} else
646		spa = spa_find_by_guid(dev->pool_guid);
647	if (spa == NULL) {
648		printf("ZFS: can't find pool by guid\n");
649		return (buf);
650	}
651	if (dev->root_guid == 0 && zfs_get_root(spa, &dev->root_guid)) {
652		printf("ZFS: can't find root filesystem\n");
653		return (buf);
654	}
655	if (zfs_rlookup(spa, dev->root_guid, rootname)) {
656		printf("ZFS: can't find filesystem by guid\n");
657		return (buf);
658	}
659
660	if (rootname[0] == '\0')
661		sprintf(buf, "%s:%s:", dev->d_dev->dv_name, spa->spa_name);
662	else
663		sprintf(buf, "%s:%s/%s:", dev->d_dev->dv_name, spa->spa_name,
664		    rootname);
665	return (buf);
666}
667
668int
669zfs_list(const char *name)
670{
671	static char	poolname[ZFS_MAXNAMELEN];
672	uint64_t	objid;
673	spa_t		*spa;
674	const char	*dsname;
675	int		len;
676	int		rv;
677
678	len = strlen(name);
679	dsname = strchr(name, '/');
680	if (dsname != NULL) {
681		len = dsname - name;
682		dsname++;
683	} else
684		dsname = "";
685	memcpy(poolname, name, len);
686	poolname[len] = '\0';
687
688	spa = spa_find_by_name(poolname);
689	if (!spa)
690		return (ENXIO);
691	rv = zfs_lookup_dataset(spa, dsname, &objid);
692	if (rv != 0)
693		return (rv);
694	rv = zfs_list_dataset(spa, objid);
695	return (rv);
696}
697