zfs.c revision 360891
1/*-
2 * Copyright (c) 2007 Doug Rabson
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 *	$FreeBSD: stable/11/stand/libsa/zfs/zfs.c 360891 2020-05-11 07:01:10Z tsoome $
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/11/stand/libsa/zfs/zfs.c 360891 2020-05-11 07:01:10Z tsoome $");
31
32/*
33 *	Stand-alone file reading package.
34 */
35
36#include <stand.h>
37#include <sys/disk.h>
38#include <sys/param.h>
39#include <sys/time.h>
40#include <sys/queue.h>
41#include <disk.h>
42#include <part.h>
43#include <stddef.h>
44#include <stdarg.h>
45#include <string.h>
46#include <bootstrap.h>
47
48#include "libzfs.h"
49
50#include "zfsimpl.c"
51
/*
 * Define the range of indexes to be populated with ZFS Boot Environments.
 * zfs_set_env() fills bootenvmenu_caption[N], bootenvansi_caption[N],
 * bootenvmenu_command[N] and bootenv_root[N] for N in
 * [ZFS_BE_FIRST, ZFS_BE_LAST], so one menu page holds
 * (ZFS_BE_LAST - ZFS_BE_FIRST + 1) entries.
 */
#define		ZFS_BE_FIRST	4
#define		ZFS_BE_LAST	8
55
/* File system entry points; they are collected in zfs_fsops below. */
static int	zfs_open(const char *path, struct open_file *f);
static int	zfs_close(struct open_file *f);
static int	zfs_read(struct open_file *f, void *buf, size_t size, size_t *resid);
static off_t	zfs_seek(struct open_file *f, off_t offset, int where);
static int	zfs_stat(struct open_file *f, struct stat *sb);
static int	zfs_readdir(struct open_file *f, struct dirent *d);

/* Publishes the bootenvs[N] / bootenvs_count environment variables. */
static void	zfs_bootenv_initial(const char *);

/*
 * Tentative definition so zfs_open() can compare against &zfs_dev; the
 * initialized definition appears after the device entry points.
 */
struct devsw zfs_dev;
66
/*
 * ZFS file system operations table.  The initializers are positional, so
 * their order must follow the member order of struct fs_ops.  The write
 * slot is filled with null_write; this file provides no ZFS write routine.
 */
struct fs_ops zfs_fsops = {
	"zfs",
	zfs_open,
	zfs_close,
	zfs_read,
	null_write,
	zfs_seek,
	zfs_stat,
	zfs_readdir
};
77
/*
 * In-core open file.
 *
 * Allocated and zeroed by zfs_open() and attached to open_file.f_fsdata.
 * For directories, zfs_readdir() reuses f_seekp as its iteration cursor and
 * fills in the zap fields on its first call.
 */
struct file {
	off_t		f_seekp;	/* seek pointer */
	dnode_phys_t	f_dnode;	/* dnode filled in by zfs_lookup() */
	uint64_t	f_zap_type;	/* zap type for readdir */
	uint64_t	f_num_leafs;	/* number of fzap leaf blocks */
	zap_leaf_phys_t	*f_zap_leaf;	/* zap leaf buffer */
};
88
/* Menu index currently being filled or cleared by zfs_set_env(). */
static int	zfs_env_index;
/* Number of entries zfs_belist_add() has put on zfs_be_head. */
static int	zfs_env_count;

/*
 * Scratch list of boot environment names.  It is reset with SLIST_INIT() and
 * filled through the zfs_belist_add() callback, then drained and freed by
 * zfs_bootenv() / zfs_bootenv_initial().  zfs_be and zfs_be_tmp are shared
 * iteration cursors used by the SLIST macros throughout this file.
 */
SLIST_HEAD(zfs_be_list, zfs_be_entry) zfs_be_head = SLIST_HEAD_INITIALIZER(zfs_be_head);
struct zfs_be_list *zfs_be_headp;
struct zfs_be_entry {
	const char *name;	/* pointer handed to zfs_belist_add(); not copied */
	SLIST_ENTRY(zfs_be_entry) entries;
} *zfs_be, *zfs_be_tmp;
98
99/*
100 * Open a file.
101 */
102static int
103zfs_open(const char *upath, struct open_file *f)
104{
105	struct zfsmount *mount = (struct zfsmount *)f->f_devdata;
106	struct file *fp;
107	int rc;
108
109	if (f->f_dev != &zfs_dev)
110		return (EINVAL);
111
112	/* allocate file system specific data structure */
113	fp = malloc(sizeof(struct file));
114	bzero(fp, sizeof(struct file));
115	f->f_fsdata = (void *)fp;
116
117	rc = zfs_lookup(mount, upath, &fp->f_dnode);
118	fp->f_seekp = 0;
119	if (rc) {
120		f->f_fsdata = NULL;
121		free(fp);
122	}
123	return (rc);
124}
125
126static int
127zfs_close(struct open_file *f)
128{
129	struct file *fp = (struct file *)f->f_fsdata;
130
131	dnode_cache_obj = NULL;
132	f->f_fsdata = (void *)0;
133	if (fp == (struct file *)0)
134		return (0);
135
136	free(fp);
137	return (0);
138}
139
140/*
141 * Copy a portion of a file into kernel memory.
142 * Cross block boundaries when necessary.
143 */
144static int
145zfs_read(struct open_file *f, void *start, size_t size, size_t *resid	/* out */)
146{
147	const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa;
148	struct file *fp = (struct file *)f->f_fsdata;
149	struct stat sb;
150	size_t n;
151	int rc;
152
153	rc = zfs_stat(f, &sb);
154	if (rc)
155		return (rc);
156	n = size;
157	if (fp->f_seekp + n > sb.st_size)
158		n = sb.st_size - fp->f_seekp;
159
160	rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, start, n);
161	if (rc)
162		return (rc);
163
164	if (0) {
165	    int i;
166	    for (i = 0; i < n; i++)
167		putchar(((char*) start)[i]);
168	}
169	fp->f_seekp += n;
170	if (resid)
171		*resid = size - n;
172
173	return (0);
174}
175
176static off_t
177zfs_seek(struct open_file *f, off_t offset, int where)
178{
179	struct file *fp = (struct file *)f->f_fsdata;
180
181	switch (where) {
182	case SEEK_SET:
183		fp->f_seekp = offset;
184		break;
185	case SEEK_CUR:
186		fp->f_seekp += offset;
187		break;
188	case SEEK_END:
189	    {
190		struct stat sb;
191		int error;
192
193		error = zfs_stat(f, &sb);
194		if (error != 0) {
195			errno = error;
196			return (-1);
197		}
198		fp->f_seekp = sb.st_size - offset;
199		break;
200	    }
201	default:
202		errno = EINVAL;
203		return (-1);
204	}
205	return (fp->f_seekp);
206}
207
208static int
209zfs_stat(struct open_file *f, struct stat *sb)
210{
211	const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa;
212	struct file *fp = (struct file *)f->f_fsdata;
213
214	return (zfs_dnode_stat(spa, &fp->f_dnode, sb));
215}
216
217static int
218zfs_readdir(struct open_file *f, struct dirent *d)
219{
220	const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa;
221	struct file *fp = (struct file *)f->f_fsdata;
222	mzap_ent_phys_t mze;
223	struct stat sb;
224	size_t bsize = fp->f_dnode.dn_datablkszsec << SPA_MINBLOCKSHIFT;
225	int rc;
226
227	rc = zfs_stat(f, &sb);
228	if (rc)
229		return (rc);
230	if (!S_ISDIR(sb.st_mode))
231		return (ENOTDIR);
232
233	/*
234	 * If this is the first read, get the zap type.
235	 */
236	if (fp->f_seekp == 0) {
237		rc = dnode_read(spa, &fp->f_dnode,
238				0, &fp->f_zap_type, sizeof(fp->f_zap_type));
239		if (rc)
240			return (rc);
241
242		if (fp->f_zap_type == ZBT_MICRO) {
243			fp->f_seekp = offsetof(mzap_phys_t, mz_chunk);
244		} else {
245			rc = dnode_read(spa, &fp->f_dnode,
246					offsetof(zap_phys_t, zap_num_leafs),
247					&fp->f_num_leafs,
248					sizeof(fp->f_num_leafs));
249			if (rc)
250				return (rc);
251
252			fp->f_seekp = bsize;
253			fp->f_zap_leaf = (zap_leaf_phys_t *)malloc(bsize);
254			rc = dnode_read(spa, &fp->f_dnode,
255					fp->f_seekp,
256					fp->f_zap_leaf,
257					bsize);
258			if (rc)
259				return (rc);
260		}
261	}
262
263	if (fp->f_zap_type == ZBT_MICRO) {
264	mzap_next:
265		if (fp->f_seekp >= bsize)
266			return (ENOENT);
267
268		rc = dnode_read(spa, &fp->f_dnode,
269				fp->f_seekp, &mze, sizeof(mze));
270		if (rc)
271			return (rc);
272		fp->f_seekp += sizeof(mze);
273
274		if (!mze.mze_name[0])
275			goto mzap_next;
276
277		d->d_fileno = ZFS_DIRENT_OBJ(mze.mze_value);
278		d->d_type = ZFS_DIRENT_TYPE(mze.mze_value);
279		strcpy(d->d_name, mze.mze_name);
280		d->d_namlen = strlen(d->d_name);
281		return (0);
282	} else {
283		zap_leaf_t zl;
284		zap_leaf_chunk_t *zc, *nc;
285		int chunk;
286		size_t namelen;
287		char *p;
288		uint64_t value;
289
290		/*
291		 * Initialise this so we can use the ZAP size
292		 * calculating macros.
293		 */
294		zl.l_bs = ilog2(bsize);
295		zl.l_phys = fp->f_zap_leaf;
296
297		/*
298		 * Figure out which chunk we are currently looking at
299		 * and consider seeking to the next leaf. We use the
300		 * low bits of f_seekp as a simple chunk index.
301		 */
302	fzap_next:
303		chunk = fp->f_seekp & (bsize - 1);
304		if (chunk == ZAP_LEAF_NUMCHUNKS(&zl)) {
305			fp->f_seekp = rounddown2(fp->f_seekp, bsize) + bsize;
306			chunk = 0;
307
308			/*
309			 * Check for EOF and read the new leaf.
310			 */
311			if (fp->f_seekp >= bsize * fp->f_num_leafs)
312				return (ENOENT);
313
314			rc = dnode_read(spa, &fp->f_dnode,
315					fp->f_seekp,
316					fp->f_zap_leaf,
317					bsize);
318			if (rc)
319				return (rc);
320		}
321
322		zc = &ZAP_LEAF_CHUNK(&zl, chunk);
323		fp->f_seekp++;
324		if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY)
325			goto fzap_next;
326
327		namelen = zc->l_entry.le_name_numints;
328		if (namelen > sizeof(d->d_name))
329			namelen = sizeof(d->d_name);
330
331		/*
332		 * Paste the name back together.
333		 */
334		nc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_name_chunk);
335		p = d->d_name;
336		while (namelen > 0) {
337			int len;
338			len = namelen;
339			if (len > ZAP_LEAF_ARRAY_BYTES)
340				len = ZAP_LEAF_ARRAY_BYTES;
341			memcpy(p, nc->l_array.la_array, len);
342			p += len;
343			namelen -= len;
344			nc = &ZAP_LEAF_CHUNK(&zl, nc->l_array.la_next);
345		}
346		d->d_name[sizeof(d->d_name) - 1] = 0;
347
348		/*
349		 * Assume the first eight bytes of the value are
350		 * a uint64_t.
351		 */
352		value = fzap_leaf_value(&zl, zc);
353
354		d->d_fileno = ZFS_DIRENT_OBJ(value);
355		d->d_type = ZFS_DIRENT_TYPE(value);
356		d->d_namlen = strlen(d->d_name);
357
358		return (0);
359	}
360}
361
362static int
363vdev_read(vdev_t *vdev, void *priv, off_t offset, void *buf, size_t bytes)
364{
365	int fd, ret;
366	size_t res, head, tail, total_size, full_sec_size;
367	unsigned secsz, do_tail_read;
368	off_t start_sec;
369	char *outbuf, *bouncebuf;
370
371	fd = (uintptr_t) priv;
372	outbuf = (char *) buf;
373	bouncebuf = NULL;
374
375	ret = ioctl(fd, DIOCGSECTORSIZE, &secsz);
376	if (ret != 0)
377		return (ret);
378
379	/*
380	 * Handling reads of arbitrary offset and size - multi-sector case
381	 * and single-sector case.
382	 *
383	 *                        Multi-sector Case
384	 *                (do_tail_read = true if tail > 0)
385	 *
386	 *   |<----------------------total_size--------------------->|
387	 *   |                                                       |
388	 *   |<--head-->|<--------------bytes------------>|<--tail-->|
389	 *   |          |                                 |          |
390	 *   |          |       |<~full_sec_size~>|       |          |
391	 *   +------------------+                 +------------------+
392	 *   |          |0101010|     .  .  .     |0101011|          |
393	 *   +------------------+                 +------------------+
394	 *         start_sec                         start_sec + n
395	 *
396	 *
397	 *                      Single-sector Case
398	 *                    (do_tail_read = false)
399	 *
400	 *              |<------total_size = secsz----->|
401	 *              |                               |
402	 *              |<-head->|<---bytes--->|<-tail->|
403	 *              +-------------------------------+
404	 *              |        |0101010101010|        |
405	 *              +-------------------------------+
406	 *                          start_sec
407	 */
408	start_sec = offset / secsz;
409	head = offset % secsz;
410	total_size = roundup2(head + bytes, secsz);
411	tail = total_size - (head + bytes);
412	do_tail_read = ((tail > 0) && (head + bytes > secsz));
413	full_sec_size = total_size;
414	if (head > 0)
415		full_sec_size -= secsz;
416	if (do_tail_read)
417		full_sec_size -= secsz;
418
419	/* Return of partial sector data requires a bounce buffer. */
420	if ((head > 0) || do_tail_read || bytes < secsz) {
421		bouncebuf = zfs_alloc(secsz);
422		if (bouncebuf == NULL) {
423			printf("vdev_read: out of memory\n");
424			return (ENOMEM);
425		}
426	}
427
428	if (lseek(fd, start_sec * secsz, SEEK_SET) == -1) {
429		ret = errno;
430		goto error;
431	}
432
433	/* Partial data return from first sector */
434	if (head > 0) {
435		res = read(fd, bouncebuf, secsz);
436		if (res != secsz) {
437			ret = EIO;
438			goto error;
439		}
440		memcpy(outbuf, bouncebuf + head, min(secsz - head, bytes));
441		outbuf += min(secsz - head, bytes);
442	}
443
444	/*
445	 * Full data return from read sectors.
446	 * Note, there is still corner case where we read
447	 * from sector boundary, but less than sector size, e.g. reading 512B
448	 * from 4k sector.
449	 */
450	if (full_sec_size > 0) {
451		if (bytes < full_sec_size) {
452			res = read(fd, bouncebuf, secsz);
453			if (res != secsz) {
454				ret = EIO;
455				goto error;
456			}
457			memcpy(outbuf, bouncebuf, bytes);
458		} else {
459			res = read(fd, outbuf, full_sec_size);
460			if (res != full_sec_size) {
461				ret = EIO;
462				goto error;
463			}
464			outbuf += full_sec_size;
465		}
466	}
467
468	/* Partial data return from last sector */
469	if (do_tail_read) {
470		res = read(fd, bouncebuf, secsz);
471		if (res != secsz) {
472			ret = EIO;
473			goto error;
474		}
475		memcpy(outbuf, bouncebuf, secsz - tail);
476	}
477
478	ret = 0;
479error:
480	if (bouncebuf != NULL)
481		zfs_free(bouncebuf, secsz);
482	return (ret);
483}
484
485static int
486zfs_dev_init(void)
487{
488	spa_t *spa;
489	spa_t *next;
490	spa_t *prev;
491
492	zfs_init();
493	if (archsw.arch_zfs_probe == NULL)
494		return (ENXIO);
495	archsw.arch_zfs_probe();
496
497	prev = NULL;
498	spa = STAILQ_FIRST(&zfs_pools);
499	while (spa != NULL) {
500		next = STAILQ_NEXT(spa, spa_link);
501		if (zfs_spa_init(spa)) {
502			if (prev == NULL)
503				STAILQ_REMOVE_HEAD(&zfs_pools, spa_link);
504			else
505				STAILQ_REMOVE_AFTER(&zfs_pools, prev, spa_link);
506		} else
507			prev = spa;
508		spa = next;
509	}
510	return (0);
511}
512
/*
 * Context passed through ptable_open() / ptable_iterate() to
 * zfs_diskread() and zfs_probe_partition().
 */
struct zfs_probe_args {
	int		fd;		/* descriptor zfs_diskread() reads from */
	const char	*devname;	/* name of the device behind fd */
	uint64_t	*pool_guid;	/* out: guid of a found pool; may be NULL */
	u_int		secsz;		/* sector size in bytes */
};
519
520static int
521zfs_diskread(void *arg, void *buf, size_t blocks, uint64_t offset)
522{
523	struct zfs_probe_args *ppa;
524
525	ppa = (struct zfs_probe_args *)arg;
526	return (vdev_read(NULL, (void *)(uintptr_t)ppa->fd,
527	    offset * ppa->secsz, buf, blocks * ppa->secsz));
528}
529
530static int
531zfs_probe(int fd, uint64_t *pool_guid)
532{
533	spa_t *spa;
534	int ret;
535
536	spa = NULL;
537	ret = vdev_probe(vdev_read, (void *)(uintptr_t)fd, &spa);
538	if (ret == 0 && pool_guid != NULL)
539		*pool_guid = spa->spa_guid;
540	return (ret);
541}
542
543static int
544zfs_probe_partition(void *arg, const char *partname,
545    const struct ptable_entry *part)
546{
547	struct zfs_probe_args *ppa, pa;
548	struct ptable *table;
549	char devname[32];
550	int ret;
551
552	/* Probe only freebsd-zfs and freebsd partitions */
553	if (part->type != PART_FREEBSD &&
554	    part->type != PART_FREEBSD_ZFS)
555		return (0);
556
557	ppa = (struct zfs_probe_args *)arg;
558	strncpy(devname, ppa->devname, strlen(ppa->devname) - 1);
559	devname[strlen(ppa->devname) - 1] = '\0';
560	sprintf(devname, "%s%s:", devname, partname);
561	pa.fd = open(devname, O_RDONLY);
562	if (pa.fd == -1)
563		return (0);
564	ret = zfs_probe(pa.fd, ppa->pool_guid);
565	if (ret == 0)
566		return (0);
567	/* Do we have BSD label here? */
568	if (part->type == PART_FREEBSD) {
569		pa.devname = devname;
570		pa.pool_guid = ppa->pool_guid;
571		pa.secsz = ppa->secsz;
572		table = ptable_open(&pa, part->end - part->start + 1,
573		    ppa->secsz, zfs_diskread);
574		if (table != NULL) {
575			ptable_iterate(table, &pa, zfs_probe_partition);
576			ptable_close(table);
577		}
578	}
579	close(pa.fd);
580	return (0);
581}
582
583int
584zfs_probe_dev(const char *devname, uint64_t *pool_guid)
585{
586	struct disk_devdesc *dev;
587	struct ptable *table;
588	struct zfs_probe_args pa;
589	uint64_t mediasz;
590	int ret;
591
592	if (pool_guid)
593		*pool_guid = 0;
594	pa.fd = open(devname, O_RDONLY);
595	if (pa.fd == -1)
596		return (ENXIO);
597	/*
598	 * We will not probe the whole disk, we can not boot from such
599	 * disks and some systems will misreport the disk sizes and will
600	 * hang while accessing the disk.
601	 */
602	if (archsw.arch_getdev((void **)&dev, devname, NULL) == 0) {
603		int partition = dev->d_partition;
604		int slice = dev->d_slice;
605
606		free(dev);
607		if (partition != -1 && slice != -1) {
608			ret = zfs_probe(pa.fd, pool_guid);
609			if (ret == 0)
610				return (0);
611		}
612	}
613
614	/* Probe each partition */
615	ret = ioctl(pa.fd, DIOCGMEDIASIZE, &mediasz);
616	if (ret == 0)
617		ret = ioctl(pa.fd, DIOCGSECTORSIZE, &pa.secsz);
618	if (ret == 0) {
619		pa.devname = devname;
620		pa.pool_guid = pool_guid;
621		table = ptable_open(&pa, mediasz / pa.secsz, pa.secsz,
622		    zfs_diskread);
623		if (table != NULL) {
624			ptable_iterate(table, &pa, zfs_probe_partition);
625			ptable_close(table);
626		}
627	}
628	close(pa.fd);
629	if (pool_guid && *pool_guid == 0)
630		ret = ENXIO;
631	return (ret);
632}
633
634/*
635 * Print information about ZFS pools
636 */
637static int
638zfs_dev_print(int verbose)
639{
640	spa_t *spa;
641	char line[80];
642	int ret = 0;
643
644	if (STAILQ_EMPTY(&zfs_pools))
645		return (0);
646
647	printf("%s devices:", zfs_dev.dv_name);
648	if ((ret = pager_output("\n")) != 0)
649		return (ret);
650
651	if (verbose) {
652		return (spa_all_status());
653	}
654	STAILQ_FOREACH(spa, &zfs_pools, spa_link) {
655		snprintf(line, sizeof(line), "    zfs:%s\n", spa->spa_name);
656		ret = pager_output(line);
657		if (ret != 0)
658			break;
659	}
660	return (ret);
661}
662
663/*
664 * Attempt to open the pool described by (dev) for use by (f).
665 */
666static int
667zfs_dev_open(struct open_file *f, ...)
668{
669	va_list		args;
670	struct zfs_devdesc	*dev;
671	struct zfsmount	*mount;
672	spa_t		*spa;
673	int		rv;
674
675	va_start(args, f);
676	dev = va_arg(args, struct zfs_devdesc *);
677	va_end(args);
678
679	if (dev->pool_guid == 0)
680		spa = STAILQ_FIRST(&zfs_pools);
681	else
682		spa = spa_find_by_guid(dev->pool_guid);
683	if (!spa)
684		return (ENXIO);
685	mount = malloc(sizeof(*mount));
686	rv = zfs_mount(spa, dev->root_guid, mount);
687	if (rv != 0) {
688		free(mount);
689		return (rv);
690	}
691	if (mount->objset.os_type != DMU_OST_ZFS) {
692		printf("Unexpected object set type %ju\n",
693		    (uintmax_t)mount->objset.os_type);
694		free(mount);
695		return (EIO);
696	}
697	f->f_devdata = mount;
698	free(dev);
699	return (0);
700}
701
702static int
703zfs_dev_close(struct open_file *f)
704{
705
706	free(f->f_devdata);
707	f->f_devdata = NULL;
708	return (0);
709}
710
/*
 * Block strategy entry point of the zfs device.  Not implemented: every
 * call returns ENOSYS and none of the arguments is examined.
 */
static int
zfs_dev_strategy(void *devdata, int rw, daddr_t dblk, size_t size, char *buf, size_t *rsize)
{

	return (ENOSYS);
}
717
/*
 * Device switch entry for the "zfs" pseudo device.  Ioctl requests are
 * routed to noioctl and there is no cleanup hook.
 */
struct devsw zfs_dev = {
	.dv_name = "zfs",
	.dv_type = DEVT_ZFS,
	.dv_init = zfs_dev_init,
	.dv_strategy = zfs_dev_strategy,
	.dv_open = zfs_dev_open,
	.dv_close = zfs_dev_close,
	.dv_ioctl = noioctl,
	.dv_print = zfs_dev_print,
	.dv_cleanup = NULL
};
729
730int
731zfs_parsedev(struct zfs_devdesc *dev, const char *devspec, const char **path)
732{
733	static char	rootname[ZFS_MAXNAMELEN];
734	static char	poolname[ZFS_MAXNAMELEN];
735	spa_t		*spa;
736	const char	*end;
737	const char	*np;
738	const char	*sep;
739	int		rv;
740
741	np = devspec;
742	if (*np != ':')
743		return (EINVAL);
744	np++;
745	end = strrchr(np, ':');
746	if (end == NULL)
747		return (EINVAL);
748	sep = strchr(np, '/');
749	if (sep == NULL || sep >= end)
750		sep = end;
751	memcpy(poolname, np, sep - np);
752	poolname[sep - np] = '\0';
753	if (sep < end) {
754		sep++;
755		memcpy(rootname, sep, end - sep);
756		rootname[end - sep] = '\0';
757	}
758	else
759		rootname[0] = '\0';
760
761	spa = spa_find_by_name(poolname);
762	if (!spa)
763		return (ENXIO);
764	dev->pool_guid = spa->spa_guid;
765	rv = zfs_lookup_dataset(spa, rootname, &dev->root_guid);
766	if (rv != 0)
767		return (rv);
768	if (path != NULL)
769		*path = (*end == '\0') ? end : end + 1;
770	dev->dd.d_dev = &zfs_dev;
771	return (0);
772}
773
774char *
775zfs_fmtdev(void *vdev)
776{
777	static char		rootname[ZFS_MAXNAMELEN];
778	static char		buf[2 * ZFS_MAXNAMELEN + 8];
779	struct zfs_devdesc	*dev = (struct zfs_devdesc *)vdev;
780	spa_t			*spa;
781
782	buf[0] = '\0';
783	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
784		return (buf);
785
786	if (dev->pool_guid == 0) {
787		spa = STAILQ_FIRST(&zfs_pools);
788		dev->pool_guid = spa->spa_guid;
789	} else
790		spa = spa_find_by_guid(dev->pool_guid);
791	if (spa == NULL) {
792		printf("ZFS: can't find pool by guid\n");
793		return (buf);
794	}
795	if (dev->root_guid == 0 && zfs_get_root(spa, &dev->root_guid)) {
796		printf("ZFS: can't find root filesystem\n");
797		return (buf);
798	}
799	if (zfs_rlookup(spa, dev->root_guid, rootname)) {
800		printf("ZFS: can't find filesystem by guid\n");
801		return (buf);
802	}
803
804	if (rootname[0] == '\0')
805		sprintf(buf, "%s:%s:", dev->dd.d_dev->dv_name, spa->spa_name);
806	else
807		sprintf(buf, "%s:%s/%s:", dev->dd.d_dev->dv_name, spa->spa_name,
808		    rootname);
809	return (buf);
810}
811
812int
813zfs_list(const char *name)
814{
815	static char	poolname[ZFS_MAXNAMELEN];
816	uint64_t	objid;
817	spa_t		*spa;
818	const char	*dsname;
819	int		len;
820	int		rv;
821
822	len = strlen(name);
823	dsname = strchr(name, '/');
824	if (dsname != NULL) {
825		len = dsname - name;
826		dsname++;
827	} else
828		dsname = "";
829	memcpy(poolname, name, len);
830	poolname[len] = '\0';
831
832	spa = spa_find_by_name(poolname);
833	if (!spa)
834		return (ENXIO);
835	rv = zfs_lookup_dataset(spa, dsname, &objid);
836	if (rv != 0)
837		return (rv);
838
839	return (zfs_list_dataset(spa, objid));
840}
841
/*
 * Seed the boot environment variables from the current device name.
 *
 * currdev_in is expected to look like "zfs:pool/path/to/be:".  The name
 * without its trailing ':' becomes zfs_be_active, zfs_be_currpage is reset
 * to "1", and the name with the "zfs:" prefix and its last path element
 * removed becomes zfs_be_root, which is also passed to
 * zfs_bootenv_initial().
 *
 * Nothing is done for a NULL or empty name, a name that does not start
 * with "zfs:", a name with nothing after that prefix, or when the working
 * copy cannot be allocated.
 */
void
init_zfs_bootenv(const char *currdev_in)
{
	char *beroot, *currdev;
	size_t currdev_len;

	if (currdev_in == NULL)
		return;
	currdev_len = strlen(currdev_in);
	if (currdev_len == 0)
		return;
	if (strncmp(currdev_in, "zfs:", 4) != 0)
		return;
	currdev = strdup(currdev_in);
	if (currdev == NULL)
		return;
	/* Remove the trailing : */
	if (currdev[currdev_len - 1] == ':')
		currdev[currdev_len - 1] = '\0';
	/*
	 * "zfs:" alone loses its only ':' above; bail out instead of
	 * computing a pointer from a failed strchr() further down.
	 */
	if (strchr(currdev, ':') == NULL) {
		free(currdev);
		return;
	}
	setenv("zfs_be_active", currdev, 1);
	setenv("zfs_be_currpage", "1", 1);
	/* Remove the last element (current bootenv) */
	beroot = strrchr(currdev, '/');
	if (beroot != NULL)
		beroot[0] = '\0';
	/* Any '/' lies after the prefix, so the ':' is still present. */
	beroot = strchr(currdev, ':') + 1;
	setenv("zfs_be_root", beroot, 1);
	zfs_bootenv_initial(beroot);
	free(currdev);
}
870
871static void
872zfs_bootenv_initial(const char *name)
873{
874	char		poolname[ZFS_MAXNAMELEN], *dsname;
875	char envname[32], envval[256];
876	uint64_t	objid;
877	spa_t		*spa;
878	int		bootenvs_idx, len, rv;
879
880	SLIST_INIT(&zfs_be_head);
881	zfs_env_count = 0;
882	len = strlen(name);
883	dsname = strchr(name, '/');
884	if (dsname != NULL) {
885		len = dsname - name;
886		dsname++;
887	} else
888		dsname = "";
889	strlcpy(poolname, name, len + 1);
890	spa = spa_find_by_name(poolname);
891	if (spa == NULL)
892		return;
893	rv = zfs_lookup_dataset(spa, dsname, &objid);
894	if (rv != 0)
895		return;
896	rv = zfs_callback_dataset(spa, objid, zfs_belist_add);
897	bootenvs_idx = 0;
898	/* Populate the initial environment variables */
899	SLIST_FOREACH_SAFE(zfs_be, &zfs_be_head, entries, zfs_be_tmp) {
900		/* Enumerate all bootenvs for general usage */
901		snprintf(envname, sizeof(envname), "bootenvs[%d]", bootenvs_idx);
902		snprintf(envval, sizeof(envval), "zfs:%s/%s", name, zfs_be->name);
903		rv = setenv(envname, envval, 1);
904		if (rv != 0)
905			break;
906		bootenvs_idx++;
907	}
908	snprintf(envval, sizeof(envval), "%d", bootenvs_idx);
909	setenv("bootenvs_count", envval, 1);
910
911	/* Clean up the SLIST of ZFS BEs */
912	while (!SLIST_EMPTY(&zfs_be_head)) {
913		zfs_be = SLIST_FIRST(&zfs_be_head);
914		SLIST_REMOVE_HEAD(&zfs_be_head, entries);
915		free(zfs_be);
916	}
917
918	return;
919
920}
921
922int
923zfs_bootenv(const char *name)
924{
925	static char	poolname[ZFS_MAXNAMELEN], *dsname, *root;
926	char		becount[4];
927	uint64_t	objid;
928	spa_t		*spa;
929	int		len, rv, pages, perpage, currpage;
930
931	if (name == NULL)
932		return (EINVAL);
933	if ((root = getenv("zfs_be_root")) == NULL)
934		return (EINVAL);
935
936	if (strcmp(name, root) != 0) {
937		if (setenv("zfs_be_root", name, 1) != 0)
938			return (ENOMEM);
939	}
940
941	SLIST_INIT(&zfs_be_head);
942	zfs_env_count = 0;
943	len = strlen(name);
944	dsname = strchr(name, '/');
945	if (dsname != NULL) {
946		len = dsname - name;
947		dsname++;
948	} else
949		dsname = "";
950	memcpy(poolname, name, len);
951	poolname[len] = '\0';
952
953	spa = spa_find_by_name(poolname);
954	if (!spa)
955		return (ENXIO);
956	rv = zfs_lookup_dataset(spa, dsname, &objid);
957	if (rv != 0)
958		return (rv);
959	rv = zfs_callback_dataset(spa, objid, zfs_belist_add);
960
961	/* Calculate and store the number of pages of BEs */
962	perpage = (ZFS_BE_LAST - ZFS_BE_FIRST + 1);
963	pages = (zfs_env_count / perpage) + ((zfs_env_count % perpage) > 0 ? 1 : 0);
964	snprintf(becount, 4, "%d", pages);
965	if (setenv("zfs_be_pages", becount, 1) != 0)
966		return (ENOMEM);
967
968	/* Roll over the page counter if it has exceeded the maximum */
969	currpage = strtol(getenv("zfs_be_currpage"), NULL, 10);
970	if (currpage > pages) {
971		if (setenv("zfs_be_currpage", "1", 1) != 0)
972			return (ENOMEM);
973	}
974
975	/* Populate the menu environment variables */
976	zfs_set_env();
977
978	/* Clean up the SLIST of ZFS BEs */
979	while (!SLIST_EMPTY(&zfs_be_head)) {
980		zfs_be = SLIST_FIRST(&zfs_be_head);
981		SLIST_REMOVE_HEAD(&zfs_be_head, entries);
982		free(zfs_be);
983	}
984
985	return (rv);
986}
987
988int
989zfs_belist_add(const char *name, uint64_t value __unused)
990{
991
992	/* Skip special datasets that start with a $ character */
993	if (strncmp(name, "$", 1) == 0) {
994		return (0);
995	}
996	/* Add the boot environment to the head of the SLIST */
997	zfs_be = malloc(sizeof(struct zfs_be_entry));
998	if (zfs_be == NULL) {
999		return (ENOMEM);
1000	}
1001	zfs_be->name = name;
1002	SLIST_INSERT_HEAD(&zfs_be_head, zfs_be, entries);
1003	zfs_env_count++;
1004
1005	return (0);
1006}
1007
1008int
1009zfs_set_env(void)
1010{
1011	char envname[32], envval[256];
1012	char *beroot, *pagenum;
1013	int rv, page, ctr;
1014
1015	beroot = getenv("zfs_be_root");
1016	if (beroot == NULL) {
1017		return (1);
1018	}
1019
1020	pagenum = getenv("zfs_be_currpage");
1021	if (pagenum != NULL) {
1022		page = strtol(pagenum, NULL, 10);
1023	} else {
1024		page = 1;
1025	}
1026
1027	ctr = 1;
1028	rv = 0;
1029	zfs_env_index = ZFS_BE_FIRST;
1030	SLIST_FOREACH_SAFE(zfs_be, &zfs_be_head, entries, zfs_be_tmp) {
1031		/* Skip to the requested page number */
1032		if (ctr <= ((ZFS_BE_LAST - ZFS_BE_FIRST + 1) * (page - 1))) {
1033			ctr++;
1034			continue;
1035		}
1036
1037		snprintf(envname, sizeof(envname), "bootenvmenu_caption[%d]", zfs_env_index);
1038		snprintf(envval, sizeof(envval), "%s", zfs_be->name);
1039		rv = setenv(envname, envval, 1);
1040		if (rv != 0) {
1041			break;
1042		}
1043
1044		snprintf(envname, sizeof(envname), "bootenvansi_caption[%d]", zfs_env_index);
1045		rv = setenv(envname, envval, 1);
1046		if (rv != 0){
1047			break;
1048		}
1049
1050		snprintf(envname, sizeof(envname), "bootenvmenu_command[%d]", zfs_env_index);
1051		rv = setenv(envname, "set_bootenv", 1);
1052		if (rv != 0){
1053			break;
1054		}
1055
1056		snprintf(envname, sizeof(envname), "bootenv_root[%d]", zfs_env_index);
1057		snprintf(envval, sizeof(envval), "zfs:%s/%s", beroot, zfs_be->name);
1058		rv = setenv(envname, envval, 1);
1059		if (rv != 0){
1060			break;
1061		}
1062
1063		zfs_env_index++;
1064		if (zfs_env_index > ZFS_BE_LAST) {
1065			break;
1066		}
1067
1068	}
1069
1070	for (; zfs_env_index <= ZFS_BE_LAST; zfs_env_index++) {
1071		snprintf(envname, sizeof(envname), "bootenvmenu_caption[%d]", zfs_env_index);
1072		(void)unsetenv(envname);
1073		snprintf(envname, sizeof(envname), "bootenvansi_caption[%d]", zfs_env_index);
1074		(void)unsetenv(envname);
1075		snprintf(envname, sizeof(envname), "bootenvmenu_command[%d]", zfs_env_index);
1076		(void)unsetenv(envname);
1077		snprintf(envname, sizeof(envname), "bootenv_root[%d]", zfs_env_index);
1078		(void)unsetenv(envname);
1079	}
1080
1081	return (rv);
1082}
1083