zfs.c revision 208669
1/*-
2 * Copyright (c) 2007 Doug Rabson
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 *	$FreeBSD: head/sys/boot/zfs/zfs.c 208669 2010-05-31 09:06:03Z avg $
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/boot/zfs/zfs.c 208669 2010-05-31 09:06:03Z avg $");
31
32/*
33 *	Stand-alone file reading package.
34 */
35
36#include <sys/param.h>
37#include <sys/disklabel.h>
38#include <sys/time.h>
39#include <sys/queue.h>
40#include <stddef.h>
41#include <stdarg.h>
42#include <string.h>
43#include <stand.h>
44#include <bootstrap.h>
45
46#include "zfsimpl.c"
47
48static int	zfs_open(const char *path, struct open_file *f);
49static int	zfs_write(struct open_file *f, void *buf, size_t size, size_t *resid);
50static int	zfs_close(struct open_file *f);
51static int	zfs_read(struct open_file *f, void *buf, size_t size, size_t *resid);
52static off_t	zfs_seek(struct open_file *f, off_t offset, int where);
53static int	zfs_stat(struct open_file *f, struct stat *sb);
54static int	zfs_readdir(struct open_file *f, struct dirent *d);
55
56struct devsw zfs_dev;
57
58struct fs_ops zfs_fsops = {
59	"zfs",
60	zfs_open,
61	zfs_close,
62	zfs_read,
63	zfs_write,
64	zfs_seek,
65	zfs_stat,
66	zfs_readdir
67};
68
69/*
70 * In-core open file.
71 */
72struct file {
73	off_t		f_seekp;	/* seek pointer */
74	dnode_phys_t	f_dnode;
75	uint64_t	f_zap_type;	/* zap type for readdir */
76	uint64_t	f_num_leafs;	/* number of fzap leaf blocks */
77	zap_leaf_phys_t	*f_zap_leaf;	/* zap leaf buffer */
78};
79
80/*
81 * Open a file.
82 */
83static int
84zfs_open(const char *upath, struct open_file *f)
85{
86	spa_t *spa = (spa_t *) f->f_devdata;
87	struct file *fp;
88	int rc;
89
90	if (f->f_dev != &zfs_dev)
91		return (EINVAL);
92
93	rc = zfs_mount_pool(spa);
94	if (rc)
95		return (rc);
96
97	/* allocate file system specific data structure */
98	fp = malloc(sizeof(struct file));
99	bzero(fp, sizeof(struct file));
100	f->f_fsdata = (void *)fp;
101
102	if (spa->spa_root_objset.os_type != DMU_OST_ZFS) {
103		printf("Unexpected object set type %llu\n",
104		    spa->spa_root_objset.os_type);
105		rc = EIO;
106		goto out;
107	}
108
109	rc = zfs_lookup(spa, upath, &fp->f_dnode);
110	if (rc)
111		goto out;
112
113	fp->f_seekp = 0;
114out:
115	if (rc) {
116		f->f_fsdata = NULL;
117		free(fp);
118	}
119	return (rc);
120}
121
122static int
123zfs_close(struct open_file *f)
124{
125	struct file *fp = (struct file *)f->f_fsdata;
126
127	dnode_cache_obj = 0;
128	f->f_fsdata = (void *)0;
129	if (fp == (struct file *)0)
130		return (0);
131
132	free(fp);
133	return (0);
134}
135
136/*
137 * Copy a portion of a file into kernel memory.
138 * Cross block boundaries when necessary.
139 */
140static int
141zfs_read(struct open_file *f, void *start, size_t size, size_t *resid	/* out */)
142{
143	spa_t *spa = (spa_t *) f->f_devdata;
144	struct file *fp = (struct file *)f->f_fsdata;
145	const znode_phys_t *zp = (const znode_phys_t *) fp->f_dnode.dn_bonus;
146	size_t n;
147	int rc;
148
149	n = size;
150	if (fp->f_seekp + n > zp->zp_size)
151		n = zp->zp_size - fp->f_seekp;
152
153	rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, start, n);
154	if (rc)
155		return (rc);
156
157	if (0) {
158	    int i;
159	    for (i = 0; i < n; i++)
160		putchar(((char*) start)[i]);
161	}
162	fp->f_seekp += n;
163	if (resid)
164		*resid = size - n;
165
166	return (0);
167}
168
/*
 * Writing is not supported: the boot loader only ever reads from the
 * pool, so every request is refused with EROFS and no argument is used.
 */
static int
zfs_write(struct open_file *f, void *start, size_t size, size_t *resid	/* out */)
{
	const int error = EROFS;

	(void)f;
	(void)start;
	(void)size;
	(void)resid;

	return (error);
}
178
179static off_t
180zfs_seek(struct open_file *f, off_t offset, int where)
181{
182	struct file *fp = (struct file *)f->f_fsdata;
183	znode_phys_t *zp = (znode_phys_t *) fp->f_dnode.dn_bonus;
184
185	switch (where) {
186	case SEEK_SET:
187		fp->f_seekp = offset;
188		break;
189	case SEEK_CUR:
190		fp->f_seekp += offset;
191		break;
192	case SEEK_END:
193		fp->f_seekp = zp->zp_size - offset;
194		break;
195	default:
196		errno = EINVAL;
197		return (-1);
198	}
199	return (fp->f_seekp);
200}
201
202static int
203zfs_stat(struct open_file *f, struct stat *sb)
204{
205	struct file *fp = (struct file *)f->f_fsdata;
206	znode_phys_t *zp = (znode_phys_t *) fp->f_dnode.dn_bonus;
207
208	/* only important stuff */
209	sb->st_mode = zp->zp_mode;
210	sb->st_uid = zp->zp_uid;
211	sb->st_gid = zp->zp_gid;
212	sb->st_size = zp->zp_size;
213
214	return (0);
215}
216
217static int
218zfs_readdir(struct open_file *f, struct dirent *d)
219{
220	spa_t *spa = (spa_t *) f->f_devdata;
221	struct file *fp = (struct file *)f->f_fsdata;
222	znode_phys_t *zp = (znode_phys_t *) fp->f_dnode.dn_bonus;
223	mzap_ent_phys_t mze;
224	size_t bsize = fp->f_dnode.dn_datablkszsec << SPA_MINBLOCKSHIFT;
225	int rc;
226
227	if ((zp->zp_mode >> 12) != 0x4) {
228		return (ENOTDIR);
229	}
230
231	/*
232	 * If this is the first read, get the zap type.
233	 */
234	if (fp->f_seekp == 0) {
235		rc = dnode_read(spa, &fp->f_dnode,
236				0, &fp->f_zap_type, sizeof(fp->f_zap_type));
237		if (rc)
238			return (rc);
239
240		if (fp->f_zap_type == ZBT_MICRO) {
241			fp->f_seekp = offsetof(mzap_phys_t, mz_chunk);
242		} else {
243			rc = dnode_read(spa, &fp->f_dnode,
244					offsetof(zap_phys_t, zap_num_leafs),
245					&fp->f_num_leafs,
246					sizeof(fp->f_num_leafs));
247			if (rc)
248				return (rc);
249
250			fp->f_seekp = bsize;
251			fp->f_zap_leaf = (zap_leaf_phys_t *)malloc(bsize);
252			rc = dnode_read(spa, &fp->f_dnode,
253					fp->f_seekp,
254					fp->f_zap_leaf,
255					bsize);
256			if (rc)
257				return (rc);
258		}
259	}
260
261	if (fp->f_zap_type == ZBT_MICRO) {
262	mzap_next:
263		if (fp->f_seekp >= bsize)
264			return (ENOENT);
265
266		rc = dnode_read(spa, &fp->f_dnode,
267				fp->f_seekp, &mze, sizeof(mze));
268		if (rc)
269			return (rc);
270		fp->f_seekp += sizeof(mze);
271
272		if (!mze.mze_name[0])
273			goto mzap_next;
274
275		d->d_fileno = ZFS_DIRENT_OBJ(mze.mze_value);
276		d->d_type = ZFS_DIRENT_TYPE(mze.mze_value);
277		strcpy(d->d_name, mze.mze_name);
278		d->d_namlen = strlen(d->d_name);
279		return (0);
280	} else {
281		zap_leaf_t zl;
282		zap_leaf_chunk_t *zc, *nc;
283		int chunk;
284		size_t namelen;
285		char *p;
286		uint64_t value;
287
288		/*
289		 * Initialise this so we can use the ZAP size
290		 * calculating macros.
291		 */
292		zl.l_bs = ilog2(bsize);
293		zl.l_phys = fp->f_zap_leaf;
294
295		/*
296		 * Figure out which chunk we are currently looking at
297		 * and consider seeking to the next leaf. We use the
298		 * low bits of f_seekp as a simple chunk index.
299		 */
300	fzap_next:
301		chunk = fp->f_seekp & (bsize - 1);
302		if (chunk == ZAP_LEAF_NUMCHUNKS(&zl)) {
303			fp->f_seekp = (fp->f_seekp & ~(bsize - 1)) + bsize;
304			chunk = 0;
305
306			/*
307			 * Check for EOF and read the new leaf.
308			 */
309			if (fp->f_seekp >= bsize * fp->f_num_leafs)
310				return (ENOENT);
311
312			rc = dnode_read(spa, &fp->f_dnode,
313					fp->f_seekp,
314					fp->f_zap_leaf,
315					bsize);
316			if (rc)
317				return (rc);
318		}
319
320		zc = &ZAP_LEAF_CHUNK(&zl, chunk);
321		fp->f_seekp++;
322		if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY)
323			goto fzap_next;
324
325		namelen = zc->l_entry.le_name_length;
326		if (namelen > sizeof(d->d_name))
327			namelen = sizeof(d->d_name);
328
329		/*
330		 * Paste the name back together.
331		 */
332		nc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_name_chunk);
333		p = d->d_name;
334		while (namelen > 0) {
335			int len;
336			len = namelen;
337			if (len > ZAP_LEAF_ARRAY_BYTES)
338				len = ZAP_LEAF_ARRAY_BYTES;
339			memcpy(p, nc->l_array.la_array, len);
340			p += len;
341			namelen -= len;
342			nc = &ZAP_LEAF_CHUNK(&zl, nc->l_array.la_next);
343		}
344		d->d_name[sizeof(d->d_name) - 1] = 0;
345
346		/*
347		 * Assume the first eight bytes of the value are
348		 * a uint64_t.
349		 */
350		value = fzap_leaf_value(&zl, zc);
351
352		d->d_fileno = ZFS_DIRENT_OBJ(value);
353		d->d_type = ZFS_DIRENT_TYPE(value);
354		d->d_namlen = strlen(d->d_name);
355
356		return (0);
357	}
358}
359
360static int
361vdev_read(vdev_t *vdev, void *priv, off_t offset, void *buf, size_t size)
362{
363	int fd;
364
365	fd = (uintptr_t) priv;
366	lseek(fd, offset, SEEK_SET);
367	if (read(fd, buf, size) == size) {
368		return 0;
369	} else {
370		return (EIO);
371	}
372}
373
374/*
375 * Convert a pool guid to a 'unit number' suitable for use with zfs_dev_open.
376 */
377int
378zfs_guid_to_unit(uint64_t guid)
379{
380	spa_t *spa;
381	int unit;
382
383	unit = 0;
384	STAILQ_FOREACH(spa, &zfs_pools, spa_link) {
385		if (spa->spa_guid == guid)
386			return unit;
387		unit++;
388	}
389	return (-1);
390}
391
392static int
393zfs_dev_init(void)
394{
395	char devname[512];
396	int unit, slice;
397	int fd;
398
399	/*
400	 * Open all the disks we can find and see if we can reconstruct
401	 * ZFS pools from them. Bogusly assumes that the disks are named
402	 * diskN, diskNpM or diskNsM.
403	 */
404	zfs_init();
405	for (unit = 0; unit < 32 /* XXX */; unit++) {
406		sprintf(devname, "disk%d:", unit);
407		fd = open(devname, O_RDONLY);
408		if (fd == -1)
409			continue;
410
411		/*
412		 * If we find a vdev, the zfs code will eat the fd, otherwise
413		 * we close it.
414		 */
415		if (vdev_probe(vdev_read, (void*) (uintptr_t) fd, 0))
416			close(fd);
417
418		for (slice = 1; slice <= 128; slice++) {
419			sprintf(devname, "disk%dp%d:", unit, slice);
420			fd = open(devname, O_RDONLY);
421			if (fd == -1) {
422				sprintf(devname, "disk%ds%d:", unit, slice);
423				fd = open(devname, O_RDONLY);
424				if (fd == -1)
425					continue;
426			}
427			if (vdev_probe(vdev_read, (void*) (uintptr_t) fd, 0))
428				close(fd);
429		}
430	}
431
432	return (0);
433}
434
435/*
436 * Print information about ZFS pools
437 */
438static void
439zfs_dev_print(int verbose)
440{
441	spa_t *spa;
442	char line[80];
443	int unit;
444
445	if (verbose) {
446		spa_all_status();
447		return;
448	}
449	unit = 0;
450	STAILQ_FOREACH(spa, &zfs_pools, spa_link) {
451		sprintf(line, "    zfs%d:   %s\n", unit, spa->spa_name);
452		pager_output(line);
453		unit++;
454	}
455}
456
457/*
458 * Attempt to open the pool described by (dev) for use by (f).
459 */
460static int
461zfs_dev_open(struct open_file *f, ...)
462{
463	va_list		args;
464	struct devdesc	*dev;
465	int		unit, i;
466	spa_t		*spa;
467
468	va_start(args, f);
469	dev = va_arg(args, struct devdesc*);
470	va_end(args);
471
472	/*
473	 * We mostly ignore the stuff that devopen sends us. For now,
474	 * use the unit to find a pool - later we will override the
475	 * devname parsing so that we can name a pool and a fs within
476	 * the pool.
477	 */
478	unit = dev->d_unit;
479
480	i = 0;
481	STAILQ_FOREACH(spa, &zfs_pools, spa_link) {
482		if (i == unit)
483			break;
484		i++;
485	}
486	if (!spa) {
487		return (ENXIO);
488	}
489
490	f->f_devdata = spa;
491	free(dev);
492	return (0);
493}
494
495static int
496zfs_dev_close(struct open_file *f)
497{
498
499	f->f_devdata = NULL;
500	return (0);
501}
502
503static int
504zfs_dev_strategy(void *devdata, int rw, daddr_t dblk, size_t size, char *buf, size_t *rsize)
505{
506
507	return (ENOSYS);
508}
509
510struct devsw zfs_dev = {
511	.dv_name = "zfs",
512	.dv_type = DEVT_ZFS,
513	.dv_init = zfs_dev_init,
514	.dv_strategy = zfs_dev_strategy,
515	.dv_open = zfs_dev_open,
516	.dv_close = zfs_dev_close,
517	.dv_ioctl = noioctl,
518	.dv_print = zfs_dev_print,
519	.dv_cleanup = NULL
520};
521