1/*	$NetBSD: ukfs.c,v 1.56 2011/01/02 13:01:45 pooka Exp $	*/
2
3/*
4 * Copyright (c) 2007, 2008, 2009  Antti Kantee.  All Rights Reserved.
5 *
6 * Development of this software was supported by the
7 * Finnish Cultural Foundation.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
19 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31/*
 * This library enables access to file systems directly without
 * involving system calls.
34 */
35
36#ifdef __linux__
37#define _XOPEN_SOURCE 500
38#define _BSD_SOURCE
39#define _FILE_OFFSET_BITS 64
40#endif
41
42#include <sys/param.h>
43#include <sys/queue.h>
44#include <sys/stat.h>
45#include <sys/sysctl.h>
46#include <sys/mount.h>
47
48#include <assert.h>
49#include <dirent.h>
50#include <dlfcn.h>
51#include <err.h>
52#include <errno.h>
53#include <fcntl.h>
54#include <pthread.h>
55#include <stdio.h>
56#include <stdlib.h>
57#include <string.h>
58#include <unistd.h>
59#include <stdint.h>
60
61#include <rump/ukfs.h>
62
63#include <rump/rump.h>
64#include <rump/rump_syscalls.h>
65
66#include "ukfs_int_disklabel.h"
67
68#define UKFS_MODE_DEFAULT 0555
69
/*
 * Handle for one mounted file system, created by doukfsmount() and
 * destroyed by ukfs_release().
 */
struct ukfs {
	pthread_spinlock_t ukfs_spin;	/* held while ukfs_cwd is swapped */

	struct mount *ukfs_mp;		/* handed out by ukfs_getmp() */
	struct lwp *ukfs_lwp;		/* base lwp, switched to by precall() */
	void *ukfs_specific;		/* opaque pointer, see ukfs_setspecific() */

	int ukfs_devfd;			/* host fd of the device, -1 if not open */

	char *ukfs_devpath;		/* set only if registered with etfs */
	char *ukfs_mountpath;		/* mountpoint; chroot target in precall() */
	char *ukfs_cwd;			/* chdir target in precall() */

	struct ukfs_part *ukfs_part;	/* partition the mount was made from */
};
85
86static int builddirs(const char *, mode_t,
87    int (*mkdirfn)(struct ukfs *, const char *, mode_t), struct ukfs *);
88
89struct mount *
90ukfs_getmp(struct ukfs *ukfs)
91{
92
93	return ukfs->ukfs_mp;
94}
95
96void
97ukfs_setspecific(struct ukfs *ukfs, void *priv)
98{
99
100	ukfs->ukfs_specific = priv;
101}
102
103void *
104ukfs_getspecific(struct ukfs *ukfs)
105{
106
107	return ukfs->ukfs_specific;
108}
109
#ifdef DONT_WANT_PTHREAD_LINKAGE
/*
 * Without pthread linkage the spinlock operations turn into no-ops.
 * They still have to evaluate to 0 ("success"): the return value of
 * pthread_spin_init() is used in an expression in this file, so an
 * empty expansion would not compile.
 */
static inline int
ukfs_spin_nop(void)
{

	return 0;
}
#define pthread_spin_lock(a)	ukfs_spin_nop()
#define pthread_spin_unlock(a)	ukfs_spin_nop()
#define pthread_spin_init(a,b)	ukfs_spin_nop()
#define pthread_spin_destroy(a)	ukfs_spin_nop()
#endif
116
117static int
118precall(struct ukfs *ukfs, struct lwp **curlwp)
119{
120
121	/* save previous.  ensure start from pristine context */
122	*curlwp = rump_pub_lwproc_curlwp();
123	if (*curlwp)
124		rump_pub_lwproc_switch(ukfs->ukfs_lwp);
125	rump_pub_lwproc_rfork(RUMP_RFCFDG);
126
127	if (rump_sys_chroot(ukfs->ukfs_mountpath) == -1)
128		return errno;
129	if (rump_sys_chdir(ukfs->ukfs_cwd) == -1)
130		return errno;
131
132	return 0;
133}
134
/*
 * Leave the context entered by precall(): release the current lwp and,
 * if there was an lwp before precall(), switch back to it.
 */
static void
postcall(struct lwp *curlwp)
{

	rump_pub_lwproc_releaselwp();
	if (curlwp == NULL)
		return;
	rump_pub_lwproc_switch(curlwp);
}
143
/*
 * PRECALL() declares the local ukfs_curlwp and calls precall() on the
 * variable named "ukfs" of the enclosing function.  If precall() fails,
 * errno is set and the enclosing function returns -1.  Since it starts
 * with a declaration, it must be used where a declaration is allowed.
 *
 * POSTCALL() undoes a successful PRECALL().  It expands to a plain
 * expression; the caller supplies the terminating semicolon.
 */
#define PRECALL()							\
struct lwp *ukfs_curlwp;						\
do {									\
	int ukfs_rv;							\
	if ((ukfs_rv = precall(ukfs, &ukfs_curlwp)) != 0) {		\
		errno = ukfs_rv;					\
		return -1;						\
	}								\
} while (/*CONSTCOND*/0)

#define POSTCALL() postcall(ukfs_curlwp)
155
/*
 * Description of the part of a device a file system is mounted from.
 * Instances allocated by ukfs_part_probe() are reference counted and
 * freed by ukfs_part_release().
 */
struct ukfs_part {
	pthread_spinlock_t part_lck;	/* protects part_refcount */
	int part_refcount;		/* freed when this drops to 0 */

	int part_type;			/* an enum ukfs_parttype value */
	char part_labelchar;		/* partition letter, disklabel type only */
	off_t part_devoff;		/* start offset on the device */
	off_t part_devsize;		/* size, or RUMP_ETFS_SIZE_ENDOFF */
};
165
/* Values stored in the part_type member of struct ukfs_part. */
enum ukfs_parttype { UKFS_PART_NONE, UKFS_PART_DISKLABEL, UKFS_PART_OFFSET };

/*
 * Statically allocated pseudo-partitions.  The rest of this file
 * recognizes them by address, and ukfs_part_release() never frees them.
 * Note that their part_lck members are not initialized here.
 */
static struct ukfs_part ukfs__part_none = {
	.part_type = UKFS_PART_NONE,
	.part_devoff = 0,
	.part_devsize = RUMP_ETFS_SIZE_ENDOFF,
};
/* passed by ukfs_mount(); makes doukfsmount() skip all device handling */
static struct ukfs_part ukfs__part_na;
struct ukfs_part *ukfs_part_none = &ukfs__part_none;
struct ukfs_part *ukfs_part_na = &ukfs__part_na;

/*
 * Convert a partition size into a value for the l_len member of
 * struct flock: RUMP_ETFS_SIZE_ENDOFF becomes 0, any other size is
 * used as is.
 */
#define PART2LOCKSIZE(len) ((len) == RUMP_ETFS_SIZE_ENDOFF ? 0 : (len))
178
179int
180_ukfs_init(int version)
181{
182	int rv;
183
184	if (version != UKFS_VERSION) {
185		printf("incompatible ukfs version, %d vs. %d\n",
186		    version, UKFS_VERSION);
187		errno = EPROGMISMATCH;
188		return -1;
189	}
190
191	if ((rv = rump_init()) != 0) {
192		errno = rv;
193		return -1;
194	}
195
196	return 0;
197}
198
/*
 * mkdir callback for builddirs() which creates the directory with
 * rump_sys_mkdir().  The ukfs argument is not used.
 */
/*ARGSUSED*/
static int
rumpmkdir(struct ukfs *dummy, const char *path, mode_t mode)
{
	int rv;

	rv = rump_sys_mkdir(path, mode);
	return rv;
}
206
207int
208ukfs_part_probe(char *devpath, struct ukfs_part **partp)
209{
210	struct ukfs_part *part;
211	char *p;
212	int error = 0;
213	int devfd = -1;
214
215	if ((p = strstr(devpath, UKFS_PARTITION_SCANMAGIC)) != NULL) {
216		fprintf(stderr, "ukfs: %%PART is deprecated.  use "
217		    "%%DISKLABEL instead\n");
218		errno = ENODEV;
219		return -1;
220	}
221
222	part = malloc(sizeof(*part));
223	if (part == NULL) {
224		errno = ENOMEM;
225		return -1;
226	}
227	if (pthread_spin_init(&part->part_lck, PTHREAD_PROCESS_PRIVATE) == -1) {
228		error = errno;
229		free(part);
230		errno = error;
231		return -1;
232	}
233	part->part_type = UKFS_PART_NONE;
234	part->part_refcount = 1;
235
236	/*
237	 * Check for magic in pathname:
238	 *   disklabel: /regularpath%DISKLABEL:labelchar%\0
239	 *     offsets: /regularpath%OFFSET:start,end%\0
240	 */
241#define MAGICADJ_DISKLABEL(p, n) (p+sizeof(UKFS_DISKLABEL_SCANMAGIC)-1+n)
242	if ((p = strstr(devpath, UKFS_DISKLABEL_SCANMAGIC)) != NULL
243	    && strlen(p) == UKFS_DISKLABEL_MAGICLEN
244	    && *(MAGICADJ_DISKLABEL(p,1)) == '%') {
245		if (*(MAGICADJ_DISKLABEL(p,0)) >= 'a' &&
246		    *(MAGICADJ_DISKLABEL(p,0)) < 'a' + UKFS_MAXPARTITIONS) {
247			struct ukfs__disklabel dl;
248			struct ukfs__partition *pp;
249			int imswapped;
250			char buf[65536];
251			char labelchar = *(MAGICADJ_DISKLABEL(p,0));
252			int partition = labelchar - 'a';
253			uint32_t poffset, psize;
254
255			*p = '\0';
256			devfd = open(devpath, O_RDONLY);
257			if (devfd == -1) {
258				error = errno;
259				goto out;
260			}
261
262			/* Locate the disklabel and find the partition. */
263			if (pread(devfd, buf, sizeof(buf), 0) == -1) {
264				error = errno;
265				goto out;
266			}
267
268			if (ukfs__disklabel_scan(&dl, &imswapped,
269			    buf, sizeof(buf)) != 0) {
270				error = ENOENT;
271				goto out;
272			}
273
274			if (dl.d_npartitions < partition) {
275				error = ENOENT;
276				goto out;
277			}
278
279			pp = &dl.d_partitions[partition];
280			part->part_type = UKFS_PART_DISKLABEL;
281			part->part_labelchar = labelchar;
282			if (imswapped) {
283				poffset = bswap32(pp->p_offset);
284				psize = bswap32(pp->p_size);
285			} else {
286				poffset = pp->p_offset;
287				psize = pp->p_size;
288			}
289			part->part_devoff = poffset << DEV_BSHIFT;
290			part->part_devsize = psize << DEV_BSHIFT;
291		} else {
292			error = EINVAL;
293		}
294#define MAGICADJ_OFFSET(p, n) (p+sizeof(UKFS_OFFSET_SCANMAGIC)-1+n)
295	} else if (((p = strstr(devpath, UKFS_OFFSET_SCANMAGIC)) != NULL)
296	    && (strlen(p) >= UKFS_OFFSET_MINLEN)) {
297		char *comma, *pers, *ep, *nptr;
298		u_quad_t val;
299
300		comma = strchr(p, ',');
301		if (comma == NULL) {
302			error = EINVAL;
303			goto out;
304		}
305		pers = strchr(comma, '%');
306		if (pers == NULL) {
307			error = EINVAL;
308			goto out;
309		}
310		*comma = '\0';
311		*pers = '\0';
312		*p = '\0';
313
314		nptr = MAGICADJ_OFFSET(p,0);
315		/* check if string is negative */
316		if (*nptr == '-') {
317			error = ERANGE;
318			goto out;
319		}
320		val = strtouq(nptr, &ep, 10);
321		if (val == UQUAD_MAX) {
322			error = ERANGE;
323			goto out;
324		}
325		if (*ep != '\0') {
326			error = EADDRNOTAVAIL; /* creative ;) */
327			goto out;
328		}
329		part->part_devoff = val;
330
331		/* omstart */
332
333		nptr = comma+1;
334		/* check if string is negative */
335		if (*nptr == '-') {
336			error = ERANGE;
337			goto out;
338		}
339		val = strtouq(nptr, &ep, 10);
340		if (val == UQUAD_MAX) {
341			error = ERANGE;
342			goto out;
343		}
344		if (*ep != '\0') {
345			error = EADDRNOTAVAIL; /* creative ;) */
346			goto out;
347		}
348		part->part_devsize = val;
349		part->part_type = UKFS_PART_OFFSET;
350	} else {
351		ukfs_part_release(part);
352		part = ukfs_part_none;
353	}
354
355 out:
356	if (devfd != -1)
357		close(devfd);
358	if (error) {
359		free(part);
360		errno = error;
361	} else {
362		*partp = part;
363	}
364
365	return error ? -1 : 0;
366}
367
368int
369ukfs_part_tostring(struct ukfs_part *part, char *str, size_t strsize)
370{
371	int rv;
372
373	*str = '\0';
374	/* "pseudo" values */
375	if (part == ukfs_part_na) {
376		errno = EINVAL;
377		return -1;
378	}
379	if (part == ukfs_part_none)
380		return 0;
381
382	rv = 0;
383	switch (part->part_type) {
384	case UKFS_PART_NONE:
385		break;
386
387	case UKFS_PART_DISKLABEL:
388		snprintf(str, strsize, "%%DISKLABEL:%c%%",part->part_labelchar);
389		rv = 1;
390		break;
391
392	case UKFS_PART_OFFSET:
393		snprintf(str, strsize, "[%llu,%llu]",
394		    (unsigned long long)part->part_devoff,
395		    (unsigned long long)(part->part_devoff+part->part_devsize));
396		rv = 1;
397		break;
398	}
399
400	return rv;
401}
402
403static void
404unlockdev(int fd, struct ukfs_part *part)
405{
406	struct flock flarg;
407
408	if (part == ukfs_part_na)
409		return;
410
411	memset(&flarg, 0, sizeof(flarg));
412	flarg.l_type = F_UNLCK;
413	flarg.l_whence = SEEK_SET;
414	flarg.l_start = part->part_devoff;
415	flarg.l_len = PART2LOCKSIZE(part->part_devsize);
416	if (fcntl(fd, F_SETLK, &flarg) == -1)
417		warn("ukfs: cannot unlock device file");
418}
419
420/*
421 * Open the disk file and flock it.  Also, if we are operation on
422 * an embedded partition, find the partition offset and size from
423 * the disklabel.
424 *
425 * We hard-fail only in two cases:
426 *  1) we failed to get the partition info out (don't know what offset
427 *     to mount from)
428 *  2) we failed to flock the source device (i.e. fcntl() fails,
429 *     not e.g. open() before it)
430 *
431 * Otherwise we let the code proceed to mount and let the file system
432 * throw the proper error.  The only questionable bit is that if we
433 * soft-fail before flock and mount does succeed...
434 *
435 * Returns: -1 error (errno reports error code)
436 *           0 success
437 *
438 * dfdp: -1  device is not open
439 *        n  device is open
440 */
441static int
442process_diskdevice(const char *devpath, struct ukfs_part *part, int rdonly,
443	int *dfdp)
444{
445	struct stat sb;
446	int rv = 0, devfd;
447
448	/* defaults */
449	*dfdp = -1;
450
451	devfd = open(devpath, rdonly ? O_RDONLY : O_RDWR);
452	if (devfd == -1) {
453		rv = errno;
454		goto out;
455	}
456
457	if (fstat(devfd, &sb) == -1) {
458		rv = errno;
459		goto out;
460	}
461
462	/*
463	 * We do this only for non-block device since the
464	 * (NetBSD) kernel allows block device open only once.
465	 * We also need to close the device for fairly obvious reasons.
466	 */
467	if (!S_ISBLK(sb.st_mode)) {
468		struct flock flarg;
469
470		memset(&flarg, 0, sizeof(flarg));
471		flarg.l_type = rdonly ? F_RDLCK : F_WRLCK;
472		flarg.l_whence = SEEK_SET;
473		flarg.l_start = part->part_devoff;
474		flarg.l_len = PART2LOCKSIZE(part->part_devsize);
475		if (fcntl(devfd, F_SETLK, &flarg) == -1) {
476			pid_t holder;
477			int sverrno;
478
479			sverrno = errno;
480			if (fcntl(devfd, F_GETLK, &flarg) != 1)
481				holder = flarg.l_pid;
482			else
483				holder = -1;
484			warnx("ukfs_mount: cannot lock device.  held by pid %d",
485			    holder);
486			rv = sverrno;
487			goto out;
488		}
489	} else {
490		close(devfd);
491		devfd = -1;
492	}
493	*dfdp = devfd;
494
495 out:
496	if (rv) {
497		if (devfd != -1)
498			close(devfd);
499	}
500
501	return rv;
502}
503
/*
 * Arguments for mfs_mounter(); the first five members are passed on to
 * rump_sys_mount() in this order.
 */
struct mountinfo {
	const char *mi_vfsname;
	const char *mi_mountpath;
	int mi_mntflags;
	void *mi_arg;
	size_t mi_alen;
	int *mi_error;		/* not referenced anywhere in this file */
};
512static void *
513mfs_mounter(void *arg)
514{
515	struct mountinfo *mi = arg;
516	int rv;
517
518	rv = rump_sys_mount(mi->mi_vfsname, mi->mi_mountpath, mi->mi_mntflags,
519	    mi->mi_arg, mi->mi_alen);
520	if (rv) {
521		warn("mfs mount failed.  fix me.");
522		abort(); /* XXX */
523	}
524
525	return NULL;
526}
527
528static struct ukfs *
529doukfsmount(const char *vfsname, const char *devpath, struct ukfs_part *part,
530	const char *mountpath, int mntflags, void *arg, size_t alen)
531{
532	struct ukfs *fs = NULL;
533	struct lwp *curlwp;
534	int rv = 0, devfd = -1;
535	int mounted = 0;
536	int regged = 0;
537
538	pthread_spin_lock(&part->part_lck);
539	part->part_refcount++;
540	pthread_spin_unlock(&part->part_lck);
541	if (part != ukfs_part_na) {
542		if ((rv = process_diskdevice(devpath, part,
543		    mntflags & MNT_RDONLY, &devfd)) != 0)
544			goto out;
545	}
546
547	fs = malloc(sizeof(struct ukfs));
548	if (fs == NULL) {
549		rv = ENOMEM;
550		goto out;
551	}
552	memset(fs, 0, sizeof(struct ukfs));
553
554	/* create our mountpoint.  this is never removed. */
555	if (builddirs(mountpath, 0777, rumpmkdir, NULL) == -1) {
556		if (errno != EEXIST) {
557			rv = errno;
558			goto out;
559		}
560	}
561
562	if (part != ukfs_part_na) {
563		/* LINTED */
564		rv = rump_pub_etfs_register_withsize(devpath, devpath,
565		    RUMP_ETFS_BLK, part->part_devoff, part->part_devsize);
566		if (rv) {
567			goto out;
568		}
569		regged = 1;
570	}
571
572	/*
573	 * MFS is special since mount(2) doesn't return.  Hence, we
574	 * create a thread here.  Could fix mfs to return, but there's
575	 * too much history for me to bother.
576	 */
577	if (strcmp(vfsname, MOUNT_MFS) == 0) {
578		pthread_t pt;
579		struct mountinfo mi;
580		int i;
581
582		mi.mi_vfsname = vfsname;
583		mi.mi_mountpath = mountpath;
584		mi.mi_mntflags = mntflags;
585		mi.mi_arg = arg;
586		mi.mi_alen = alen;
587
588		if (pthread_create(&pt, NULL, mfs_mounter, &mi) == -1) {
589			rv = errno;
590			goto out;
591		}
592
593		for (i = 0;i < 100000; i++) {
594			struct statvfs svfsb;
595
596			rv = rump_sys_statvfs1(mountpath, &svfsb, ST_WAIT);
597			if (rv == -1) {
598				rv = errno;
599				goto out;
600			}
601
602			if (strcmp(svfsb.f_mntonname, mountpath) == 0 &&
603			    strcmp(svfsb.f_fstypename, MOUNT_MFS) == 0) {
604				break;
605			}
606			usleep(1);
607		}
608	} else {
609		rv = rump_sys_mount(vfsname, mountpath, mntflags, arg, alen);
610		if (rv) {
611			rv = errno;
612			goto out;
613		}
614	}
615
616	mounted = 1;
617	rv = rump_pub_vfs_getmp(mountpath, &fs->ukfs_mp);
618	if (rv) {
619		goto out;
620	}
621
622	if (regged) {
623		fs->ukfs_devpath = strdup(devpath);
624	}
625	fs->ukfs_mountpath = strdup(mountpath);
626	pthread_spin_init(&fs->ukfs_spin, PTHREAD_PROCESS_SHARED);
627	fs->ukfs_devfd = devfd;
628	fs->ukfs_part = part;
629	assert(rv == 0);
630
631	curlwp = rump_pub_lwproc_curlwp();
632	rump_pub_lwproc_newlwp(0);
633	fs->ukfs_lwp = rump_pub_lwproc_curlwp();
634	fs->ukfs_cwd = strdup("/");
635	rump_pub_lwproc_switch(curlwp);
636
637 out:
638	if (rv) {
639		if (fs) {
640			free(fs);
641			fs = NULL;
642		}
643		if (mounted)
644			rump_sys_unmount(mountpath, MNT_FORCE);
645		if (regged)
646			rump_pub_etfs_remove(devpath);
647		if (devfd != -1) {
648			unlockdev(devfd, part);
649			close(devfd);
650		}
651		ukfs_part_release(part);
652		errno = rv;
653	}
654
655	return fs;
656}
657
658struct ukfs *
659ukfs_mount(const char *vfsname, const char *devpath,
660	const char *mountpath, int mntflags, void *arg, size_t alen)
661{
662
663	return doukfsmount(vfsname, devpath, ukfs_part_na,
664	    mountpath, mntflags, arg, alen);
665}
666
/*
 * Mount a file system from the given partition of a disk device or
 * image.
 *
 * Returns what doukfsmount() returns.
 */
struct ukfs *
ukfs_mount_disk(const char *vfsname, const char *devpath,
	struct ukfs_part *part, const char *mountpath, int mntflags,
	void *arg, size_t alen)
{
	struct ukfs *fs;

	fs = doukfsmount(vfsname, devpath, part,
	    mountpath, mntflags, arg, alen);
	return fs;
}
676
677int
678ukfs_release(struct ukfs *fs, int flags)
679{
680	struct lwp *curlwp = rump_pub_lwproc_curlwp();
681
682	/* get root lwp */
683	rump_pub_lwproc_switch(fs->ukfs_lwp);
684	rump_pub_lwproc_rfork(RUMP_RFCFDG);
685
686	if ((flags & UKFS_RELFLAG_NOUNMOUNT) == 0) {
687		int rv, mntflag, error;
688
689		mntflag = 0;
690		if (flags & UKFS_RELFLAG_FORCE)
691			mntflag = MNT_FORCE;
692
693		rv = rump_sys_unmount(fs->ukfs_mountpath, mntflag);
694		if (rv == -1) {
695			error = errno;
696			rump_pub_lwproc_releaselwp();
697			if (curlwp)
698				rump_pub_lwproc_switch(curlwp);
699			errno = error;
700			return -1;
701		}
702	}
703
704	if (fs->ukfs_devpath) {
705		rump_pub_etfs_remove(fs->ukfs_devpath);
706		free(fs->ukfs_devpath);
707	}
708	free(fs->ukfs_mountpath);
709	free(fs->ukfs_cwd);
710
711	/* release this routine's lwp and ukfs base lwp */
712	rump_pub_lwproc_releaselwp();
713	rump_pub_lwproc_switch(fs->ukfs_lwp);
714	rump_pub_lwproc_releaselwp();
715
716	pthread_spin_destroy(&fs->ukfs_spin);
717	if (fs->ukfs_devfd != -1) {
718		unlockdev(fs->ukfs_devfd, fs->ukfs_part);
719		close(fs->ukfs_devfd);
720	}
721	ukfs_part_release(fs->ukfs_part);
722	free(fs);
723
724	if (curlwp)
725		rump_pub_lwproc_switch(curlwp);
726
727	return 0;
728}
729
730void
731ukfs_part_release(struct ukfs_part *part)
732{
733	int release;
734
735	if (part != ukfs_part_none && part != ukfs_part_na) {
736		pthread_spin_lock(&part->part_lck);
737		release = --part->part_refcount == 0;
738		pthread_spin_unlock(&part->part_lck);
739		if (release) {
740			pthread_spin_destroy(&part->part_lck);
741			free(part);
742		}
743	}
744}
745
/*
 * Complete body of a simple int-returning wrapper: enter the ukfs
 * context, evaluate "thecall", leave the context and return the
 * result of the call.  If PRECALL() fails the function returns -1.
 *
 * The "ukfs" macro argument is not referenced in the expansion;
 * PRECALL() uses the variable named ukfs of the enclosing function.
 */
#define STDCALL(ukfs, thecall)						\
	int rv = 0;							\
									\
	PRECALL();							\
	rv = thecall;							\
	POSTCALL();							\
	return rv;
753
754int
755ukfs_opendir(struct ukfs *ukfs, const char *dirname, struct ukfs_dircookie **c)
756{
757	struct vnode *vp;
758	int rv;
759
760	PRECALL();
761	rv = rump_pub_namei(RUMP_NAMEI_LOOKUP, RUMP_NAMEI_LOCKLEAF, dirname,
762	    NULL, &vp, NULL);
763	POSTCALL();
764
765	if (rv == 0) {
766		RUMP_VOP_UNLOCK(vp);
767	} else {
768		errno = rv;
769		rv = -1;
770	}
771
772	/*LINTED*/
773	*c = (struct ukfs_dircookie *)vp;
774	return rv;
775}
776
777static int
778getmydents(struct vnode *vp, off_t *off, uint8_t *buf, size_t bufsize)
779{
780	struct uio *uio;
781	size_t resid;
782	int rv, eofflag;
783	struct kauth_cred *cred;
784
785	uio = rump_pub_uio_setup(buf, bufsize, *off, RUMPUIO_READ);
786	cred = rump_pub_cred_create(0, 0, 0, NULL);
787	rv = RUMP_VOP_READDIR(vp, uio, cred, &eofflag, NULL, NULL);
788	rump_pub_cred_put(cred);
789	RUMP_VOP_UNLOCK(vp);
790	*off = rump_pub_uio_getoff(uio);
791	resid = rump_pub_uio_free(uio);
792
793	if (rv) {
794		errno = rv;
795		return -1;
796	}
797
798	/* LINTED: not totally correct return type, but follows syscall */
799	return bufsize - resid;
800}
801
802/*ARGSUSED*/
803int
804ukfs_getdents_cookie(struct ukfs *ukfs, struct ukfs_dircookie *c, off_t *off,
805	uint8_t *buf, size_t bufsize)
806{
807	/*LINTED*/
808	struct vnode *vp = (struct vnode *)c;
809
810	RUMP_VOP_LOCK(vp, RUMP_LK_SHARED);
811	return getmydents(vp, off, buf, bufsize);
812}
813
814int
815ukfs_getdents(struct ukfs *ukfs, const char *dirname, off_t *off,
816	uint8_t *buf, size_t bufsize)
817{
818	struct vnode *vp;
819	int rv;
820
821	PRECALL();
822	rv = rump_pub_namei(RUMP_NAMEI_LOOKUP, RUMP_NAMEI_LOCKLEAF, dirname,
823	    NULL, &vp, NULL);
824	if (rv) {
825		POSTCALL();
826		errno = rv;
827		return -1;
828	}
829
830	rv = getmydents(vp, off, buf, bufsize);
831	rump_pub_vp_rele(vp);
832	POSTCALL();
833	return rv;
834}
835
/*
 * Release a directory cookie obtained from ukfs_opendir().
 * The ukfs argument is not used.  Always returns 0.
 */
/*ARGSUSED*/
int
ukfs_closedir(struct ukfs *ukfs, struct ukfs_dircookie *c)
{
	struct vnode *vp;

	/*LINTED*/
	vp = (struct vnode *)c;
	rump_pub_vp_rele(vp);
	return 0;
}
845
/*
 * Open filename with rump_sys_open() using the given flags and a
 * mode of 0.
 *
 * Returns the file descriptor, or -1 on failure.
 */
int
ukfs_open(struct ukfs *ukfs, const char *filename, int flags)
{
	int fd;

	PRECALL();
	fd = rump_sys_open(filename, flags, 0);
	POSTCALL();

	/* fd is already -1 if the open failed */
	return fd;
}
859
860ssize_t
861ukfs_read(struct ukfs *ukfs, const char *filename, off_t off,
862	uint8_t *buf, size_t bufsize)
863{
864	int fd;
865	ssize_t xfer = -1; /* XXXgcc */
866
867	PRECALL();
868	fd = rump_sys_open(filename, RUMP_O_RDONLY, 0);
869	if (fd == -1)
870		goto out;
871
872	xfer = rump_sys_pread(fd, buf, bufsize, off);
873	rump_sys_close(fd);
874
875 out:
876	POSTCALL();
877	if (fd == -1) {
878		return -1;
879	}
880	return xfer;
881}
882
/*
 * Read from an already open descriptor at offset off.  The ukfs
 * argument is not used.  Returns the result of rump_sys_pread().
 */
/*ARGSUSED*/
ssize_t
ukfs_read_fd(struct ukfs *ukfs, int fd, off_t off, uint8_t *buf, size_t buflen)
{
	ssize_t nread;

	nread = rump_sys_pread(fd, buf, buflen, off);
	return nread;
}
890
891ssize_t
892ukfs_write(struct ukfs *ukfs, const char *filename, off_t off,
893	uint8_t *buf, size_t bufsize)
894{
895	int fd;
896	ssize_t xfer = -1; /* XXXgcc */
897
898	PRECALL();
899	fd = rump_sys_open(filename, RUMP_O_WRONLY, 0);
900	if (fd == -1)
901		goto out;
902
903	/* write and commit */
904	xfer = rump_sys_pwrite(fd, buf, bufsize, off);
905	if (xfer > 0)
906		rump_sys_fsync(fd);
907
908	rump_sys_close(fd);
909
910 out:
911	POSTCALL();
912	if (fd == -1) {
913		return -1;
914	}
915	return xfer;
916}
917
/*
 * Write to an already open descriptor at offset off.  If dosync is
 * set and something was written, the file is synced as well.  The
 * ukfs argument is not used.  Returns the result of rump_sys_pwrite().
 */
/*ARGSUSED*/
ssize_t
ukfs_write_fd(struct ukfs *ukfs, int fd, off_t off, uint8_t *buf, size_t buflen,
	int dosync)
{
	ssize_t nwritten = rump_sys_pwrite(fd, buf, buflen, off);

	if (dosync && nwritten > 0)
		rump_sys_fsync(fd);

	return nwritten;
}
931
/*
 * Close a descriptor with rump_sys_close().  The result of the close
 * is ignored and the ukfs argument is not used.  Always returns 0.
 */
/*ARGSUSED*/
int
ukfs_close(struct ukfs *ukfs, int fd)
{

	(void)rump_sys_close(fd);
	return 0;
}
940
941int
942ukfs_create(struct ukfs *ukfs, const char *filename, mode_t mode)
943{
944	int fd;
945
946	PRECALL();
947	fd = rump_sys_open(filename, RUMP_O_WRONLY | RUMP_O_CREAT, mode);
948	if (fd == -1)
949		return -1;
950	rump_sys_close(fd);
951
952	POSTCALL();
953	return 0;
954}
955
/*
 * Call rump_sys_mknod() in the context of the ukfs instance and
 * return its result.  Returns -1 if the context cannot be entered.
 */
int
ukfs_mknod(struct ukfs *ukfs, const char *path, mode_t mode, dev_t dev)
{

	STDCALL(ukfs, rump_sys_mknod(path, mode, dev));
}
962
/*
 * Call rump_sys_mkfifo() in the context of the ukfs instance and
 * return its result.  Returns -1 if the context cannot be entered.
 */
int
ukfs_mkfifo(struct ukfs *ukfs, const char *path, mode_t mode)
{

	STDCALL(ukfs, rump_sys_mkfifo(path, mode));
}
969
/*
 * Call rump_sys_mkdir() in the context of the ukfs instance and
 * return its result.  Returns -1 if the context cannot be entered.
 * Also used as the mkdir callback of ukfs_util_builddirs().
 */
int
ukfs_mkdir(struct ukfs *ukfs, const char *filename, mode_t mode)
{

	STDCALL(ukfs, rump_sys_mkdir(filename, mode));
}
976
/*
 * Call rump_sys_unlink() in the context of the ukfs instance and
 * return its result.  Returns -1 if the context cannot be entered.
 */
int
ukfs_remove(struct ukfs *ukfs, const char *filename)
{

	STDCALL(ukfs, rump_sys_unlink(filename));
}
983
/*
 * Call rump_sys_rmdir() in the context of the ukfs instance and
 * return its result.  Returns -1 if the context cannot be entered.
 */
int
ukfs_rmdir(struct ukfs *ukfs, const char *filename)
{

	STDCALL(ukfs, rump_sys_rmdir(filename));
}
990
/*
 * Call rump_sys_link(filename, f_create) in the context of the ukfs
 * instance and return its result.  Returns -1 if the context cannot
 * be entered.
 */
int
ukfs_link(struct ukfs *ukfs, const char *filename, const char *f_create)
{

	STDCALL(ukfs, rump_sys_link(filename, f_create));
}
997
/*
 * Call rump_sys_symlink(filename, linkname) in the context of the
 * ukfs instance and return its result.  Returns -1 if the context
 * cannot be entered.
 */
int
ukfs_symlink(struct ukfs *ukfs, const char *filename, const char *linkname)
{

	STDCALL(ukfs, rump_sys_symlink(filename, linkname));
}
1004
/*
 * Call rump_sys_readlink() on filename in the context of the ukfs
 * instance, with linkbuf/buflen as the result buffer.
 *
 * Returns the result of rump_sys_readlink(), or -1 if the context
 * cannot be entered.
 */
ssize_t
ukfs_readlink(struct ukfs *ukfs, const char *filename,
	char *linkbuf, size_t buflen)
{
	ssize_t linklen;

	PRECALL();
	linklen = rump_sys_readlink(filename, linkbuf, buflen);
	POSTCALL();

	return linklen;
}
1016
/*
 * Call rump_sys_rename(from, to) in the context of the ukfs instance
 * and return its result.  Returns -1 if the context cannot be entered.
 */
int
ukfs_rename(struct ukfs *ukfs, const char *from, const char *to)
{

	STDCALL(ukfs, rump_sys_rename(from, to));
}
1023
1024int
1025ukfs_chdir(struct ukfs *ukfs, const char *path)
1026{
1027	char *newpath, *oldpath;
1028	int rv;
1029
1030	PRECALL();
1031	rv = rump_sys_chdir(path);
1032	if (rv == -1)
1033		goto out;
1034
1035	newpath = malloc(MAXPATHLEN);
1036	if (rump_sys___getcwd(newpath, MAXPATHLEN) == -1) {
1037		goto out;
1038	}
1039
1040	pthread_spin_lock(&ukfs->ukfs_spin);
1041	oldpath = ukfs->ukfs_cwd;
1042	ukfs->ukfs_cwd = newpath;
1043	pthread_spin_unlock(&ukfs->ukfs_spin);
1044	free(oldpath);
1045
1046 out:
1047	POSTCALL();
1048	return rv;
1049}
1050
/*
 * Call rump_sys_stat() on filename in the context of the ukfs
 * instance, storing the result in file_stat.
 *
 * Returns the result of rump_sys_stat(), or -1 if the context cannot
 * be entered.
 */
int
ukfs_stat(struct ukfs *ukfs, const char *filename, struct stat *file_stat)
{
	int ret;

	PRECALL();
	ret = rump_sys_stat(filename, file_stat);
	POSTCALL();
	return ret;
}
1062
/*
 * Call rump_sys_lstat() on filename in the context of the ukfs
 * instance, storing the result in file_stat.
 *
 * Returns the result of rump_sys_lstat(), or -1 if the context cannot
 * be entered.
 */
int
ukfs_lstat(struct ukfs *ukfs, const char *filename, struct stat *file_stat)
{
	int ret;

	PRECALL();
	ret = rump_sys_lstat(filename, file_stat);
	POSTCALL();
	return ret;
}
1074
/*
 * Call rump_sys_chmod() in the context of the ukfs instance and
 * return its result.  Returns -1 if the context cannot be entered.
 */
int
ukfs_chmod(struct ukfs *ukfs, const char *filename, mode_t mode)
{

	STDCALL(ukfs, rump_sys_chmod(filename, mode));
}
1081
/*
 * Call rump_sys_lchmod() in the context of the ukfs instance and
 * return its result.  Returns -1 if the context cannot be entered.
 */
int
ukfs_lchmod(struct ukfs *ukfs, const char *filename, mode_t mode)
{

	STDCALL(ukfs, rump_sys_lchmod(filename, mode));
}
1088
/*
 * Call rump_sys_chown() in the context of the ukfs instance and
 * return its result.  Returns -1 if the context cannot be entered.
 */
int
ukfs_chown(struct ukfs *ukfs, const char *filename, uid_t uid, gid_t gid)
{

	STDCALL(ukfs, rump_sys_chown(filename, uid, gid));
}
1095
/*
 * Call rump_sys_lchown() in the context of the ukfs instance and
 * return its result.  Returns -1 if the context cannot be entered.
 */
int
ukfs_lchown(struct ukfs *ukfs, const char *filename, uid_t uid, gid_t gid)
{

	STDCALL(ukfs, rump_sys_lchown(filename, uid, gid));
}
1102
/*
 * Call rump_sys_chflags() in the context of the ukfs instance and
 * return its result.  Returns -1 if the context cannot be entered.
 */
int
ukfs_chflags(struct ukfs *ukfs, const char *filename, u_long flags)
{

	STDCALL(ukfs, rump_sys_chflags(filename, flags));
}
1109
/*
 * Call rump_sys_lchflags() in the context of the ukfs instance and
 * return its result.  Returns -1 if the context cannot be entered.
 */
int
ukfs_lchflags(struct ukfs *ukfs, const char *filename, u_long flags)
{

	STDCALL(ukfs, rump_sys_lchflags(filename, flags));
}
1116
/*
 * Call rump_sys_utimes() in the context of the ukfs instance and
 * return its result.  Returns -1 if the context cannot be entered.
 */
int
ukfs_utimes(struct ukfs *ukfs, const char *filename, const struct timeval *tptr)
{

	STDCALL(ukfs, rump_sys_utimes(filename, tptr));
}
1123
/*
 * Call rump_sys_lutimes() in the context of the ukfs instance and
 * return its result.  Returns -1 if the context cannot be entered.
 */
int
ukfs_lutimes(struct ukfs *ukfs, const char *filename,
	      const struct timeval *tptr)
{

	STDCALL(ukfs, rump_sys_lutimes(filename, tptr));
}
1131
1132/*
1133 * Dynamic module support
1134 */
1135
1136/* load one library */
1137
1138/*
1139 * XXX: the dlerror stuff isn't really threadsafe, but then again I
1140 * can't protect against other threads calling dl*() outside of ukfs,
1141 * so just live with it being flimsy
1142 */
/*
 * Load the shared object fname and initialize the modules in its
 * "modules" link set.
 *
 * Returns  1 if the modules were initialized,
 *          0 if dlopen() failed with an "Undefined symbol" message
 *            (ukfs_modload_dir() retries such objects later),
 *         -1 on any other failure.  errno is set only if the object
 *            could be opened.
 */
int
ukfs_modload(const char *fname)
{
	void *handle;
	const struct modinfo *const *mi_start, *const *mi_end;
	int error;

	handle = dlopen(fname, RTLD_LAZY|RTLD_GLOBAL);
	if (handle == NULL) {
		const char *dlmsg = dlerror();

		/* dlerror() may return NULL; strstr() must not see that */
		if (dlmsg == NULL)
			dlmsg = "unknown error";
		if (strstr(dlmsg, "Undefined symbol"))
			return 0;
		/* warnx() appends the newline itself */
		warnx("dlopen %s failed: %s", fname, dlmsg);
		/* XXXerrno */
		return -1;
	}

	mi_start = dlsym(handle, "__start_link_set_modules");
	mi_end = dlsym(handle, "__stop_link_set_modules");
	if (mi_start && mi_end) {
		error = rump_pub_module_init(mi_start,
		    (size_t)(mi_end-mi_start));
		if (error)
			goto errclose;
		/* the handle is kept open on purpose on success */
		return 1;
	}
	error = EINVAL;

 errclose:
	dlclose(handle);
	errno = error;
	return -1;
}
1176
/*
 * List entry for a module whose load has to be retried by
 * ukfs_modload_dir().
 */
struct loadfail {
	char *pname;		/* path of the module, allocated with strdup() */

	LIST_ENTRY(loadfail) entries;
};

/* only file names with this prefix and suffix are considered modules */
#define RUMPFSMOD_PREFIX "librumpfs_"
#define RUMPFSMOD_SUFFIX ".so"
1185
1186int
1187ukfs_modload_dir(const char *dir)
1188{
1189	char nbuf[MAXPATHLEN+1], *p;
1190	struct dirent entry, *result;
1191	DIR *libdir;
1192	struct loadfail *lf, *nlf;
1193	int error, nloaded = 0, redo;
1194	LIST_HEAD(, loadfail) lfs;
1195
1196	libdir = opendir(dir);
1197	if (libdir == NULL)
1198		return -1;
1199
1200	LIST_INIT(&lfs);
1201	for (;;) {
1202		if ((error = readdir_r(libdir, &entry, &result)) != 0)
1203			break;
1204		if (!result)
1205			break;
1206		if (strncmp(result->d_name, RUMPFSMOD_PREFIX,
1207		    strlen(RUMPFSMOD_PREFIX)) != 0)
1208			continue;
1209		if (((p = strstr(result->d_name, RUMPFSMOD_SUFFIX)) == NULL)
1210		    || strlen(p) != strlen(RUMPFSMOD_SUFFIX))
1211			continue;
1212		strlcpy(nbuf, dir, sizeof(nbuf));
1213		strlcat(nbuf, "/", sizeof(nbuf));
1214		strlcat(nbuf, result->d_name, sizeof(nbuf));
1215		switch (ukfs_modload(nbuf)) {
1216		case 0:
1217			lf = malloc(sizeof(*lf));
1218			if (lf == NULL) {
1219				error = ENOMEM;
1220				break;
1221			}
1222			lf->pname = strdup(nbuf);
1223			if (lf->pname == NULL) {
1224				free(lf);
1225				error = ENOMEM;
1226				break;
1227			}
1228			LIST_INSERT_HEAD(&lfs, lf, entries);
1229			break;
1230		case 1:
1231			nloaded++;
1232			break;
1233		default:
1234			/* ignore errors */
1235			break;
1236		}
1237	}
1238	closedir(libdir);
1239	if (error && nloaded != 0)
1240		error = 0;
1241
1242	/*
1243	 * El-cheapo dependency calculator.  Just try to load the
1244	 * modules n times in a loop
1245	 */
1246	for (redo = 1; redo;) {
1247		redo = 0;
1248		nlf = LIST_FIRST(&lfs);
1249		while ((lf = nlf) != NULL) {
1250			nlf = LIST_NEXT(lf, entries);
1251			if (ukfs_modload(lf->pname) == 1) {
1252				nloaded++;
1253				redo = 1;
1254				LIST_REMOVE(lf, entries);
1255				free(lf->pname);
1256				free(lf);
1257			}
1258		}
1259	}
1260
1261	while ((lf = LIST_FIRST(&lfs)) != NULL) {
1262		LIST_REMOVE(lf, entries);
1263		free(lf->pname);
1264		free(lf);
1265	}
1266
1267	if (error && nloaded == 0) {
1268		errno = error;
1269		return -1;
1270	}
1271
1272	return nloaded;
1273}
1274
1275/* XXX: this code uses definitions from NetBSD, needs rumpdefs */
1276ssize_t
1277ukfs_vfstypes(char *buf, size_t buflen)
1278{
1279	int mib[3];
1280	struct sysctlnode q, ans[128];
1281	size_t alen;
1282	int i;
1283
1284	mib[0] = CTL_VFS;
1285	mib[1] = VFS_GENERIC;
1286	mib[2] = CTL_QUERY;
1287	alen = sizeof(ans);
1288
1289	memset(&q, 0, sizeof(q));
1290	q.sysctl_flags = SYSCTL_VERSION;
1291
1292	if (rump_sys___sysctl(mib, 3, ans, &alen, &q, sizeof(q)) == -1) {
1293		return -1;
1294	}
1295
1296	for (i = 0; i < alen/sizeof(ans[0]); i++)
1297		if (strcmp("fstypes", ans[i].sysctl_name) == 0)
1298			break;
1299	if (i == alen/sizeof(ans[0])) {
1300		errno = ENXIO;
1301		return -1;
1302	}
1303
1304	mib[0] = CTL_VFS;
1305	mib[1] = VFS_GENERIC;
1306	mib[2] = ans[i].sysctl_num;
1307
1308	if (rump_sys___sysctl(mib, 3, buf, &buflen, NULL, 0) == -1) {
1309		return -1;
1310	}
1311
1312	return buflen;
1313}
1314
1315/*
1316 * Utilities
1317 */
/*
 * Create pathname including all missing leading components, like
 * "mkdir -p".  Each component is created by calling mkdirfn with fs
 * and the mode with the current umask applied.  A component which
 * already exists is not an error.
 *
 * Returns the result of the last mkdirfn call (with EEXIST turned
 * into 0), or -1 with errno set to ENOMEM if the path cannot be
 * copied.
 */
static int
builddirs(const char *pathname, mode_t mode,
	int (*mkdirfn)(struct ukfs *, const char *, mode_t), struct ukfs *fs)
{
	char *path, *cursor;
	mode_t mask;
	int rv;
	bool last;

	/* read the current umask; it is put back right away */
	mask = umask(0);
	umask(mask);

	path = strdup(pathname);
	if (path == NULL) {
		errno = ENOMEM;
		return -1;
	}

	cursor = path;
	do {
		/* move to the end of the next component */
		cursor += strspn(cursor, "/");
		cursor += strcspn(cursor, "/");
		last = (*cursor == '\0');

		/* cut the path here for the duration of the call */
		*cursor = '\0';
		rv = mkdirfn(fs, path, mode & ~mask);
		if (errno == EEXIST)
			rv = 0;
		if (rv == -1)
			break;

		if (!last)
			*cursor = '/';
	} while (!last);

	free(path);

	return rv;
}
1360
1361int
1362ukfs_util_builddirs(struct ukfs *ukfs, const char *pathname, mode_t mode)
1363{
1364
1365	return builddirs(pathname, mode, ukfs_mkdir, ukfs);
1366}
1367