1/*	$NetBSD: puffs.c,v 1.116 2011/05/03 13:16:47 manu Exp $	*/
2
3/*
4 * Copyright (c) 2005, 2006, 2007  Antti Kantee.  All Rights Reserved.
5 *
6 * Development of this software was supported by the
7 * Google Summer of Code program and the Ulla Tuominen Foundation.
8 * The Google SoC project was mentored by Bill Studenmund.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
20 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32#include <sys/cdefs.h>
33#if !defined(lint)
34__RCSID("$NetBSD: puffs.c,v 1.116 2011/05/03 13:16:47 manu Exp $");
35#endif /* !lint */
36
37#include <sys/param.h>
38#include <sys/mount.h>
39
40#include <assert.h>
41#include <err.h>
42#include <errno.h>
43#include <fcntl.h>
44#include <mntopts.h>
45#include <paths.h>
46#include <pthread.h>
47#include <puffs.h>
48#include <stdio.h>
49#include <stdlib.h>
50#include <string.h>
51#include <syslog.h>
52#include <unistd.h>
53
54#include "puffs_priv.h"
55
/*
 * Mount option table made available to file servers.  Most file systems
 * want this for opts, so just give it to them.  It consists of the
 * MOPT_STDOPTS and PUFFSMOPT_STD entries followed by MOPT_NULL.
 */
const struct mntopt puffsmopts[] = {
	MOPT_STDOPTS,
	PUFFSMOPT_STD,
	MOPT_NULL,
};
62
/*
 * Statically initialized, library-wide mutex.
 * NOTE(review): presumably this is what PU_LOCK()/PU_UNLOCK() take and
 * release; those macros are defined outside this file -- confirm.
 */
pthread_mutex_t pu_lock = PTHREAD_MUTEX_INITIALIZER;
64
65#define FILLOP(lower, upper)						\
66do {									\
67	if (pops->puffs_node_##lower)					\
68		opmask[PUFFS_VN_##upper] = 1;				\
69} while (/*CONSTCOND*/0)
70static void
71fillvnopmask(struct puffs_ops *pops, struct puffs_kargs *pa)
72{
73	uint8_t *opmask = pa->pa_vnopmask;
74
75	memset(opmask, 0, sizeof(pa->pa_vnopmask));
76
77	FILLOP(create,   CREATE);
78	FILLOP(mknod,    MKNOD);
79	FILLOP(open,     OPEN);
80	FILLOP(close,    CLOSE);
81	FILLOP(access,   ACCESS);
82	FILLOP(getattr,  GETATTR);
83	FILLOP(setattr,  SETATTR);
84	FILLOP(poll,     POLL);
85	FILLOP(mmap,     MMAP);
86	FILLOP(fsync,    FSYNC);
87	FILLOP(seek,     SEEK);
88	FILLOP(remove,   REMOVE);
89	FILLOP(link,     LINK);
90	FILLOP(rename,   RENAME);
91	FILLOP(mkdir,    MKDIR);
92	FILLOP(rmdir,    RMDIR);
93	FILLOP(symlink,  SYMLINK);
94	FILLOP(readdir,  READDIR);
95	FILLOP(readlink, READLINK);
96	FILLOP(reclaim,  RECLAIM);
97	FILLOP(inactive, INACTIVE);
98	FILLOP(print,    PRINT);
99	FILLOP(read,     READ);
100	FILLOP(write,    WRITE);
101	FILLOP(advlock,  ADVLOCK);
102	FILLOP(abortop,  ABORTOP);
103	FILLOP(pathconf, PATHCONF);
104
105	FILLOP(getextattr,  GETEXTATTR);
106	FILLOP(setextattr,  SETEXTATTR);
107	FILLOP(listextattr, LISTEXTATTR);
108	FILLOP(deleteextattr, DELETEEXTATTR);
109}
110#undef FILLOP
111
112/*
113 * Go over all framev entries and write everything we can.  This is
114 * mostly for the benefit of delivering "unmount" to the kernel.
115 */
116static void
117finalpush(struct puffs_usermount *pu)
118{
119	struct puffs_fctrl_io *fio;
120
121	LIST_FOREACH(fio, &pu->pu_ios, fio_entries) {
122		if (fio->stat & FIO_WRGONE)
123			continue;
124
125		puffs__framev_output(pu, fio->fctrl, fio);
126	}
127}
128
129/*ARGSUSED*/
130void
131puffs_kernerr_abort(struct puffs_usermount *pu, uint8_t type,
132	int error, const char *str, puffs_cookie_t cookie)
133{
134
135	fprintf(stderr, "abort: type %d, error %d, cookie %p (%s)\n",
136	    type, error, cookie, str);
137	abort();
138}
139
140/*ARGSUSED*/
141void
142puffs_kernerr_log(struct puffs_usermount *pu, uint8_t type,
143	int error, const char *str, puffs_cookie_t cookie)
144{
145
146	syslog(LOG_WARNING, "kernel: type %d, error %d, cookie %p (%s)\n",
147	    type, error, cookie, str);
148}
149
150int
151puffs_getselectable(struct puffs_usermount *pu)
152{
153
154	return pu->pu_fd;
155}
156
157uint64_t
158puffs__nextreq(struct puffs_usermount *pu)
159{
160	uint64_t rv;
161
162	PU_LOCK();
163	rv = pu->pu_nextreq++ | (uint64_t)1<<63;
164	PU_UNLOCK();
165
166	return rv;
167}
168
169int
170puffs_setblockingmode(struct puffs_usermount *pu, int mode)
171{
172	int rv, x;
173
174	assert(puffs_getstate(pu) == PUFFS_STATE_RUNNING);
175
176	if (mode != PUFFSDEV_BLOCK && mode != PUFFSDEV_NONBLOCK) {
177		errno = EINVAL;
178		return -1;
179	}
180
181	x = mode;
182	rv = ioctl(pu->pu_fd, FIONBIO, &x);
183
184	if (rv == 0) {
185		if (mode == PUFFSDEV_BLOCK)
186			pu->pu_state &= ~PU_ASYNCFD;
187		else
188			pu->pu_state |= PU_ASYNCFD;
189	}
190
191	return rv;
192}
193
194int
195puffs_getstate(struct puffs_usermount *pu)
196{
197
198	return pu->pu_state & PU_STATEMASK;
199}
200
201void
202puffs_setstacksize(struct puffs_usermount *pu, size_t ss)
203{
204	long psize, minsize;
205	int stackshift;
206	int bonus;
207
208	assert(puffs_getstate(pu) == PUFFS_STATE_BEFOREMOUNT);
209
210	psize = sysconf(_SC_PAGESIZE);
211	minsize = 4*psize;
212	if (ss < (size_t)minsize || ss == PUFFS_STACKSIZE_MIN) {
213		if (ss != PUFFS_STACKSIZE_MIN)
214			fprintf(stderr, "puffs_setstacksize: adjusting "
215			    "stacksize to minimum %ld\n", minsize);
216		ss = 4*psize;
217	}
218
219	stackshift = -1;
220	bonus = 0;
221	while (ss) {
222		if (ss & 0x1)
223			bonus++;
224		ss >>= 1;
225		stackshift++;
226	}
227	if (bonus > 1) {
228		stackshift++;
229		fprintf(stderr, "puffs_setstacksize: using next power of two: "
230		    "%d\n", 1<<stackshift);
231	}
232
233	pu->pu_cc_stackshift = stackshift;
234}
235
236struct puffs_pathobj *
237puffs_getrootpathobj(struct puffs_usermount *pu)
238{
239	struct puffs_node *pnr;
240
241	pnr = pu->pu_pn_root;
242	if (pnr == NULL) {
243		errno = ENOENT;
244		return NULL;
245	}
246
247	return &pnr->pn_po;
248}
249
250void
251puffs_setroot(struct puffs_usermount *pu, struct puffs_node *pn)
252{
253
254	pu->pu_pn_root = pn;
255}
256
257struct puffs_node *
258puffs_getroot(struct puffs_usermount *pu)
259{
260
261	return pu->pu_pn_root;
262}
263
264void
265puffs_setrootinfo(struct puffs_usermount *pu, enum vtype vt,
266	vsize_t vsize, dev_t rdev)
267{
268	struct puffs_kargs *pargs = pu->pu_kargp;
269
270	if (puffs_getstate(pu) != PUFFS_STATE_BEFOREMOUNT) {
271		warnx("puffs_setrootinfo: call has effect only "
272		    "before mount\n");
273		return;
274	}
275
276	pargs->pa_root_vtype = vt;
277	pargs->pa_root_vsize = vsize;
278	pargs->pa_root_rdev = rdev;
279}
280
281void *
282puffs_getspecific(struct puffs_usermount *pu)
283{
284
285	return pu->pu_privdata;
286}
287
288void
289puffs_setspecific(struct puffs_usermount *pu, void *privdata)
290{
291
292	pu->pu_privdata = privdata;
293}
294
295void
296puffs_setmntinfo(struct puffs_usermount *pu,
297	const char *mntfromname, const char *puffsname)
298{
299	struct puffs_kargs *pargs = pu->pu_kargp;
300
301	(void)strlcpy(pargs->pa_mntfromname, mntfromname,
302	    sizeof(pargs->pa_mntfromname));
303	(void)strlcpy(pargs->pa_typename, puffsname,
304	    sizeof(pargs->pa_typename));
305}
306
307size_t
308puffs_getmaxreqlen(struct puffs_usermount *pu)
309{
310
311	return pu->pu_maxreqlen;
312}
313
314void
315puffs_setmaxreqlen(struct puffs_usermount *pu, size_t reqlen)
316{
317
318	if (puffs_getstate(pu) != PUFFS_STATE_BEFOREMOUNT)
319		warnx("puffs_setmaxreqlen: call has effect only "
320		    "before mount\n");
321
322	pu->pu_kargp->pa_maxmsglen = reqlen;
323}
324
325void
326puffs_setfhsize(struct puffs_usermount *pu, size_t fhsize, int flags)
327{
328
329	if (puffs_getstate(pu) != PUFFS_STATE_BEFOREMOUNT)
330		warnx("puffs_setfhsize: call has effect only before mount\n");
331
332	pu->pu_kargp->pa_fhsize = fhsize;
333	pu->pu_kargp->pa_fhflags = flags;
334}
335
336void
337puffs_setncookiehash(struct puffs_usermount *pu, int nhash)
338{
339
340	if (puffs_getstate(pu) != PUFFS_STATE_BEFOREMOUNT)
341		warnx("puffs_setfhsize: call has effect only before mount\n");
342
343	pu->pu_kargp->pa_nhashbuckets = nhash;
344}
345
346void
347puffs_set_pathbuild(struct puffs_usermount *pu, pu_pathbuild_fn fn)
348{
349
350	pu->pu_pathbuild = fn;
351}
352
353void
354puffs_set_pathtransform(struct puffs_usermount *pu, pu_pathtransform_fn fn)
355{
356
357	pu->pu_pathtransform = fn;
358}
359
360void
361puffs_set_pathcmp(struct puffs_usermount *pu, pu_pathcmp_fn fn)
362{
363
364	pu->pu_pathcmp = fn;
365}
366
367void
368puffs_set_pathfree(struct puffs_usermount *pu, pu_pathfree_fn fn)
369{
370
371	pu->pu_pathfree = fn;
372}
373
374void
375puffs_set_namemod(struct puffs_usermount *pu, pu_namemod_fn fn)
376{
377
378	pu->pu_namemod = fn;
379}
380
381void
382puffs_set_errnotify(struct puffs_usermount *pu, pu_errnotify_fn fn)
383{
384
385	pu->pu_errnotify = fn;
386}
387
388void
389puffs_set_cmap(struct puffs_usermount *pu, pu_cmap_fn fn)
390{
391
392	pu->pu_cmap = fn;
393}
394
395void
396puffs_ml_setloopfn(struct puffs_usermount *pu, puffs_ml_loop_fn lfn)
397{
398
399	pu->pu_ml_lfn = lfn;
400}
401
402void
403puffs_ml_settimeout(struct puffs_usermount *pu, struct timespec *ts)
404{
405
406	if (ts == NULL) {
407		pu->pu_ml_timep = NULL;
408	} else {
409		pu->pu_ml_timeout = *ts;
410		pu->pu_ml_timep = &pu->pu_ml_timeout;
411	}
412}
413
414void
415puffs_set_prepost(struct puffs_usermount *pu,
416	pu_prepost_fn pre, pu_prepost_fn pst)
417{
418
419	pu->pu_oppre = pre;
420	pu->pu_oppost = pst;
421}
422
423void
424puffs_setback(struct puffs_cc *pcc, int whatback)
425{
426	struct puffs_req *preq = puffs__framebuf_getdataptr(pcc->pcc_pb);
427
428	assert(PUFFSOP_OPCLASS(preq->preq_opclass) == PUFFSOP_VN && (
429	    preq->preq_optype == PUFFS_VN_OPEN ||
430	    preq->preq_optype == PUFFS_VN_MMAP ||
431	    preq->preq_optype == PUFFS_VN_REMOVE ||
432	    preq->preq_optype == PUFFS_VN_RMDIR ||
433	    preq->preq_optype == PUFFS_VN_INACTIVE));
434
435	preq->preq_setbacks |= whatback & PUFFS_SETBACK_MASK;
436}
437
438int
439puffs_daemon(struct puffs_usermount *pu, int nochdir, int noclose)
440{
441	long int n;
442	int parent, value, fd;
443
444	if (pipe(pu->pu_dpipe) == -1)
445		return -1;
446
447	switch (fork()) {
448	case -1:
449		return -1;
450	case 0:
451		parent = 0;
452		break;
453	default:
454		parent = 1;
455		break;
456	}
457	pu->pu_state |= PU_PUFFSDAEMON;
458
459	if (parent) {
460		close(pu->pu_dpipe[1]);
461		n = read(pu->pu_dpipe[0], &value, sizeof(int));
462		if (n == -1)
463			err(1, "puffs_daemon");
464		if (n != sizeof(value))
465			errx(1, "puffs_daemon got %ld bytes", n);
466		if (value) {
467			errno = value;
468			err(1, "puffs_daemon");
469		}
470		exit(0);
471	} else {
472		if (setsid() == -1)
473			goto fail;
474
475		if (!nochdir)
476			chdir("/");
477
478		if (!noclose) {
479			fd = open(_PATH_DEVNULL, O_RDWR, 0);
480			if (fd == -1)
481				goto fail;
482			dup2(fd, STDIN_FILENO);
483			dup2(fd, STDOUT_FILENO);
484			dup2(fd, STDERR_FILENO);
485			if (fd > STDERR_FILENO)
486				close(fd);
487		}
488		return 0;
489	}
490
491 fail:
492	n = write(pu->pu_dpipe[1], &errno, sizeof(int));
493	assert(n == 4);
494	return -1;
495}
496
497static void
498shutdaemon(struct puffs_usermount *pu, int error)
499{
500	ssize_t n;
501
502	n = write(pu->pu_dpipe[1], &error, sizeof(int));
503	assert(n == 4);
504	close(pu->pu_dpipe[0]);
505	close(pu->pu_dpipe[1]);
506	pu->pu_state &= ~PU_PUFFSDAEMON;
507}
508
509int
510puffs_mount(struct puffs_usermount *pu, const char *dir, int mntflags,
511	puffs_cookie_t cookie)
512{
513	int rv, fd, sverrno;
514	char *comfd;
515
516	pu->pu_kargp->pa_root_cookie = cookie;
517
518	/* XXXkludgehere */
519	/* kauth doesn't provide this service any longer */
520	if (geteuid() != 0)
521		mntflags |= MNT_NOSUID | MNT_NODEV;
522
523	/*
524	 * Undocumented...  Well, documented only here.
525	 *
526	 * This is used for imaginative purposes.  If the env variable is
527	 * set, puffs_mount() doesn't do the regular mount procedure.
528	 * Rather, it crams the mount data down the comfd and sets comfd as
529	 * the puffs descriptor.
530	 *
531	 * This shouldn't be used unless you can read my mind ( ... or write
532	 * it, not to mention execute it, but that's starting to get silly).
533	 */
534	if ((comfd = getenv("PUFFS_COMFD")) != NULL) {
535		size_t len;
536
537		if (sscanf(comfd, "%d", &pu->pu_fd) != 1) {
538			errno = EINVAL;
539			rv = -1;
540			goto out;
541		}
542		/* check that what we got at least resembles an fd */
543		if (fcntl(pu->pu_fd, F_GETFL) == -1) {
544			rv = -1;
545			goto out;
546		}
547
548#define allwrite(buf, len)						\
549do {									\
550	ssize_t al_rv;							\
551	al_rv = write(pu->pu_fd, buf, len);				\
552	if ((size_t)al_rv != len) {					\
553		if (al_rv != -1)					\
554			errno = EIO;					\
555		rv = -1;						\
556		goto out;						\
557	}								\
558} while (/*CONSTCOND*/0)
559		len = strlen(dir)+1;
560		allwrite(&len, sizeof(len));
561		allwrite(dir, len);
562		len = strlen(pu->pu_kargp->pa_mntfromname)+1;
563		allwrite(&len, sizeof(len));
564		allwrite(pu->pu_kargp->pa_mntfromname, len);
565		allwrite(&mntflags, sizeof(mntflags));
566		len = sizeof(*pu->pu_kargp);
567		allwrite(&len, sizeof(len));
568		allwrite(pu->pu_kargp, sizeof(*pu->pu_kargp));
569		allwrite(&pu->pu_flags, sizeof(pu->pu_flags));
570#undef allwrite
571
572		rv = 0;
573	} else {
574		char rp[MAXPATHLEN];
575
576		if (realpath(dir, rp) == NULL) {
577			rv = -1;
578			goto out;
579		}
580
581		if (strcmp(dir, rp) != 0) {
582			warnx("puffs_mount: \"%s\" is a relative path.", dir);
583			warnx("puffs_mount: using \"%s\" instead.", rp);
584		}
585
586		fd = open(_PATH_PUFFS, O_RDWR);
587		if (fd == -1) {
588			warnx("puffs_mount: cannot open %s", _PATH_PUFFS);
589			rv = -1;
590			goto out;
591		}
592		if (fd <= 2)
593			warnx("puffs_mount: device fd %d (<= 2), sure this is "
594			    "what you want?", fd);
595
596		pu->pu_kargp->pa_fd = pu->pu_fd = fd;
597		if ((rv = mount(MOUNT_PUFFS, rp, mntflags,
598		    pu->pu_kargp, sizeof(struct puffs_kargs))) == -1)
599			goto out;
600	}
601
602	PU_SETSTATE(pu, PUFFS_STATE_RUNNING);
603
604 out:
605	if (rv != 0)
606		sverrno = errno;
607	else
608		sverrno = 0;
609	free(pu->pu_kargp);
610	pu->pu_kargp = NULL;
611
612	if (pu->pu_state & PU_PUFFSDAEMON)
613		shutdaemon(pu, sverrno);
614
615	errno = sverrno;
616	return rv;
617}
618
619struct puffs_usermount *
620puffs_init(struct puffs_ops *pops, const char *mntfromname,
621	const char *puffsname, void *priv, uint32_t pflags)
622{
623	struct puffs_usermount *pu;
624	struct puffs_kargs *pargs;
625	int sverrno;
626
627	if (puffsname == PUFFS_DEFER)
628		puffsname = "n/a";
629	if (mntfromname == PUFFS_DEFER)
630		mntfromname = "n/a";
631	if (priv == PUFFS_DEFER)
632		priv = NULL;
633
634	pu = malloc(sizeof(struct puffs_usermount));
635	if (pu == NULL)
636		goto failfree;
637	memset(pu, 0, sizeof(struct puffs_usermount));
638
639	pargs = pu->pu_kargp = malloc(sizeof(struct puffs_kargs));
640	if (pargs == NULL)
641		goto failfree;
642	memset(pargs, 0, sizeof(struct puffs_kargs));
643
644	pargs->pa_vers = PUFFSVERSION;
645	pargs->pa_flags = PUFFS_FLAG_KERN(pflags);
646	fillvnopmask(pops, pargs);
647	puffs_setmntinfo(pu, mntfromname, puffsname);
648
649	puffs_zerostatvfs(&pargs->pa_svfsb);
650	pargs->pa_root_cookie = NULL;
651	pargs->pa_root_vtype = VDIR;
652	pargs->pa_root_vsize = 0;
653	pargs->pa_root_rdev = 0;
654	pargs->pa_maxmsglen = 0;
655	if (/*CONSTCOND*/ sizeof(time_t) == 4)
656		pargs->pa_time32 = 1;
657	else
658		pargs->pa_time32 = 0;
659
660	pu->pu_flags = pflags;
661	pu->pu_ops = *pops;
662	free(pops); /* XXX */
663
664	pu->pu_privdata = priv;
665	pu->pu_cc_stackshift = PUFFS_CC_STACKSHIFT_DEFAULT;
666	LIST_INIT(&pu->pu_pnodelst);
667	LIST_INIT(&pu->pu_ios);
668	LIST_INIT(&pu->pu_ios_rmlist);
669	LIST_INIT(&pu->pu_ccmagazin);
670	TAILQ_INIT(&pu->pu_sched);
671
672	pu->pu_framectrl[PU_FRAMECTRL_FS].rfb = puffs__fsframe_read;
673	pu->pu_framectrl[PU_FRAMECTRL_FS].wfb = puffs__fsframe_write;
674	pu->pu_framectrl[PU_FRAMECTRL_FS].cmpfb = puffs__fsframe_cmp;
675	pu->pu_framectrl[PU_FRAMECTRL_FS].gotfb = puffs__fsframe_gotframe;
676	pu->pu_framectrl[PU_FRAMECTRL_FS].fdnotfn = puffs_framev_unmountonclose;
677
678	/* defaults for some user-settable translation functions */
679	pu->pu_cmap = NULL; /* identity translation */
680
681	pu->pu_pathbuild = puffs_stdpath_buildpath;
682	pu->pu_pathfree = puffs_stdpath_freepath;
683	pu->pu_pathcmp = puffs_stdpath_cmppath;
684	pu->pu_pathtransform = NULL;
685	pu->pu_namemod = NULL;
686
687	pu->pu_errnotify = puffs_kernerr_log;
688
689	PU_SETSTATE(pu, PUFFS_STATE_BEFOREMOUNT);
690
691	return pu;
692
693 failfree:
694	/* can't unmount() from here for obvious reasons */
695	sverrno = errno;
696	free(pu);
697	errno = sverrno;
698	return NULL;
699}
700
701void
702puffs_cancel(struct puffs_usermount *pu, int error)
703{
704
705	assert(puffs_getstate(pu) < PUFFS_STATE_RUNNING);
706	shutdaemon(pu, error);
707	free(pu);
708}
709
710/*ARGSUSED1*/
711int
712puffs_exit(struct puffs_usermount *pu, int unused /* strict compat */)
713{
714	struct puffs_framebuf *pb;
715	struct puffs_req *preq;
716	void *winp;
717	size_t winlen;
718	int sverrno;
719
720	pb = puffs_framebuf_make();
721	if (pb == NULL) {
722		errno = ENOMEM;
723		return -1;
724	}
725
726	winlen = sizeof(struct puffs_req);
727	if (puffs_framebuf_getwindow(pb, 0, &winp, &winlen) == -1) {
728		sverrno = errno;
729		puffs_framebuf_destroy(pb);
730		errno = sverrno;
731		return -1;
732	}
733	preq = winp;
734
735	preq->preq_buflen = sizeof(struct puffs_req);
736	preq->preq_opclass = PUFFSOP_UNMOUNT;
737	preq->preq_id = puffs__nextreq(pu);
738
739	puffs_framev_enqueue_justsend(pu, puffs_getselectable(pu), pb, 1, 0);
740
741	return 0;
742}
743
/*
 * Signals which should trigger an unmount.  There is no static
 * initializer for sigset_t, so a plain array indexed by signal number
 * is used: sigs[n] is set to 1 by puffs_unmountonsignal() and read by
 * puffs_mainloop() when it registers EVFILT_SIGNAL events.
 * sigcatch counts the distinct signals set in sigs[]; puffs_mainloop()
 * uses it to size the kevent array.
 */
static int sigs[NSIG] = { 0, };
static int sigcatch = 0;
747
748int
749puffs_unmountonsignal(int sig, bool sigignore)
750{
751
752	if (sig < 0 || sig >= (int)NSIG) {
753		errno = EINVAL;
754		return -1;
755	}
756	if (sigignore)
757		if (signal(sig, SIG_IGN) == SIG_ERR)
758			return -1;
759
760	if (!sigs[sig])
761		sigcatch++;
762	sigs[sig] = 1;
763
764	return 0;
765}
766
/*
 * Actual mainloop.  This is called from a context which can block.
 * It is called either from puffs_mainloop (indirectly, via
 * puffs_cc_continue() or from puffs_cc_yield()).
 *
 * Until the state becomes PUFFS_STATE_UNMOUNTED, each iteration:
 *  1) runs every continuation queued on pu_sched
 *  2) calls the user loop function pu_ml_lfn, if one is set
 *  3) tries to write out pending data on every descriptor which is
 *     not flagged FIO_WRGONE
 *  4) collects EVFILT_WRITE enable/disable changes and calls kevent()
 *     with them, using pu_ml_timep as the timeout
 *  5) handles the returned events: errors, reads, writes and signals
 *  6) frees the descriptors queued on pu_ios_rmlist
 *
 * A kevent() failure other than EINTR ends the loop.  On the way out
 * the main context is restored with puffs__cc_restoremain().
 */
void
puffs__theloop(struct puffs_cc *pcc)
{
	struct puffs_usermount *pu = pcc->pcc_pu;
	struct puffs_framectrl *pfctrl;
	struct puffs_fctrl_io *fio;
	struct kevent *curev;
	size_t nchanges;
	int ndone;

	while (puffs_getstate(pu) != PUFFS_STATE_UNMOUNTED) {

		/*
		 * Schedule existing requests.
		 * Note: pcc is reused as the iterator from here on.
		 */
		while ((pcc = TAILQ_FIRST(&pu->pu_sched)) != NULL) {
			TAILQ_REMOVE(&pu->pu_sched, pcc, pcc_schedent);
			puffs__goto(pcc);
		}

		/* give the file server its per-iteration callback */
		if (pu->pu_ml_lfn)
			pu->pu_ml_lfn(pu);

		/* XXX: can we still do these optimizations? */
#if 0
		/*
		 * Do this here, because:
		 *  a) loopfunc might generate some results
		 *  b) it's still "after" event handling (except for round 1)
		 */
		if (puffs_req_putput(ppr) == -1)
			goto out;
		puffs_req_resetput(ppr);

		/* micro optimization: skip kevent syscall if possible */
		if (pu->pu_nfds == 1 && pu->pu_ml_timep == NULL
		    && (pu->pu_state & PU_ASYNCFD) == 0) {
			pfctrl = XXX->fctrl;
			puffs_framev_input(pu, pfctrl, XXX);
			continue;
		}
#endif

		/* else: do full processing */
		/* Don't bother worrying about O(n) for now */
		LIST_FOREACH(fio, &pu->pu_ios, fio_entries) {
			if (fio->stat & FIO_WRGONE)
				continue;

			pfctrl = fio->fctrl;

			/*
			 * Try to write out everything to avoid the
			 * need for enabling EVFILT_WRITE.  The likely
			 * case is that we can fit everything into the
			 * socket buffer.
			 */
			puffs__framev_output(pu, pfctrl, fio);
		}

		/*
		 * Build list of which to enable/disable in writecheck.
		 * The changes are staged in pu_evs, the same array
		 * kevent() below also uses for returning events.
		 */
		nchanges = 0;
		LIST_FOREACH(fio, &pu->pu_ios, fio_entries) {
			if (fio->stat & FIO_WRGONE)
				continue;

			/* en/disable write checks for kqueue as needed */
			assert((FIO_EN_WRITE(fio) && FIO_RM_WRITE(fio)) == 0);
			if (FIO_EN_WRITE(fio)) {
				EV_SET(&pu->pu_evs[nchanges], fio->io_fd,
				    EVFILT_WRITE, EV_ENABLE, 0, 0,
				    (uintptr_t)fio);
				fio->stat |= FIO_WR;
				nchanges++;
			}
			if (FIO_RM_WRITE(fio)) {
				EV_SET(&pu->pu_evs[nchanges], fio->io_fd,
				    EVFILT_WRITE, EV_DISABLE, 0, 0,
				    (uintptr_t)fio);
				fio->stat &= ~FIO_WR;
				nchanges++;
			}
		}

		/* apply the changes and wait for events in one call */
		ndone = kevent(pu->pu_kq, pu->pu_evs, nchanges,
		    pu->pu_evs, pu->pu_nevs, pu->pu_ml_timep);

		if (ndone == -1) {
			/* an interrupted call is retried, others are fatal */
			if (errno != EINTR)
				break;
			else
				continue;
		}

		/* no events were returned: nothing to dispatch */
		if (ndone == 0)
			continue;

		/* iterate over the results */
		for (curev = pu->pu_evs; ndone--; curev++) {
			int what;

#if 0
			/* get & possibly dispatch events from kernel */
			if (curev->ident == puffsfd) {
				if (puffs_req_handle(pgr, ppr, 0) == -1)
					goto out;
				continue;
			}
#endif

			/*
			 * udata carries the fio for descriptor events.
			 * Signal events are registered with udata 0 in
			 * puffs_mainloop(), so fio is NULL for them.
			 */
			fio = (void *)curev->udata;
			if (__predict_true(fio))
				pfctrl = fio->fctrl;
			else
				pfctrl = NULL;
			if (curev->flags & EV_ERROR) {
				/*
				 * NOTE(review): fio is dereferenced here
				 * without a NULL check; this relies on
				 * errors being reported only for
				 * EVFILT_WRITE events, as asserted.
				 */
				assert(curev->filter == EVFILT_WRITE);
				fio->stat &= ~FIO_WR;

				/* XXX: how to know if it's a transient error */
				puffs__framev_writeclose(pu, fio,
				    (int)curev->data);
				puffs__framev_notify(fio, PUFFS_FBIO_ERROR);
				continue;
			}

			/* "what" collects the i/o directions to notify about */
			what = 0;
			if (curev->filter == EVFILT_READ) {
				puffs__framev_input(pu, pfctrl, fio);
				what |= PUFFS_FBIO_READ;
			}

			else if (curev->filter == EVFILT_WRITE) {
				puffs__framev_output(pu, pfctrl, fio);
				what |= PUFFS_FBIO_WRITE;
			}

			/* a registered signal: request unmount, but only once */
			else if (__predict_false(curev->filter==EVFILT_SIGNAL)){
				if ((pu->pu_state & PU_DONEXIT) == 0) {
					PU_SETSFLAG(pu, PU_DONEXIT);
					puffs_exit(pu, 0);
				}
			}
			if (what)
				puffs__framev_notify(fio, what);
		}

		/*
		 * Really free fd's now that we don't have references
		 * to them.
		 */
		while ((fio = LIST_FIRST(&pu->pu_ios_rmlist)) != NULL) {
			LIST_REMOVE(fio, fio_entries);
			free(fio);
		}
	}

	if (puffs__cc_restoremain(pu) == -1)
		warn("cannot restore main context.  impending doom");
}
/*
 * Set up event handling and run the event loop until it terminates.
 *
 * The instance must be at least in PUFFS_STATE_RUNNING (asserted).
 * A kqueue is created, the puffs descriptor is switched to non-blocking
 * mode and added to the framev descriptors, and the kevent array is
 * grown by one slot for every signal registered with
 * puffs_unmountonsignal().  Every descriptor gets an enabled read
 * filter and a disabled write filter; every registered signal gets a
 * signal filter.  puffs__theloop() is then run in a separate execution
 * context.  After it is done, pending output is pushed out with
 * finalpush().
 *
 * Returns 0 after the loop has finished.  If a setup step fails,
 * control goes to "out" and the result is -1 if errno is non-zero at
 * that point.
 */
int
puffs_mainloop(struct puffs_usermount *pu)
{
	struct puffs_fctrl_io *fio;
	struct puffs_cc *pcc;
	struct kevent *curev;
	size_t nevs;
	int sverrno, i;

	assert(puffs_getstate(pu) >= PUFFS_STATE_RUNNING);

	pu->pu_kq = kqueue();
	if (pu->pu_kq == -1)
		goto out;
	pu->pu_state |= PU_HASKQ;

	/* NOTE(review): the result of puffs_setblockingmode() is ignored */
	puffs_setblockingmode(pu, PUFFSDEV_NONBLOCK);
	if (puffs__framev_addfd_ctrl(pu, puffs_getselectable(pu),
	    PUFFS_FBIO_READ | PUFFS_FBIO_WRITE,
	    &pu->pu_framectrl[PU_FRAMECTRL_FS]) == -1)
		goto out;

	/* make room for one event per registered signal */
	nevs = pu->pu_nevs + sigcatch;
	curev = realloc(pu->pu_evs, nevs * sizeof(struct kevent));
	if (curev == NULL)
		goto out;
	pu->pu_evs = curev;
	pu->pu_nevs = nevs;

	/* two events per descriptor: read enabled, write disabled */
	LIST_FOREACH(fio, &pu->pu_ios, fio_entries) {
		EV_SET(curev, fio->io_fd, EVFILT_READ, EV_ADD,
		    0, 0, (uintptr_t)fio);
		curev++;
		EV_SET(curev, fio->io_fd, EVFILT_WRITE, EV_ADD | EV_DISABLE,
		    0, 0, (uintptr_t)fio);
		curev++;
	}
	/* signal events carry no udata (0) */
	for (i = 0; i < NSIG; i++) {
		if (sigs[i]) {
			EV_SET(curev, i, EVFILT_SIGNAL, EV_ADD | EV_ENABLE,
			    0, 0, 0);
			curev++;
		}
	}
	/*
	 * The array must have been filled exactly.
	 * NOTE(review): this presumes pu_nevs already accounted for two
	 * events per descriptor before sigcatch was added -- presumably
	 * maintained by the framev code; confirm.
	 */
	assert(curev - pu->pu_evs == (ssize_t)pu->pu_nevs);
	if (kevent(pu->pu_kq, pu->pu_evs, pu->pu_nevs, NULL, 0, NULL) == -1)
		goto out;

	pu->pu_state |= PU_INLOOP;

	/*
	 * Create alternate execution context and jump to it.  Note
	 * that we come "out" of savemain twice.  Where we come out
	 * of it depends on the architecture.  If the return address is
	 * stored on the stack, we jump out from puffs_cc_continue(),
	 * for a register return address from puffs__cc_savemain().
	 * PU_MAINRESTORE makes sure we DTRT in both cases.
	 */
	if (puffs__cc_create(pu, puffs__theloop, &pcc) == -1) {
		goto out;
	}

#if 0
	if (puffs__cc_savemain(pu) == -1) {
		goto out;
	}
#else
	/*
	 * XXX
	 * puffs__cc_savemain() uses getcontext() and then returns.
	 * the caller (this function) may overwrite the stack frame
	 * of puffs__cc_savemain(), so when we call setcontext() later and
	 * return from puffs__cc_savemain() again, the return address or
	 * saved stack pointer can be garbage.
	 * avoid this by calling getcontext() directly here.
	 */
	extern int puffs_fakecc;
	if (!puffs_fakecc) {
		PU_CLRSFLAG(pu, PU_MAINRESTORE);
		if (getcontext(&pu->pu_mainctx) == -1) {
			goto out;
		}
	}
#endif

	/*
	 * Enter the loop only on the first pass through here, i.e. when
	 * PU_MAINRESTORE is not set.
	 */
	if ((pu->pu_state & PU_MAINRESTORE) == 0)
		puffs_cc_continue(pcc);

	finalpush(pu);
	errno = 0;

 out:
	/* store the real error for a while */
	sverrno = errno;

	/* nothing runs in between at present, so errno is unchanged */
	errno = sverrno;
	if (errno)
		return -1;
	else
		return 0;
}
1036