1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 2007-2009 Google Inc. and Amit Singh
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
9 * met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 *   notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 *   copyright notice, this list of conditions and the following disclaimer
15 *   in the documentation and/or other materials provided with the
16 *   distribution.
17 * * Neither the name of Google Inc. nor the names of its
18 *   contributors may be used to endorse or promote products derived from
19 *   this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Copyright (C) 2005 Csaba Henk.
34 * All rights reserved.
35 *
36 * Copyright (c) 2019 The FreeBSD Foundation
37 *
38 * Portions of this software were developed by BFF Storage Systems, LLC under
39 * sponsorship from the FreeBSD Foundation.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 *    notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 *    notice, this list of conditions and the following disclaimer in the
48 *    documentation and/or other materials provided with the distribution.
49 *
50 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 */
62
63#include <sys/cdefs.h>
64__FBSDID("$FreeBSD$");
65
66#include <sys/param.h>
67#include <sys/module.h>
68#include <sys/systm.h>
69#include <sys/counter.h>
70#include <sys/errno.h>
71#include <sys/kernel.h>
72#include <sys/conf.h>
73#include <sys/uio.h>
74#include <sys/malloc.h>
75#include <sys/queue.h>
76#include <sys/lock.h>
77#include <sys/sx.h>
78#include <sys/mutex.h>
79#include <sys/proc.h>
80#include <sys/mount.h>
81#include <sys/sdt.h>
82#include <sys/vnode.h>
83#include <sys/signalvar.h>
84#include <sys/syscallsubr.h>
85#include <sys/sysctl.h>
86#include <vm/uma.h>
87
88#include "fuse.h"
89#include "fuse_node.h"
90#include "fuse_ipc.h"
91#include "fuse_internal.h"
92
SDT_PROVIDER_DECLARE(fusefs);
/*
 * Fuse trace probe:
 * arg0: verbosity.  Higher numbers give more verbose messages
 * arg1: Textual message
 */
SDT_PROBE_DEFINE2(fusefs, , ipc, trace, "int", "char*");

/* Forward declarations for file-local helpers defined later in this file */
static void fdisp_make_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op,
    struct fuse_data *data, uint64_t nid, pid_t pid, struct ucred *cred);
static void fuse_interrupt_send(struct fuse_ticket *otick, int err);
static struct fuse_ticket *fticket_alloc(struct fuse_data *data);
static void fticket_refresh(struct fuse_ticket *ftick);
static void fticket_destroy(struct fuse_ticket *ftick);
static int fticket_wait_answer(struct fuse_ticket *ftick);
static inline int
fticket_aw_pull_uio(struct fuse_ticket *ftick,
    struct uio *uio);

static int fuse_body_audit(struct fuse_ticket *ftick, size_t blen);

/* Default answer handler, installed by fdisp_wait_answ() */
static fuse_handler_t fuse_standard_handler;

/*
 * Statistic: incremented in fticket_ctor() and decremented in fticket_dtor().
 * Allocated in fuse_ipc_init() and freed in fuse_ipc_destroy().
 */
static counter_u64_t fuse_ticket_count;
SYSCTL_COUNTER_U64(_vfs_fusefs_stats, OID_AUTO, ticket_count, CTLFLAG_RD,
    &fuse_ticket_count, "Number of allocated tickets");

/*
 * fiov_adjust() considers a fiov oversized when its allocation exceeds the
 * requested size by more than this many bytes.  A negative value disables
 * that check.
 */
static long fuse_iov_permanent_bufsize = 1 << 19;

SYSCTL_LONG(_vfs_fusefs, OID_AUTO, iov_permanent_bufsize, CTLFLAG_RW,
    &fuse_iov_permanent_bufsize, 0,
    "limit for permanently stored buffer size for fuse_iovs");
/*
 * Initial value of a fiov's credit counter.  fiov_adjust() decrements the
 * counter each time it finds the fiov oversized, and reallocates the buffer
 * once the counter drops below zero.
 */
static int fuse_iov_credit = 16;

SYSCTL_INT(_vfs_fusefs, OID_AUTO, iov_credit, CTLFLAG_RW,
    &fuse_iov_credit, 0,
    "how many times is an oversized fuse_iov tolerated");

/* malloc type used for fiov buffers and for struct fuse_data */
MALLOC_DEFINE(M_FUSEMSG, "fuse_msgbuf", "fuse message buffer");
/* UMA zone that tickets are allocated from; created in fuse_ipc_init() */
static uma_zone_t ticket_zone;
133
134/*
135 * TODO: figure out how to timeout INTERRUPT requests, because the daemon may
136 * leagally never respond
137 */
138static int
139fuse_interrupt_callback(struct fuse_ticket *tick, struct uio *uio)
140{
141	struct fuse_ticket *otick, *x_tick;
142	struct fuse_interrupt_in *fii;
143	struct fuse_data *data = tick->tk_data;
144	bool found = false;
145
146	fii = (struct fuse_interrupt_in*)((char*)tick->tk_ms_fiov.base +
147		sizeof(struct fuse_in_header));
148
149	fuse_lck_mtx_lock(data->aw_mtx);
150	TAILQ_FOREACH_SAFE(otick, &data->aw_head, tk_aw_link, x_tick) {
151		if (otick->tk_unique == fii->unique) {
152			found = true;
153			break;
154		}
155	}
156	fuse_lck_mtx_unlock(data->aw_mtx);
157
158	if (!found) {
159		/* Original is already complete.  Just return */
160		return 0;
161	}
162
163	/* Clear the original ticket's interrupt association */
164	otick->irq_unique = 0;
165
166	if (tick->tk_aw_ohead.error == ENOSYS) {
167		fsess_set_notimpl(data->mp, FUSE_INTERRUPT);
168		return 0;
169	} else if (tick->tk_aw_ohead.error == EAGAIN) {
170		/*
171		 * There are two reasons we might get this:
172		 * 1) the daemon received the INTERRUPT request before the
173		 *    original, or
174		 * 2) the daemon received the INTERRUPT request after it
175		 *    completed the original request.
176		 * In the first case we should re-send the INTERRUPT.  In the
177		 * second, we should ignore it.
178		 */
179		/* Resend */
180		fuse_interrupt_send(otick, EINTR);
181		return 0;
182	} else {
183		/* Illegal FUSE_INTERRUPT response */
184		return EINVAL;
185	}
186}
187
188/* Interrupt the operation otick.  Return err as its error code */
189void
190fuse_interrupt_send(struct fuse_ticket *otick, int err)
191{
192	struct fuse_dispatcher fdi;
193	struct fuse_interrupt_in *fii;
194	struct fuse_in_header *ftick_hdr;
195	struct fuse_data *data = otick->tk_data;
196	struct fuse_ticket *tick, *xtick;
197	struct ucred reused_creds;
198	gid_t reused_groups[1];
199
200	if (otick->irq_unique == 0) {
201		/*
202		 * If the daemon hasn't yet received otick, then we can answer
203		 * it ourselves and return.
204		 */
205		fuse_lck_mtx_lock(data->ms_mtx);
206		STAILQ_FOREACH_SAFE(tick, &otick->tk_data->ms_head, tk_ms_link,
207			xtick) {
208			if (tick == otick) {
209				STAILQ_REMOVE(&otick->tk_data->ms_head, tick,
210					fuse_ticket, tk_ms_link);
211				otick->tk_data->ms_count--;
212				otick->tk_ms_link.stqe_next = NULL;
213				fuse_lck_mtx_unlock(data->ms_mtx);
214
215				fuse_lck_mtx_lock(otick->tk_aw_mtx);
216				if (!fticket_answered(otick)) {
217					fticket_set_answered(otick);
218					otick->tk_aw_errno = err;
219					wakeup(otick);
220				}
221				fuse_lck_mtx_unlock(otick->tk_aw_mtx);
222
223				fuse_ticket_drop(tick);
224				return;
225			}
226		}
227		fuse_lck_mtx_unlock(data->ms_mtx);
228
229		/*
230		 * If the fuse daemon doesn't support interrupts, then there's
231		 * nothing more that we can do
232		 */
233		if (!fsess_isimpl(data->mp, FUSE_INTERRUPT))
234			return;
235
236		/*
237		 * If the fuse daemon has already received otick, then we must
238		 * send FUSE_INTERRUPT.
239		 */
240		ftick_hdr = fticket_in_header(otick);
241		reused_creds.cr_uid = ftick_hdr->uid;
242		reused_groups[0] = ftick_hdr->gid;
243		reused_creds.cr_groups = reused_groups;
244		fdisp_init(&fdi, sizeof(*fii));
245		fdisp_make_pid(&fdi, FUSE_INTERRUPT, data, ftick_hdr->nodeid,
246			ftick_hdr->pid, &reused_creds);
247
248		fii = fdi.indata;
249		fii->unique = otick->tk_unique;
250		fuse_insert_callback(fdi.tick, fuse_interrupt_callback);
251
252		otick->irq_unique = fdi.tick->tk_unique;
253		/* Interrupt ops should be delivered ASAP */
254		fuse_insert_message(fdi.tick, true);
255		fdisp_destroy(&fdi);
256	} else {
257		/* This ticket has already been interrupted */
258	}
259}
260
261void
262fiov_init(struct fuse_iov *fiov, size_t size)
263{
264	uint32_t msize = FU_AT_LEAST(size);
265
266	fiov->len = 0;
267
268	fiov->base = malloc(msize, M_FUSEMSG, M_WAITOK | M_ZERO);
269
270	fiov->allocated_size = msize;
271	fiov->credit = fuse_iov_credit;
272}
273
/*
 * Free the buffer owned by fiov.  The buffer pointer must be non-NULL
 * (checked with MPASS).  The fiov's fields are left untouched.
 */
void
fiov_teardown(struct fuse_iov *fiov)
{
	MPASS(fiov->base != NULL);
	free(fiov->base, M_FUSEMSG);
}
280
281void
282fiov_adjust(struct fuse_iov *fiov, size_t size)
283{
284	if (fiov->allocated_size < size ||
285	    (fuse_iov_permanent_bufsize >= 0 &&
286	    fiov->allocated_size - size > fuse_iov_permanent_bufsize &&
287	    --fiov->credit < 0)) {
288
289		fiov->base = realloc(fiov->base, FU_AT_LEAST(size), M_FUSEMSG,
290		    M_WAITOK | M_ZERO);
291		if (!fiov->base) {
292			panic("FUSE: realloc failed");
293		}
294		fiov->allocated_size = FU_AT_LEAST(size);
295		fiov->credit = fuse_iov_credit;
296		/* Clear data buffer after reallocation */
297		bzero(fiov->base, size);
298	} else if (size > fiov->len) {
299		/* Clear newly extended portion of data buffer */
300		bzero((char*)fiov->base + fiov->len, size - fiov->len);
301	}
302	fiov->len = size;
303}
304
/*
 * Reset the fiov to zero length, resizing its buffer if needed.
 *
 * This is fiov_adjust(fiov, 0).  No bytes are zeroed immediately;
 * fiov_adjust() zeroes the buffer as the fiov is grown again later.
 */
void
fiov_refresh(struct fuse_iov *fiov)
{
	fiov_adjust(fiov, 0);
}
311
312static int
313fticket_ctor(void *mem, int size, void *arg, int flags)
314{
315	struct fuse_ticket *ftick = mem;
316	struct fuse_data *data = arg;
317
318	FUSE_ASSERT_MS_DONE(ftick);
319	FUSE_ASSERT_AW_DONE(ftick);
320
321	ftick->tk_data = data;
322
323	if (ftick->tk_unique != 0)
324		fticket_refresh(ftick);
325
326	/* May be truncated to 32 bits */
327	ftick->tk_unique = atomic_fetchadd_long(&data->ticketer, 1);
328	if (ftick->tk_unique == 0)
329		ftick->tk_unique = atomic_fetchadd_long(&data->ticketer, 1);
330
331	ftick->irq_unique = 0;
332
333	refcount_init(&ftick->tk_refcount, 1);
334	counter_u64_add(fuse_ticket_count, 1);
335
336	return 0;
337}
338
/*
 * UMA destructor for ticket_zone items.  Runs the ticket's MS/AW "done"
 * assertions and decrements the fuse_ticket_count statistic.  size and arg
 * are unused.
 */
static void
fticket_dtor(void *mem, int size, void *arg)
{
#ifdef INVARIANTS
	/* Only referenced by the assertions below */
	struct fuse_ticket *ftick = mem;
#endif

	FUSE_ASSERT_MS_DONE(ftick);
	FUSE_ASSERT_AW_DONE(ftick);

	counter_u64_add(fuse_ticket_count, -1);
}
351
352static int
353fticket_init(void *mem, int size, int flags)
354{
355	struct fuse_ticket *ftick = mem;
356
357	bzero(ftick, sizeof(struct fuse_ticket));
358
359	fiov_init(&ftick->tk_ms_fiov, sizeof(struct fuse_in_header));
360	ftick->tk_ms_type = FT_M_FIOV;
361
362	mtx_init(&ftick->tk_aw_mtx, "fuse answer delivery mutex", NULL, MTX_DEF);
363	fiov_init(&ftick->tk_aw_fiov, 0);
364	ftick->tk_aw_type = FT_A_FIOV;
365
366	return 0;
367}
368
/*
 * UMA fini routine for ticket_zone items: releases what fticket_init() set
 * up, namely both fiov buffers and the answer mutex.  size is unused.
 */
static void
fticket_fini(void *mem, int size)
{
	struct fuse_ticket *ftick = mem;

	fiov_teardown(&ftick->tk_ms_fiov);
	fiov_teardown(&ftick->tk_aw_fiov);
	mtx_destroy(&ftick->tk_aw_mtx);
}
378
/*
 * Allocate a ticket from ticket_zone with M_WAITOK.  data is passed as the
 * zone's constructor argument (see fticket_ctor()).
 */
static inline struct fuse_ticket *
fticket_alloc(struct fuse_data *data)
{
	return uma_zalloc_arg(ticket_zone, data, M_WAITOK);
}
384
385static inline void
386fticket_destroy(struct fuse_ticket *ftick)
387{
388	return uma_zfree(ticket_zone, ftick);
389}
390
391static inline
392void
393fticket_refresh(struct fuse_ticket *ftick)
394{
395	FUSE_ASSERT_MS_DONE(ftick);
396	FUSE_ASSERT_AW_DONE(ftick);
397
398	fiov_refresh(&ftick->tk_ms_fiov);
399	ftick->tk_ms_bufdata = NULL;
400	ftick->tk_ms_bufsize = 0;
401	ftick->tk_ms_type = FT_M_FIOV;
402
403	bzero(&ftick->tk_aw_ohead, sizeof(struct fuse_out_header));
404
405	fiov_refresh(&ftick->tk_aw_fiov);
406	ftick->tk_aw_errno = 0;
407	ftick->tk_aw_bufdata = NULL;
408	ftick->tk_aw_bufsize = 0;
409	ftick->tk_aw_type = FT_A_FIOV;
410
411	ftick->tk_flag = 0;
412}
413
414/* Prepar the ticket to be reused, but don't clear its data buffers */
415static inline void
416fticket_reset(struct fuse_ticket *ftick)
417{
418	FUSE_ASSERT_MS_DONE(ftick);
419	FUSE_ASSERT_AW_DONE(ftick);
420
421	ftick->tk_ms_bufdata = NULL;
422	ftick->tk_ms_bufsize = 0;
423	ftick->tk_ms_type = FT_M_FIOV;
424
425	bzero(&ftick->tk_aw_ohead, sizeof(struct fuse_out_header));
426
427	ftick->tk_aw_errno = 0;
428	ftick->tk_aw_bufdata = NULL;
429	ftick->tk_aw_bufsize = 0;
430	ftick->tk_aw_type = FT_A_FIOV;
431
432	ftick->tk_flag = 0;
433}
434
/*
 * Sleep until ftick has been answered, the session is dead, the sleep times
 * out, or a signal interrupts the sleep.
 *
 * Return value:
 *   0          the ticket was already answered, or was answered while asleep
 *   ENOTCONN   the session is marked dead (the ticket is marked answered)
 *   ETIMEDOUT  msleep() timed out after daemon_timeout * hz ticks (the ticket
 *              is marked answered)
 *   EINTR or ERESTART
 *              a signal interrupted the sleep; fuse_interrupt_send() has been
 *              called for the ticket.  The value is also returned if the
 *              ticket is found answered on the retry pass, since err is not
 *              reset before jumping back to "retry".
 *   ENXIO      the thread was woken without error but the ticket is still
 *              unanswered
 *   other      any other msleep() error, passed through
 */
static int
fticket_wait_answer(struct fuse_ticket *ftick)
{
	struct thread *td = curthread;
	sigset_t blockedset, oldset;
	int err = 0, stops_deferred;
	struct fuse_data *data = ftick->tk_data;
	bool interrupted = false;

	/*
	 * Leave signals deliverable only when the session has FSESS_INTR set
	 * and FUSE_INTERRUPT has not been marked unimplemented.
	 */
	if (fsess_isimpl(ftick->tk_data->mp, FUSE_INTERRUPT) &&
	    data->dataflags & FSESS_INTR) {
		SIGEMPTYSET(blockedset);
	} else {
		/* Block all signals except (implicitly) SIGKILL */
		SIGFILLSET(blockedset);
	}
	stops_deferred = sigdeferstop(SIGDEFERSTOP_SILENT);
	/* No new set is supplied: this only saves the current mask in oldset */
	kern_sigprocmask(td, SIG_BLOCK, NULL, &oldset, 0);

	fuse_lck_mtx_lock(ftick->tk_aw_mtx);

retry:
	if (fticket_answered(ftick)) {
		goto out;
	}

	if (fdata_get_dead(data)) {
		err = ENOTCONN;
		fticket_set_answered(ftick);
		goto out;
	}
	/* Apply blockedset for the duration of the sleep, then restore */
	kern_sigprocmask(td, SIG_BLOCK, &blockedset, NULL, 0);
	err = msleep(ftick, &ftick->tk_aw_mtx, PCATCH, "fu_ans",
	    data->daemon_timeout * hz);
	kern_sigprocmask(td, SIG_SETMASK, &oldset, NULL, 0);
	if (err == EWOULDBLOCK) {
		SDT_PROBE2(fusefs, , ipc, trace, 3,
			"fticket_wait_answer: EWOULDBLOCK");
#ifdef XXXIP				/* die conditionally */
		if (!fdata_get_dead(data)) {
			fdata_set_dead(data);
		}
#endif
		err = ETIMEDOUT;
		fticket_set_answered(ftick);
	} else if ((err == EINTR || err == ERESTART)) {
		/*
		 * Whether we get EINTR or ERESTART depends on whether
		 * SA_RESTART was set by sigaction(2).
		 *
		 * Try to interrupt the operation and wait for an EINTR response
		 * to the original operation.  If the file system does not
		 * support FUSE_INTERRUPT, then we'll just wait for it to
		 * complete like normal.  If it does support FUSE_INTERRUPT,
		 * then it will either respond EINTR to the original operation,
		 * or EAGAIN to the interrupt.
		 */
		sigset_t tmpset;

		SDT_PROBE2(fusefs, , ipc, trace, 4,
			"fticket_wait_answer: interrupt");
		/* tk_aw_mtx is dropped around fuse_interrupt_send() */
		fuse_lck_mtx_unlock(ftick->tk_aw_mtx);
		fuse_interrupt_send(ftick, err);

		/* Collect the process's and this thread's pending signals */
		PROC_LOCK(td->td_proc);
		mtx_lock(&td->td_proc->p_sigacts->ps_mtx);
		tmpset = td->td_proc->p_siglist;
		SIGSETOR(tmpset, td->td_siglist);
		mtx_unlock(&td->td_proc->p_sigacts->ps_mtx);
		PROC_UNLOCK(td->td_proc);

		fuse_lck_mtx_lock(ftick->tk_aw_mtx);
		if (!interrupted && !SIGISMEMBER(tmpset, SIGKILL)) {
			/*
			 * Block all signals while we wait for an interrupt
			 * response.  The protocol doesn't discriminate between
			 * different signals.
			 */
			SIGFILLSET(blockedset);
			interrupted = true;
			goto retry;
		} else {
			/*
			 * Return immediately for fatal signals, or if this is
			 * the second interruption.  We should only be
			 * interrupted twice if the thread is stopped, for
			 * example during sigexit.
			 */
		}
	} else if (err) {
		SDT_PROBE2(fusefs, , ipc, trace, 6,
			"fticket_wait_answer: other error");
	} else {
		SDT_PROBE2(fusefs, , ipc, trace, 7, "fticket_wait_answer: OK");
	}
out:
	/* A wakeup without error but also without an answer is reported */
	if (!(err || fticket_answered(ftick))) {
		SDT_PROBE2(fusefs, , ipc, trace, 1,
			"FUSE: requester was woken up but still no answer");
		err = ENXIO;
	}
	fuse_lck_mtx_unlock(ftick->tk_aw_mtx);
	sigallowstop(stops_deferred);

	return err;
}
541
542static	inline
543int
544fticket_aw_pull_uio(struct fuse_ticket *ftick, struct uio *uio)
545{
546	int err = 0;
547	size_t len = uio_resid(uio);
548
549	if (len) {
550		switch (ftick->tk_aw_type) {
551		case FT_A_FIOV:
552			fiov_adjust(fticket_resp(ftick), len);
553			err = uiomove(fticket_resp(ftick)->base, len, uio);
554			break;
555
556		case FT_A_BUF:
557			ftick->tk_aw_bufsize = len;
558			err = uiomove(ftick->tk_aw_bufdata, len, uio);
559			break;
560
561		default:
562			panic("FUSE: unknown answer type for ticket %p", ftick);
563		}
564	}
565	return err;
566}
567
568int
569fticket_pull(struct fuse_ticket *ftick, struct uio *uio)
570{
571	int err = 0;
572
573	if (ftick->tk_aw_ohead.error) {
574		return 0;
575	}
576	err = fuse_body_audit(ftick, uio_resid(uio));
577	if (!err) {
578		err = fticket_aw_pull_uio(ftick, uio);
579	}
580	return err;
581}
582
583struct fuse_data *
584fdata_alloc(struct cdev *fdev, struct ucred *cred)
585{
586	struct fuse_data *data;
587
588	data = malloc(sizeof(struct fuse_data), M_FUSEMSG, M_WAITOK | M_ZERO);
589
590	data->fdev = fdev;
591	mtx_init(&data->ms_mtx, "fuse message list mutex", NULL, MTX_DEF);
592	STAILQ_INIT(&data->ms_head);
593	data->ms_count = 0;
594	knlist_init_mtx(&data->ks_rsel.si_note, &data->ms_mtx);
595	mtx_init(&data->aw_mtx, "fuse answer list mutex", NULL, MTX_DEF);
596	TAILQ_INIT(&data->aw_head);
597	data->daemoncred = crhold(cred);
598	data->daemon_timeout = FUSE_DEFAULT_DAEMON_TIMEOUT;
599	sx_init(&data->rename_lock, "fuse rename lock");
600	data->ref = 1;
601
602	return data;
603}
604
605void
606fdata_trydestroy(struct fuse_data *data)
607{
608	data->ref--;
609	MPASS(data->ref >= 0);
610	if (data->ref != 0)
611		return;
612
613	/* Driving off stage all that stuff thrown at device... */
614	sx_destroy(&data->rename_lock);
615	crfree(data->daemoncred);
616	mtx_destroy(&data->aw_mtx);
617	knlist_delete(&data->ks_rsel.si_note, curthread, 0);
618	knlist_destroy(&data->ks_rsel.si_note);
619	mtx_destroy(&data->ms_mtx);
620
621	free(data, M_FUSEMSG);
622}
623
624void
625fdata_set_dead(struct fuse_data *data)
626{
627	FUSE_LOCK();
628	if (fdata_get_dead(data)) {
629		FUSE_UNLOCK();
630		return;
631	}
632	fuse_lck_mtx_lock(data->ms_mtx);
633	data->dataflags |= FSESS_DEAD;
634	wakeup_one(data);
635	selwakeuppri(&data->ks_rsel, PZERO + 1);
636	wakeup(&data->ticketer);
637	fuse_lck_mtx_unlock(data->ms_mtx);
638	FUSE_UNLOCK();
639}
640
641struct fuse_ticket *
642fuse_ticket_fetch(struct fuse_data *data)
643{
644	int err = 0;
645	struct fuse_ticket *ftick;
646
647	ftick = fticket_alloc(data);
648
649	if (!(data->dataflags & FSESS_INITED)) {
650		/* Sleep until get answer for INIT messsage */
651		FUSE_LOCK();
652		if (!(data->dataflags & FSESS_INITED) && data->ticketer > 2) {
653			err = msleep(&data->ticketer, &fuse_mtx, PCATCH | PDROP,
654			    "fu_ini", 0);
655			if (err)
656				fdata_set_dead(data);
657		} else
658			FUSE_UNLOCK();
659	}
660	return ftick;
661}
662
663int
664fuse_ticket_drop(struct fuse_ticket *ftick)
665{
666	int die;
667
668	die = refcount_release(&ftick->tk_refcount);
669	if (die)
670		fticket_destroy(ftick);
671
672	return die;
673}
674
675void
676fuse_insert_callback(struct fuse_ticket *ftick, fuse_handler_t * handler)
677{
678	if (fdata_get_dead(ftick->tk_data)) {
679		return;
680	}
681	ftick->tk_aw_handler = handler;
682
683	fuse_lck_mtx_lock(ftick->tk_data->aw_mtx);
684	fuse_aw_push(ftick);
685	fuse_lck_mtx_unlock(ftick->tk_data->aw_mtx);
686}
687
688/*
689 * Insert a new upgoing ticket into the message queue
690 *
691 * If urgent is true, insert at the front of the queue.  Otherwise, insert in
692 * FIFO order.
693 */
694void
695fuse_insert_message(struct fuse_ticket *ftick, bool urgent)
696{
697	if (ftick->tk_flag & FT_DIRTY) {
698		panic("FUSE: ticket reused without being refreshed");
699	}
700	ftick->tk_flag |= FT_DIRTY;
701
702	if (fdata_get_dead(ftick->tk_data)) {
703		return;
704	}
705	fuse_lck_mtx_lock(ftick->tk_data->ms_mtx);
706	if (urgent)
707		fuse_ms_push_head(ftick);
708	else
709		fuse_ms_push(ftick);
710	wakeup_one(ftick->tk_data);
711	selwakeuppri(&ftick->tk_data->ks_rsel, PZERO + 1);
712	KNOTE_LOCKED(&ftick->tk_data->ks_rsel.si_note, 0);
713	fuse_lck_mtx_unlock(ftick->tk_data->ms_mtx);
714}
715
716static int
717fuse_body_audit(struct fuse_ticket *ftick, size_t blen)
718{
719	int err = 0;
720	enum fuse_opcode opcode;
721
722	opcode = fticket_opcode(ftick);
723
724	switch (opcode) {
725	case FUSE_BMAP:
726		err = (blen == sizeof(struct fuse_bmap_out)) ? 0 : EINVAL;
727		break;
728
729	case FUSE_LINK:
730	case FUSE_LOOKUP:
731	case FUSE_MKDIR:
732	case FUSE_MKNOD:
733	case FUSE_SYMLINK:
734		if (fuse_libabi_geq(ftick->tk_data, 7, 9)) {
735			err = (blen == sizeof(struct fuse_entry_out)) ?
736				0 : EINVAL;
737		} else {
738			err = (blen == FUSE_COMPAT_ENTRY_OUT_SIZE) ? 0 : EINVAL;
739		}
740		break;
741
742	case FUSE_FORGET:
743		panic("FUSE: a handler has been intalled for FUSE_FORGET");
744		break;
745
746	case FUSE_GETATTR:
747	case FUSE_SETATTR:
748		if (fuse_libabi_geq(ftick->tk_data, 7, 9)) {
749			err = (blen == sizeof(struct fuse_attr_out)) ?
750			  0 : EINVAL;
751		} else {
752			err = (blen == FUSE_COMPAT_ATTR_OUT_SIZE) ? 0 : EINVAL;
753		}
754		break;
755
756	case FUSE_READLINK:
757		err = (PAGE_SIZE >= blen) ? 0 : EINVAL;
758		break;
759
760	case FUSE_UNLINK:
761		err = (blen == 0) ? 0 : EINVAL;
762		break;
763
764	case FUSE_RMDIR:
765		err = (blen == 0) ? 0 : EINVAL;
766		break;
767
768	case FUSE_RENAME:
769		err = (blen == 0) ? 0 : EINVAL;
770		break;
771
772	case FUSE_OPEN:
773		err = (blen == sizeof(struct fuse_open_out)) ? 0 : EINVAL;
774		break;
775
776	case FUSE_READ:
777		err = (((struct fuse_read_in *)(
778		    (char *)ftick->tk_ms_fiov.base +
779		    sizeof(struct fuse_in_header)
780		    ))->size >= blen) ? 0 : EINVAL;
781		break;
782
783	case FUSE_WRITE:
784		err = (blen == sizeof(struct fuse_write_out)) ? 0 : EINVAL;
785		break;
786
787	case FUSE_STATFS:
788		if (fuse_libabi_geq(ftick->tk_data, 7, 4)) {
789			err = (blen == sizeof(struct fuse_statfs_out)) ?
790			  0 : EINVAL;
791		} else {
792			err = (blen == FUSE_COMPAT_STATFS_SIZE) ? 0 : EINVAL;
793		}
794		break;
795
796	case FUSE_RELEASE:
797		err = (blen == 0) ? 0 : EINVAL;
798		break;
799
800	case FUSE_FSYNC:
801		err = (blen == 0) ? 0 : EINVAL;
802		break;
803
804	case FUSE_SETXATTR:
805		err = (blen == 0) ? 0 : EINVAL;
806		break;
807
808	case FUSE_GETXATTR:
809	case FUSE_LISTXATTR:
810		/*
811		 * These can have varying response lengths, and 0 length
812		 * isn't necessarily invalid.
813		 */
814		err = 0;
815		break;
816
817	case FUSE_REMOVEXATTR:
818		err = (blen == 0) ? 0 : EINVAL;
819		break;
820
821	case FUSE_FLUSH:
822		err = (blen == 0) ? 0 : EINVAL;
823		break;
824
825	case FUSE_INIT:
826		if (blen == sizeof(struct fuse_init_out) ||
827		    blen == FUSE_COMPAT_INIT_OUT_SIZE ||
828		    blen == FUSE_COMPAT_22_INIT_OUT_SIZE) {
829			err = 0;
830		} else {
831			err = EINVAL;
832		}
833		break;
834
835	case FUSE_OPENDIR:
836		err = (blen == sizeof(struct fuse_open_out)) ? 0 : EINVAL;
837		break;
838
839	case FUSE_READDIR:
840		err = (((struct fuse_read_in *)(
841		    (char *)ftick->tk_ms_fiov.base +
842		    sizeof(struct fuse_in_header)
843		    ))->size >= blen) ? 0 : EINVAL;
844		break;
845
846	case FUSE_RELEASEDIR:
847		err = (blen == 0) ? 0 : EINVAL;
848		break;
849
850	case FUSE_FSYNCDIR:
851		err = (blen == 0) ? 0 : EINVAL;
852		break;
853
854	case FUSE_GETLK:
855		err = (blen == sizeof(struct fuse_lk_out)) ? 0 : EINVAL;
856		break;
857
858	case FUSE_SETLK:
859		err = (blen == 0) ? 0 : EINVAL;
860		break;
861
862	case FUSE_SETLKW:
863		err = (blen == 0) ? 0 : EINVAL;
864		break;
865
866	case FUSE_ACCESS:
867		err = (blen == 0) ? 0 : EINVAL;
868		break;
869
870	case FUSE_CREATE:
871		if (fuse_libabi_geq(ftick->tk_data, 7, 9)) {
872			err = (blen == sizeof(struct fuse_entry_out) +
873			    sizeof(struct fuse_open_out)) ? 0 : EINVAL;
874		} else {
875			err = (blen == FUSE_COMPAT_ENTRY_OUT_SIZE +
876			    sizeof(struct fuse_open_out)) ? 0 : EINVAL;
877		}
878		break;
879
880	case FUSE_DESTROY:
881		err = (blen == 0) ? 0 : EINVAL;
882		break;
883
884	default:
885		panic("FUSE: opcodes out of sync (%d)\n", opcode);
886	}
887
888	return err;
889}
890
891static inline void
892fuse_setup_ihead(struct fuse_in_header *ihead, struct fuse_ticket *ftick,
893    uint64_t nid, enum fuse_opcode op, size_t blen, pid_t pid,
894    struct ucred *cred)
895{
896	ihead->len = sizeof(*ihead) + blen;
897	ihead->unique = ftick->tk_unique;
898	ihead->nodeid = nid;
899	ihead->opcode = op;
900
901	ihead->pid = pid;
902	ihead->uid = cred->cr_uid;
903	ihead->gid = cred->cr_groups[0];
904}
905
906/*
907 * fuse_standard_handler just pulls indata and wakes up pretender.
908 * Doesn't try to interpret data, that's left for the pretender.
909 * Though might do a basic size verification before the pull-in takes place
910 */
911
912static int
913fuse_standard_handler(struct fuse_ticket *ftick, struct uio *uio)
914{
915	int err = 0;
916
917	err = fticket_pull(ftick, uio);
918
919	fuse_lck_mtx_lock(ftick->tk_aw_mtx);
920
921	if (!fticket_answered(ftick)) {
922		fticket_set_answered(ftick);
923		ftick->tk_aw_errno = err;
924		wakeup(ftick);
925	}
926	fuse_lck_mtx_unlock(ftick->tk_aw_mtx);
927
928	return err;
929}
930
/*
 * Reinitialize a dispatcher from a pid and node id, without resizing or
 * clearing its data buffers
 *
 * The dispatcher must already own a ticket whose request fiov is at least
 * large enough for fdip->iosize (both asserted).  The ticket's bookkeeping
 * is reset with fticket_reset() and a new request header is written.  mp is
 * not used.
 */
static void
fdisp_refresh_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op,
    struct mount *mp, uint64_t nid, pid_t pid, struct ucred *cred)
{
	MPASS(fdip->tick);
	MPASS2(sizeof(fdip->finh) + fdip->iosize <= fdip->tick->tk_ms_fiov.len,
		"Must use fdisp_make_pid to increase the size of the fiov");
	fticket_reset(fdip->tick);

	/*
	 * NOTE(review): FUSE_DIMALLOC presumably points finh and indata into
	 * the request fiov -- confirm against its definition.
	 */
	FUSE_DIMALLOC(&fdip->tick->tk_ms_fiov, fdip->finh,
	    fdip->indata, fdip->iosize);

	fuse_setup_ihead(fdip->finh, fdip->tick, nid, op, fdip->iosize, pid,
		cred);
}
950
951/* Initialize a dispatcher from a pid and node id */
952static void
953fdisp_make_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op,
954    struct fuse_data *data, uint64_t nid, pid_t pid, struct ucred *cred)
955{
956	if (fdip->tick) {
957		fticket_refresh(fdip->tick);
958	} else {
959		fdip->tick = fuse_ticket_fetch(data);
960	}
961
962	/* FUSE_DIMALLOC will bzero the fiovs when it enlarges them */
963	FUSE_DIMALLOC(&fdip->tick->tk_ms_fiov, fdip->finh,
964	    fdip->indata, fdip->iosize);
965
966	fuse_setup_ihead(fdip->finh, fdip->tick, nid, op, fdip->iosize, pid, cred);
967}
968
969void
970fdisp_make(struct fuse_dispatcher *fdip, enum fuse_opcode op, struct mount *mp,
971    uint64_t nid, struct thread *td, struct ucred *cred)
972{
973	struct fuse_data *data = fuse_get_mpdata(mp);
974	RECTIFY_TDCR(td, cred);
975
976	return fdisp_make_pid(fdip, op, data, nid, td->td_proc->p_pid, cred);
977}
978
979void
980fdisp_make_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op,
981    struct vnode *vp, struct thread *td, struct ucred *cred)
982{
983	struct mount *mp = vnode_mount(vp);
984	struct fuse_data *data = fuse_get_mpdata(mp);
985
986	RECTIFY_TDCR(td, cred);
987	return fdisp_make_pid(fdip, op, data, VTOI(vp),
988	    td->td_proc->p_pid, cred);
989}
990
991/* Refresh a fuse_dispatcher so it can be reused, but don't zero its data */
992void
993fdisp_refresh_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op,
994    struct vnode *vp, struct thread *td, struct ucred *cred)
995{
996	RECTIFY_TDCR(td, cred);
997	return fdisp_refresh_pid(fdip, op, vnode_mount(vp), VTOI(vp),
998	    td->td_proc->p_pid, cred);
999}
1000
/*
 * Refresh the dispatcher's ticket for reuse via fticket_refresh().  The
 * dispatcher's own fields are not modified.
 */
void
fdisp_refresh(struct fuse_dispatcher *fdip)
{
	fticket_refresh(fdip->tick);
}
1006
1007SDT_PROBE_DEFINE2(fusefs, , ipc, fdisp_wait_answ_error, "char*", "int");
1008
1009int
1010fdisp_wait_answ(struct fuse_dispatcher *fdip)
1011{
1012	int err = 0;
1013
1014	fdip->answ_stat = 0;
1015	fuse_insert_callback(fdip->tick, fuse_standard_handler);
1016	fuse_insert_message(fdip->tick, false);
1017
1018	if ((err = fticket_wait_answer(fdip->tick))) {
1019		fuse_lck_mtx_lock(fdip->tick->tk_aw_mtx);
1020
1021		if (fticket_answered(fdip->tick)) {
1022			/*
1023	                 * Just between noticing the interrupt and getting here,
1024	                 * the standard handler has completed his job.
1025	                 * So we drop the ticket and exit as usual.
1026	                 */
1027			SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error,
1028				"IPC: interrupted, already answered", err);
1029			fuse_lck_mtx_unlock(fdip->tick->tk_aw_mtx);
1030			goto out;
1031		} else {
1032			/*
1033	                 * So we were faster than the standard handler.
1034	                 * Then by setting the answered flag we get *him*
1035	                 * to drop the ticket.
1036	                 */
1037			SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error,
1038				"IPC: interrupted, setting to answered", err);
1039			fticket_set_answered(fdip->tick);
1040			fuse_lck_mtx_unlock(fdip->tick->tk_aw_mtx);
1041			return err;
1042		}
1043	}
1044
1045	if (fdip->tick->tk_aw_errno == ENOTCONN) {
1046		/* The daemon died while we were waiting for a response */
1047		err = ENOTCONN;
1048		goto out;
1049	} else if (fdip->tick->tk_aw_errno) {
1050		/*
1051		 * There was some sort of communication error with the daemon
1052		 * that the client wouldn't understand.
1053		 */
1054		SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error,
1055			"IPC: explicit EIO-ing", fdip->tick->tk_aw_errno);
1056		err = EIO;
1057		goto out;
1058	}
1059	if ((err = fdip->tick->tk_aw_ohead.error)) {
1060		SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error,
1061			"IPC: setting status", fdip->tick->tk_aw_ohead.error);
1062		/*
1063	         * This means a "proper" fuse syscall error.
1064	         * We record this value so the caller will
1065	         * be able to know it's not a boring messaging
1066	         * failure, if she wishes so (and if not, she can
1067	         * just simply propagate the return value of this routine).
1068	         * [XXX Maybe a bitflag would do the job too,
1069	         * if other flags needed, this will be converted thusly.]
1070	         */
1071		fdip->answ_stat = err;
1072		goto out;
1073	}
1074	fdip->answ = fticket_resp(fdip->tick)->base;
1075	fdip->iosize = fticket_resp(fdip->tick)->len;
1076
1077	return 0;
1078
1079out:
1080	return err;
1081}
1082
/*
 * Set up the file-scope state of the IPC layer: the UMA zone that tickets
 * are allocated from, wired to the fticket_* ctor/dtor/init/fini callbacks,
 * and the counter behind the ticket_count sysctl.
 */
void
fuse_ipc_init(void)
{
	ticket_zone = uma_zcreate("fuse_ticket", sizeof(struct fuse_ticket),
	    fticket_ctor, fticket_dtor, fticket_init, fticket_fini,
	    UMA_ALIGN_PTR, 0);
	fuse_ticket_count = counter_u64_alloc(M_WAITOK);
}
1091
/*
 * Undo fuse_ipc_init(): free the ticket counter, then destroy the ticket
 * zone.
 */
void
fuse_ipc_destroy(void)
{
	counter_u64_free(fuse_ticket_count);
	uma_zdestroy(ticket_zone);
}
1098