1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 2007-2009 Google Inc. and Amit Singh
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
9 * met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 *   notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 *   copyright notice, this list of conditions and the following disclaimer
15 *   in the documentation and/or other materials provided with the
16 *   distribution.
17 * * Neither the name of Google Inc. nor the names of its
18 *   contributors may be used to endorse or promote products derived from
19 *   this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Copyright (C) 2005 Csaba Henk.
34 * All rights reserved.
35 *
36 * Copyright (c) 2019 The FreeBSD Foundation
37 *
38 * Portions of this software were developed by BFF Storage Systems, LLC under
39 * sponsorship from the FreeBSD Foundation.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 *    notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 *    notice, this list of conditions and the following disclaimer in the
48 *    documentation and/or other materials provided with the distribution.
49 *
50 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 */
62
63#include <sys/param.h>
64#include <sys/module.h>
65#include <sys/systm.h>
66#include <sys/counter.h>
67#include <sys/errno.h>
68#include <sys/kernel.h>
69#include <sys/conf.h>
70#include <sys/uio.h>
71#include <sys/malloc.h>
72#include <sys/queue.h>
73#include <sys/lock.h>
74#include <sys/sx.h>
75#include <sys/mutex.h>
76#include <sys/proc.h>
77#include <sys/mount.h>
78#include <sys/sdt.h>
79#include <sys/vnode.h>
80#include <sys/signalvar.h>
81#include <sys/syscallsubr.h>
82#include <sys/sysctl.h>
83#include <vm/uma.h>
84
85#include "fuse.h"
86#include "fuse_node.h"
87#include "fuse_ipc.h"
88#include "fuse_internal.h"
89
/* Declare the fusefs DTrace provider that the probes in this file attach to */
SDT_PROVIDER_DECLARE(fusefs);
/*
 * Fuse trace probe:
 * arg0: verbosity.  Higher numbers give more verbose messages
 * arg1: Textual message
 */
SDT_PROBE_DEFINE2(fusefs, , ipc, trace, "int", "char*");

/* Forward declarations of helpers that are private to this file */
static void fdisp_make_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op,
    struct fuse_data *data, uint64_t nid, pid_t pid, struct ucred *cred);
static void fuse_interrupt_send(struct fuse_ticket *otick, int err);
static struct fuse_ticket *fticket_alloc(struct fuse_data *data);
static void fticket_refresh(struct fuse_ticket *ftick);
static inline void fticket_reset(struct fuse_ticket *ftick);
static void fticket_destroy(struct fuse_ticket *ftick);
static int fticket_wait_answer(struct fuse_ticket *ftick);
static inline int
fticket_aw_pull_uio(struct fuse_ticket *ftick,
    struct uio *uio);

static int fuse_body_audit(struct fuse_ticket *ftick, size_t blen);

/* Answer handler installed by fdisp_wait_answ() */
static fuse_handler_t fuse_standard_handler;

/*
 * Number of live tickets: incremented by fticket_ctor() and decremented by
 * fticket_dtor().  Allocated in fuse_ipc_init().
 */
static counter_u64_t fuse_ticket_count;
SYSCTL_COUNTER_U64(_vfs_fusefs_stats, OID_AUTO, ticket_count, CTLFLAG_RD,
    &fuse_ticket_count, "Number of allocated tickets");

/*
 * fiov_adjust() considers a buffer oversized when its allocation exceeds the
 * requested size by more than this many bytes.  A negative value disables
 * that check.
 */
static long fuse_iov_permanent_bufsize = 1 << 19;

SYSCTL_LONG(_vfs_fusefs, OID_AUTO, iov_permanent_bufsize, CTLFLAG_RW,
    &fuse_iov_permanent_bufsize, 0,
    "limit for permanently stored buffer size for fuse_iovs");
/*
 * Initial value of fuse_iov.credit.  fiov_adjust() decrements the credit each
 * time it finds the buffer oversized and reallocates once it drops below zero.
 */
static int fuse_iov_credit = 16;

SYSCTL_INT(_vfs_fusefs, OID_AUTO, iov_credit, CTLFLAG_RW,
    &fuse_iov_credit, 0,
    "how many times is an oversized fuse_iov tolerated");

/* Malloc type used for fuse_iov buffers and for struct fuse_data */
MALLOC_DEFINE(M_FUSEMSG, "fuse_msgbuf", "fuse message buffer");
/* UMA zone backing all struct fuse_ticket allocations; see fuse_ipc_init() */
static uma_zone_t ticket_zone;
131
132/*
133 * TODO: figure out how to timeout INTERRUPT requests, because the daemon may
134 * leagally never respond
135 */
136static int
137fuse_interrupt_callback(struct fuse_ticket *tick, struct uio *uio)
138{
139	struct fuse_ticket *otick, *x_tick;
140	struct fuse_interrupt_in *fii;
141	struct fuse_data *data = tick->tk_data;
142	bool found = false;
143
144	fii = (struct fuse_interrupt_in*)((char*)tick->tk_ms_fiov.base +
145		sizeof(struct fuse_in_header));
146
147	fuse_lck_mtx_lock(data->aw_mtx);
148	TAILQ_FOREACH_SAFE(otick, &data->aw_head, tk_aw_link, x_tick) {
149		if (otick->tk_unique == fii->unique) {
150			found = true;
151			break;
152		}
153	}
154	fuse_lck_mtx_unlock(data->aw_mtx);
155
156	if (!found) {
157		/* Original is already complete.  Just return */
158		return 0;
159	}
160
161	/* Clear the original ticket's interrupt association */
162	otick->irq_unique = 0;
163
164	if (tick->tk_aw_ohead.error == ENOSYS) {
165		fsess_set_notimpl(data->mp, FUSE_INTERRUPT);
166		return 0;
167	} else if (tick->tk_aw_ohead.error == EAGAIN) {
168		/*
169		 * There are two reasons we might get this:
170		 * 1) the daemon received the INTERRUPT request before the
171		 *    original, or
172		 * 2) the daemon received the INTERRUPT request after it
173		 *    completed the original request.
174		 * In the first case we should re-send the INTERRUPT.  In the
175		 * second, we should ignore it.
176		 */
177		/* Resend */
178		fuse_interrupt_send(otick, EINTR);
179		return 0;
180	} else {
181		/* Illegal FUSE_INTERRUPT response */
182		return EINVAL;
183	}
184}
185
186/* Interrupt the operation otick.  Return err as its error code */
187void
188fuse_interrupt_send(struct fuse_ticket *otick, int err)
189{
190	struct fuse_dispatcher fdi;
191	struct fuse_interrupt_in *fii;
192	struct fuse_in_header *ftick_hdr;
193	struct fuse_data *data = otick->tk_data;
194	struct fuse_ticket *tick, *xtick;
195	struct ucred reused_creds;
196	gid_t reused_groups[1];
197
198	if (otick->irq_unique == 0) {
199		/*
200		 * If the daemon hasn't yet received otick, then we can answer
201		 * it ourselves and return.
202		 */
203		fuse_lck_mtx_lock(data->ms_mtx);
204		STAILQ_FOREACH_SAFE(tick, &otick->tk_data->ms_head, tk_ms_link,
205			xtick) {
206			if (tick == otick) {
207				STAILQ_REMOVE(&otick->tk_data->ms_head, tick,
208					fuse_ticket, tk_ms_link);
209				otick->tk_data->ms_count--;
210				otick->tk_ms_link.stqe_next = NULL;
211				fuse_lck_mtx_unlock(data->ms_mtx);
212
213				fuse_lck_mtx_lock(otick->tk_aw_mtx);
214				if (!fticket_answered(otick)) {
215					fticket_set_answered(otick);
216					otick->tk_aw_errno = err;
217					wakeup(otick);
218				}
219				fuse_lck_mtx_unlock(otick->tk_aw_mtx);
220
221				fuse_ticket_drop(tick);
222				return;
223			}
224		}
225		fuse_lck_mtx_unlock(data->ms_mtx);
226
227		/*
228		 * If the fuse daemon doesn't support interrupts, then there's
229		 * nothing more that we can do
230		 */
231		if (fsess_not_impl(data->mp, FUSE_INTERRUPT))
232			return;
233
234		/*
235		 * If the fuse daemon has already received otick, then we must
236		 * send FUSE_INTERRUPT.
237		 */
238		ftick_hdr = fticket_in_header(otick);
239		reused_creds.cr_uid = ftick_hdr->uid;
240		reused_groups[0] = ftick_hdr->gid;
241		reused_creds.cr_groups = reused_groups;
242		fdisp_init(&fdi, sizeof(*fii));
243		fdisp_make_pid(&fdi, FUSE_INTERRUPT, data, ftick_hdr->nodeid,
244			ftick_hdr->pid, &reused_creds);
245
246		fii = fdi.indata;
247		fii->unique = otick->tk_unique;
248		fuse_insert_callback(fdi.tick, fuse_interrupt_callback);
249
250		otick->irq_unique = fdi.tick->tk_unique;
251		/* Interrupt ops should be delivered ASAP */
252		fuse_insert_message(fdi.tick, true);
253		fdisp_destroy(&fdi);
254	} else {
255		/* This ticket has already been interrupted */
256	}
257}
258
259void
260fiov_init(struct fuse_iov *fiov, size_t size)
261{
262	uint32_t msize = FU_AT_LEAST(size);
263
264	fiov->len = 0;
265
266	fiov->base = malloc(msize, M_FUSEMSG, M_WAITOK | M_ZERO);
267
268	fiov->allocated_size = msize;
269	fiov->credit = fuse_iov_credit;
270}
271
/*
 * Release the buffer owned by a fuse_iov.  The fuse_iov structure itself is
 * not freed and its fields are left untouched.
 */
void
fiov_teardown(struct fuse_iov *fiov)
{
	MPASS(fiov->base != NULL);
	free(fiov->base, M_FUSEMSG);
}
278
279void
280fiov_adjust(struct fuse_iov *fiov, size_t size)
281{
282	if (fiov->allocated_size < size ||
283	    (fuse_iov_permanent_bufsize >= 0 &&
284	    fiov->allocated_size - size > fuse_iov_permanent_bufsize &&
285	    --fiov->credit < 0)) {
286		fiov->base = realloc(fiov->base, FU_AT_LEAST(size), M_FUSEMSG,
287		    M_WAITOK | M_ZERO);
288		if (!fiov->base) {
289			panic("FUSE: realloc failed");
290		}
291		fiov->allocated_size = FU_AT_LEAST(size);
292		fiov->credit = fuse_iov_credit;
293		/* Clear data buffer after reallocation */
294		bzero(fiov->base, size);
295	} else if (size > fiov->len) {
296		/* Clear newly extended portion of data buffer */
297		bzero((char*)fiov->base + fiov->len, size - fiov->len);
298	}
299	fiov->len = size;
300}
301
/*
 * Resize the fiov if needed, and reset its used length to zero.  This is
 * fiov_adjust() with a size of 0, so the buffer may be shrunk if it has been
 * oversized for too long.
 */
void
fiov_refresh(struct fuse_iov *fiov)
{
	fiov_adjust(fiov, 0);
}
308
309static int
310fticket_ctor(void *mem, int size, void *arg, int flags)
311{
312	struct fuse_ticket *ftick = mem;
313	struct fuse_data *data = arg;
314
315	FUSE_ASSERT_MS_DONE(ftick);
316	FUSE_ASSERT_AW_DONE(ftick);
317
318	ftick->tk_data = data;
319	ftick->irq_unique = 0;
320	refcount_init(&ftick->tk_refcount, 1);
321	counter_u64_add(fuse_ticket_count, 1);
322
323	fticket_refresh(ftick);
324
325	return 0;
326}
327
328static void
329fticket_dtor(void *mem, int size, void *arg)
330{
331#ifdef INVARIANTS
332	struct fuse_ticket *ftick = mem;
333#endif
334
335	FUSE_ASSERT_MS_DONE(ftick);
336	FUSE_ASSERT_AW_DONE(ftick);
337
338	counter_u64_add(fuse_ticket_count, -1);
339}
340
341static int
342fticket_init(void *mem, int size, int flags)
343{
344	struct fuse_ticket *ftick = mem;
345
346	bzero(ftick, sizeof(struct fuse_ticket));
347
348	fiov_init(&ftick->tk_ms_fiov, sizeof(struct fuse_in_header));
349
350	mtx_init(&ftick->tk_aw_mtx, "fuse answer delivery mutex", NULL, MTX_DEF);
351	fiov_init(&ftick->tk_aw_fiov, 0);
352
353	return 0;
354}
355
356static void
357fticket_fini(void *mem, int size)
358{
359	struct fuse_ticket *ftick = mem;
360
361	fiov_teardown(&ftick->tk_ms_fiov);
362	fiov_teardown(&ftick->tk_aw_fiov);
363	mtx_destroy(&ftick->tk_aw_mtx);
364}
365
/*
 * Allocate a ticket from ticket_zone.  data is passed through as the
 * constructor argument (see fticket_ctor()).  The allocation uses M_WAITOK.
 */
static inline struct fuse_ticket *
fticket_alloc(struct fuse_data *data)
{
	return uma_zalloc_arg(ticket_zone, data, M_WAITOK);
}
371
372static inline void
373fticket_destroy(struct fuse_ticket *ftick)
374{
375	return uma_zfree(ticket_zone, ftick);
376}
377
/*
 * Prepare the ticket to be reused and clear its data buffers.
 *
 * Resets the ticket's bookkeeping (see fticket_reset(), which also assigns a
 * new unique id) and then resets both the request and the response fiov to
 * zero length.
 */
static inline void
fticket_refresh(struct fuse_ticket *ftick)
{
	fticket_reset(ftick);

	fiov_refresh(&ftick->tk_ms_fiov);
	fiov_refresh(&ftick->tk_aw_fiov);
}
387
388/* Prepare the ticket to be reused, but don't clear its data buffers */
389static inline void
390fticket_reset(struct fuse_ticket *ftick)
391{
392	struct fuse_data *data = ftick->tk_data;
393
394	FUSE_ASSERT_MS_DONE(ftick);
395	FUSE_ASSERT_AW_DONE(ftick);
396
397	bzero(&ftick->tk_aw_ohead, sizeof(struct fuse_out_header));
398
399	ftick->tk_aw_errno = 0;
400	ftick->tk_flag = 0;
401
402	/* May be truncated to 32 bits on LP32 arches */
403	ftick->tk_unique = atomic_fetchadd_long(&data->ticketer, 1);
404	if (ftick->tk_unique == 0)
405		ftick->tk_unique = atomic_fetchadd_long(&data->ticketer, 1);
406}
407
/*
 * Sleep until the ticket has been answered.
 *
 * Returns:
 *   0          the ticket is answered and the sleep was not cut short
 *   ENOTCONN   the session was found dead before sleeping
 *   ETIMEDOUT  data->daemon_timeout seconds passed without an answer
 *   EINTR or ERESTART
 *              the sleep was interrupted and we gave up waiting (see below)
 *   ENXIO      the thread was woken but the ticket is still unanswered
 *   other      any other error returned by msleep()
 *
 * On ENOTCONN and ETIMEDOUT the ticket is marked answered here.
 *
 * Signals are only allowed to interrupt the sleep when the session has
 * FSESS_INTR set and FUSE_INTERRUPT may be implemented by the daemon.  On the
 * first interruption fuse_interrupt_send() is called and, unless SIGKILL is
 * pending, the wait is retried once with all signals blocked.
 */
static int
fticket_wait_answer(struct fuse_ticket *ftick)
{
	struct thread *td = curthread;
	sigset_t blockedset, oldset;
	int err = 0, stops_deferred;
	struct fuse_data *data = ftick->tk_data;
	bool interrupted = false;

	if (fsess_maybe_impl(ftick->tk_data->mp, FUSE_INTERRUPT) &&
	    data->dataflags & FSESS_INTR) {
		SIGEMPTYSET(blockedset);
	} else {
		/* Block all signals except (implicitly) SIGKILL */
		SIGFILLSET(blockedset);
	}
	stops_deferred = sigdeferstop(SIGDEFERSTOP_SILENT);
	/* With a NULL set this only saves the current mask into oldset */
	kern_sigprocmask(td, SIG_BLOCK, NULL, &oldset, 0);

	fuse_lck_mtx_lock(ftick->tk_aw_mtx);

retry:
	if (fticket_answered(ftick)) {
		goto out;
	}

	if (fdata_get_dead(data)) {
		err = ENOTCONN;
		fticket_set_answered(ftick);
		goto out;
	}
	/* Apply blockedset for the duration of the sleep only */
	kern_sigprocmask(td, SIG_BLOCK, &blockedset, NULL, 0);
	err = msleep(ftick, &ftick->tk_aw_mtx, PCATCH, "fu_ans",
	    data->daemon_timeout * hz);
	kern_sigprocmask(td, SIG_SETMASK, &oldset, NULL, 0);
	if (err == EWOULDBLOCK) {
		SDT_PROBE2(fusefs, , ipc, trace, 3,
			"fticket_wait_answer: EWOULDBLOCK");
#ifdef XXXIP				/* die conditionally */
		if (!fdata_get_dead(data)) {
			fdata_set_dead(data);
		}
#endif
		err = ETIMEDOUT;
		fticket_set_answered(ftick);
	} else if ((err == EINTR || err == ERESTART)) {
		/*
		 * Whether we get EINTR or ERESTART depends on whether
		 * SA_RESTART was set by sigaction(2).
		 *
		 * Try to interrupt the operation and wait for an EINTR response
		 * to the original operation.  If the file system does not
		 * support FUSE_INTERRUPT, then we'll just wait for it to
		 * complete like normal.  If it does support FUSE_INTERRUPT,
		 * then it will either respond EINTR to the original operation,
		 * or EAGAIN to the interrupt.
		 */
		sigset_t tmpset;

		SDT_PROBE2(fusefs, , ipc, trace, 4,
			"fticket_wait_answer: interrupt");
		/* tk_aw_mtx is dropped here: fuse_interrupt_send() may take it */
		fuse_lck_mtx_unlock(ftick->tk_aw_mtx);
		fuse_interrupt_send(ftick, err);

		/* Collect the signals pending on the process and this thread */
		PROC_LOCK(td->td_proc);
		mtx_lock(&td->td_proc->p_sigacts->ps_mtx);
		tmpset = td->td_proc->p_siglist;
		SIGSETOR(tmpset, td->td_siglist);
		mtx_unlock(&td->td_proc->p_sigacts->ps_mtx);
		PROC_UNLOCK(td->td_proc);

		fuse_lck_mtx_lock(ftick->tk_aw_mtx);
		if (!interrupted && !SIGISMEMBER(tmpset, SIGKILL)) {
			/*
			 * Block all signals while we wait for an interrupt
			 * response.  The protocol doesn't discriminate between
			 * different signals.
			 */
			SIGFILLSET(blockedset);
			interrupted = true;
			goto retry;
		} else {
			/*
			 * Return immediately for fatal signals, or if this is
			 * the second interruption.  We should only be
			 * interrupted twice if the thread is stopped, for
			 * example during sigexit.
			 */
		}
	} else if (err) {
		SDT_PROBE2(fusefs, , ipc, trace, 6,
			"fticket_wait_answer: other error");
	} else {
		SDT_PROBE2(fusefs, , ipc, trace, 7, "fticket_wait_answer: OK");
	}
out:
	/* A clean wakeup that leaves the ticket unanswered is reported as ENXIO */
	if (!(err || fticket_answered(ftick))) {
		SDT_PROBE2(fusefs, , ipc, trace, 1,
			"FUSE: requester was woken up but still no answer");
		err = ENXIO;
	}
	fuse_lck_mtx_unlock(ftick->tk_aw_mtx);
	sigallowstop(stops_deferred);

	return err;
}
514
515static	inline
516int
517fticket_aw_pull_uio(struct fuse_ticket *ftick, struct uio *uio)
518{
519	int err = 0;
520	size_t len = uio_resid(uio);
521
522	if (len) {
523		fiov_adjust(fticket_resp(ftick), len);
524		err = uiomove(fticket_resp(ftick)->base, len, uio);
525	}
526	return err;
527}
528
529int
530fticket_pull(struct fuse_ticket *ftick, struct uio *uio)
531{
532	int err = 0;
533
534	if (ftick->tk_aw_ohead.error) {
535		return 0;
536	}
537	err = fuse_body_audit(ftick, uio_resid(uio));
538	if (!err) {
539		err = fticket_aw_pull_uio(ftick, uio);
540	}
541	return err;
542}
543
544struct fuse_data *
545fdata_alloc(struct cdev *fdev, struct ucred *cred)
546{
547	struct fuse_data *data;
548
549	data = malloc(sizeof(struct fuse_data), M_FUSEMSG, M_WAITOK | M_ZERO);
550
551	data->fdev = fdev;
552	mtx_init(&data->ms_mtx, "fuse message list mutex", NULL, MTX_DEF);
553	STAILQ_INIT(&data->ms_head);
554	data->ms_count = 0;
555	knlist_init_mtx(&data->ks_rsel.si_note, &data->ms_mtx);
556	mtx_init(&data->aw_mtx, "fuse answer list mutex", NULL, MTX_DEF);
557	TAILQ_INIT(&data->aw_head);
558	data->daemoncred = crhold(cred);
559	data->daemon_timeout = FUSE_DEFAULT_DAEMON_TIMEOUT;
560	sx_init(&data->rename_lock, "fuse rename lock");
561	data->ref = 1;
562
563	return data;
564}
565
566void
567fdata_trydestroy(struct fuse_data *data)
568{
569	data->ref--;
570	MPASS(data->ref >= 0);
571	if (data->ref != 0)
572		return;
573
574	/* Driving off stage all that stuff thrown at device... */
575	sx_destroy(&data->rename_lock);
576	crfree(data->daemoncred);
577	mtx_destroy(&data->aw_mtx);
578	knlist_delete(&data->ks_rsel.si_note, curthread, 0);
579	knlist_destroy(&data->ks_rsel.si_note);
580	mtx_destroy(&data->ms_mtx);
581
582	free(data, M_FUSEMSG);
583}
584
585void
586fdata_set_dead(struct fuse_data *data)
587{
588	FUSE_LOCK();
589	if (fdata_get_dead(data)) {
590		FUSE_UNLOCK();
591		return;
592	}
593	fuse_lck_mtx_lock(data->ms_mtx);
594	data->dataflags |= FSESS_DEAD;
595	wakeup_one(data);
596	selwakeuppri(&data->ks_rsel, PZERO + 1);
597	wakeup(&data->ticketer);
598	fuse_lck_mtx_unlock(data->ms_mtx);
599	FUSE_UNLOCK();
600}
601
602struct fuse_ticket *
603fuse_ticket_fetch(struct fuse_data *data)
604{
605	int err = 0;
606	struct fuse_ticket *ftick;
607
608	ftick = fticket_alloc(data);
609
610	if (!(data->dataflags & FSESS_INITED)) {
611		/* Sleep until get answer for INIT message */
612		FUSE_LOCK();
613		if (!(data->dataflags & FSESS_INITED) && data->ticketer > 2) {
614			err = msleep(&data->ticketer, &fuse_mtx, PCATCH | PDROP,
615			    "fu_ini", 0);
616			if (err)
617				fdata_set_dead(data);
618		} else
619			FUSE_UNLOCK();
620	}
621	return ftick;
622}
623
624int
625fuse_ticket_drop(struct fuse_ticket *ftick)
626{
627	int die;
628
629	die = refcount_release(&ftick->tk_refcount);
630	if (die)
631		fticket_destroy(ftick);
632
633	return die;
634}
635
636void
637fuse_insert_callback(struct fuse_ticket *ftick, fuse_handler_t * handler)
638{
639	if (fdata_get_dead(ftick->tk_data)) {
640		return;
641	}
642	ftick->tk_aw_handler = handler;
643
644	fuse_lck_mtx_lock(ftick->tk_data->aw_mtx);
645	fuse_aw_push(ftick);
646	fuse_lck_mtx_unlock(ftick->tk_data->aw_mtx);
647}
648
649/*
650 * Insert a new upgoing ticket into the message queue
651 *
652 * If urgent is true, insert at the front of the queue.  Otherwise, insert in
653 * FIFO order.
654 */
655void
656fuse_insert_message(struct fuse_ticket *ftick, bool urgent)
657{
658	if (ftick->tk_flag & FT_DIRTY) {
659		panic("FUSE: ticket reused without being refreshed");
660	}
661	ftick->tk_flag |= FT_DIRTY;
662
663	if (fdata_get_dead(ftick->tk_data)) {
664		return;
665	}
666	fuse_lck_mtx_lock(ftick->tk_data->ms_mtx);
667	if (urgent)
668		fuse_ms_push_head(ftick);
669	else
670		fuse_ms_push(ftick);
671	wakeup_one(ftick->tk_data);
672	selwakeuppri(&ftick->tk_data->ks_rsel, PZERO + 1);
673	KNOTE_LOCKED(&ftick->tk_data->ks_rsel.si_note, 0);
674	fuse_lck_mtx_unlock(ftick->tk_data->ms_mtx);
675}
676
677static int
678fuse_body_audit(struct fuse_ticket *ftick, size_t blen)
679{
680	int err = 0;
681	enum fuse_opcode opcode;
682
683	opcode = fticket_opcode(ftick);
684
685	switch (opcode) {
686	case FUSE_BMAP:
687		err = (blen == sizeof(struct fuse_bmap_out)) ? 0 : EINVAL;
688		break;
689
690	case FUSE_LINK:
691	case FUSE_LOOKUP:
692	case FUSE_MKDIR:
693	case FUSE_MKNOD:
694	case FUSE_SYMLINK:
695		if (fuse_libabi_geq(ftick->tk_data, 7, 9)) {
696			err = (blen == sizeof(struct fuse_entry_out)) ?
697				0 : EINVAL;
698		} else {
699			err = (blen == FUSE_COMPAT_ENTRY_OUT_SIZE) ? 0 : EINVAL;
700		}
701		break;
702
703	case FUSE_FORGET:
704		panic("FUSE: a handler has been intalled for FUSE_FORGET");
705		break;
706
707	case FUSE_GETATTR:
708	case FUSE_SETATTR:
709		if (fuse_libabi_geq(ftick->tk_data, 7, 9)) {
710			err = (blen == sizeof(struct fuse_attr_out)) ?
711			  0 : EINVAL;
712		} else {
713			err = (blen == FUSE_COMPAT_ATTR_OUT_SIZE) ? 0 : EINVAL;
714		}
715		break;
716
717	case FUSE_READLINK:
718		err = (PAGE_SIZE >= blen) ? 0 : EINVAL;
719		break;
720
721	case FUSE_UNLINK:
722		err = (blen == 0) ? 0 : EINVAL;
723		break;
724
725	case FUSE_RMDIR:
726		err = (blen == 0) ? 0 : EINVAL;
727		break;
728
729	case FUSE_RENAME:
730		err = (blen == 0) ? 0 : EINVAL;
731		break;
732
733	case FUSE_OPEN:
734		err = (blen == sizeof(struct fuse_open_out)) ? 0 : EINVAL;
735		break;
736
737	case FUSE_READ:
738		err = (((struct fuse_read_in *)(
739		    (char *)ftick->tk_ms_fiov.base +
740		    sizeof(struct fuse_in_header)
741		    ))->size >= blen) ? 0 : EINVAL;
742		break;
743
744	case FUSE_WRITE:
745		err = (blen == sizeof(struct fuse_write_out)) ? 0 : EINVAL;
746		break;
747
748	case FUSE_STATFS:
749		if (fuse_libabi_geq(ftick->tk_data, 7, 4)) {
750			err = (blen == sizeof(struct fuse_statfs_out)) ?
751			  0 : EINVAL;
752		} else {
753			err = (blen == FUSE_COMPAT_STATFS_SIZE) ? 0 : EINVAL;
754		}
755		break;
756
757	case FUSE_RELEASE:
758		err = (blen == 0) ? 0 : EINVAL;
759		break;
760
761	case FUSE_FSYNC:
762		err = (blen == 0) ? 0 : EINVAL;
763		break;
764
765	case FUSE_SETXATTR:
766		err = (blen == 0) ? 0 : EINVAL;
767		break;
768
769	case FUSE_GETXATTR:
770	case FUSE_LISTXATTR:
771		/*
772		 * These can have varying response lengths, and 0 length
773		 * isn't necessarily invalid.
774		 */
775		err = 0;
776		break;
777
778	case FUSE_REMOVEXATTR:
779		err = (blen == 0) ? 0 : EINVAL;
780		break;
781
782	case FUSE_FLUSH:
783		err = (blen == 0) ? 0 : EINVAL;
784		break;
785
786	case FUSE_INIT:
787		if (blen == sizeof(struct fuse_init_out) ||
788		    blen == FUSE_COMPAT_INIT_OUT_SIZE ||
789		    blen == FUSE_COMPAT_22_INIT_OUT_SIZE) {
790			err = 0;
791		} else {
792			err = EINVAL;
793		}
794		break;
795
796	case FUSE_OPENDIR:
797		err = (blen == sizeof(struct fuse_open_out)) ? 0 : EINVAL;
798		break;
799
800	case FUSE_READDIR:
801		err = (((struct fuse_read_in *)(
802		    (char *)ftick->tk_ms_fiov.base +
803		    sizeof(struct fuse_in_header)
804		    ))->size >= blen) ? 0 : EINVAL;
805		break;
806
807	case FUSE_RELEASEDIR:
808		err = (blen == 0) ? 0 : EINVAL;
809		break;
810
811	case FUSE_FSYNCDIR:
812		err = (blen == 0) ? 0 : EINVAL;
813		break;
814
815	case FUSE_GETLK:
816		err = (blen == sizeof(struct fuse_lk_out)) ? 0 : EINVAL;
817		break;
818
819	case FUSE_SETLK:
820		err = (blen == 0) ? 0 : EINVAL;
821		break;
822
823	case FUSE_SETLKW:
824		err = (blen == 0) ? 0 : EINVAL;
825		break;
826
827	case FUSE_ACCESS:
828		err = (blen == 0) ? 0 : EINVAL;
829		break;
830
831	case FUSE_CREATE:
832		if (fuse_libabi_geq(ftick->tk_data, 7, 9)) {
833			err = (blen == sizeof(struct fuse_entry_out) +
834			    sizeof(struct fuse_open_out)) ? 0 : EINVAL;
835		} else {
836			err = (blen == FUSE_COMPAT_ENTRY_OUT_SIZE +
837			    sizeof(struct fuse_open_out)) ? 0 : EINVAL;
838		}
839		break;
840
841	case FUSE_DESTROY:
842		err = (blen == 0) ? 0 : EINVAL;
843		break;
844
845	case FUSE_FALLOCATE:
846		err = (blen == 0) ? 0 : EINVAL;
847		break;
848
849	case FUSE_LSEEK:
850		err = (blen == sizeof(struct fuse_lseek_out)) ? 0 : EINVAL;
851		break;
852
853	case FUSE_COPY_FILE_RANGE:
854		err = (blen == sizeof(struct fuse_write_out)) ? 0 : EINVAL;
855		break;
856
857	default:
858		panic("FUSE: opcodes out of sync (%d)\n", opcode);
859	}
860
861	return err;
862}
863
864static inline void
865fuse_setup_ihead(struct fuse_in_header *ihead, struct fuse_ticket *ftick,
866    uint64_t nid, enum fuse_opcode op, size_t blen, pid_t pid,
867    struct ucred *cred)
868{
869	ihead->len = sizeof(*ihead) + blen;
870	ihead->unique = ftick->tk_unique;
871	ihead->nodeid = nid;
872	ihead->opcode = op;
873
874	ihead->pid = pid;
875	ihead->uid = cred->cr_uid;
876	ihead->gid = cred->cr_groups[0];
877}
878
879/*
880 * fuse_standard_handler just pulls indata and wakes up pretender.
881 * Doesn't try to interpret data, that's left for the pretender.
882 * Though might do a basic size verification before the pull-in takes place
883 */
884
885static int
886fuse_standard_handler(struct fuse_ticket *ftick, struct uio *uio)
887{
888	int err = 0;
889
890	err = fticket_pull(ftick, uio);
891
892	fuse_lck_mtx_lock(ftick->tk_aw_mtx);
893
894	if (!fticket_answered(ftick)) {
895		fticket_set_answered(ftick);
896		ftick->tk_aw_errno = err;
897		wakeup(ftick);
898	}
899	fuse_lck_mtx_unlock(ftick->tk_aw_mtx);
900
901	return err;
902}
903
904/*
905 * Reinitialize a dispatcher from a pid and node id, without resizing or
906 * clearing its data buffers
907 */
908static void
909fdisp_refresh_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op,
910    struct mount *mp, uint64_t nid, pid_t pid, struct ucred *cred)
911{
912	MPASS(fdip->tick);
913	MPASS2(sizeof(fdip->finh) + fdip->iosize <= fdip->tick->tk_ms_fiov.len,
914		"Must use fdisp_make_pid to increase the size of the fiov");
915	fticket_reset(fdip->tick);
916
917	FUSE_DIMALLOC(&fdip->tick->tk_ms_fiov, fdip->finh,
918	    fdip->indata, fdip->iosize);
919
920	fuse_setup_ihead(fdip->finh, fdip->tick, nid, op, fdip->iosize, pid,
921		cred);
922}
923
924/* Initialize a dispatcher from a pid and node id */
925static void
926fdisp_make_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op,
927    struct fuse_data *data, uint64_t nid, pid_t pid, struct ucred *cred)
928{
929	if (fdip->tick) {
930		fticket_refresh(fdip->tick);
931	} else {
932		fdip->tick = fuse_ticket_fetch(data);
933	}
934
935	/* FUSE_DIMALLOC will bzero the fiovs when it enlarges them */
936	FUSE_DIMALLOC(&fdip->tick->tk_ms_fiov, fdip->finh,
937	    fdip->indata, fdip->iosize);
938
939	fuse_setup_ihead(fdip->finh, fdip->tick, nid, op, fdip->iosize, pid, cred);
940}
941
942void
943fdisp_make(struct fuse_dispatcher *fdip, enum fuse_opcode op, struct mount *mp,
944    uint64_t nid, struct thread *td, struct ucred *cred)
945{
946	struct fuse_data *data = fuse_get_mpdata(mp);
947	RECTIFY_TDCR(td, cred);
948
949	return fdisp_make_pid(fdip, op, data, nid, td->td_proc->p_pid, cred);
950}
951
952void
953fdisp_make_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op,
954    struct vnode *vp, struct thread *td, struct ucred *cred)
955{
956	struct mount *mp = vnode_mount(vp);
957	struct fuse_data *data = fuse_get_mpdata(mp);
958
959	RECTIFY_TDCR(td, cred);
960	return fdisp_make_pid(fdip, op, data, VTOI(vp),
961	    td->td_proc->p_pid, cred);
962}
963
964/* Refresh a fuse_dispatcher so it can be reused, but don't zero its data */
965void
966fdisp_refresh_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op,
967    struct vnode *vp, struct thread *td, struct ucred *cred)
968{
969	RECTIFY_TDCR(td, cred);
970	return fdisp_refresh_pid(fdip, op, vnode_mount(vp), VTOI(vp),
971	    td->td_proc->p_pid, cred);
972}
973
974SDT_PROBE_DEFINE2(fusefs, , ipc, fdisp_wait_answ_error, "char*", "int");
975
976int
977fdisp_wait_answ(struct fuse_dispatcher *fdip)
978{
979	int err = 0;
980
981	fdip->answ_stat = 0;
982	fuse_insert_callback(fdip->tick, fuse_standard_handler);
983	fuse_insert_message(fdip->tick, false);
984
985	if ((err = fticket_wait_answer(fdip->tick))) {
986		fuse_lck_mtx_lock(fdip->tick->tk_aw_mtx);
987
988		if (fticket_answered(fdip->tick)) {
989			/*
990	                 * Just between noticing the interrupt and getting here,
991	                 * the standard handler has completed his job.
992	                 * So we drop the ticket and exit as usual.
993	                 */
994			SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error,
995				"IPC: interrupted, already answered", err);
996			fuse_lck_mtx_unlock(fdip->tick->tk_aw_mtx);
997			goto out;
998		} else {
999			/*
1000	                 * So we were faster than the standard handler.
1001	                 * Then by setting the answered flag we get *him*
1002	                 * to drop the ticket.
1003	                 */
1004			SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error,
1005				"IPC: interrupted, setting to answered", err);
1006			fticket_set_answered(fdip->tick);
1007			fuse_lck_mtx_unlock(fdip->tick->tk_aw_mtx);
1008			return err;
1009		}
1010	}
1011
1012	if (fdip->tick->tk_aw_errno == ENOTCONN) {
1013		/* The daemon died while we were waiting for a response */
1014		err = ENOTCONN;
1015		goto out;
1016	} else if (fdip->tick->tk_aw_errno) {
1017		/*
1018		 * There was some sort of communication error with the daemon
1019		 * that the client wouldn't understand.
1020		 */
1021		SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error,
1022			"IPC: explicit EIO-ing", fdip->tick->tk_aw_errno);
1023		err = EIO;
1024		goto out;
1025	}
1026	if ((err = fdip->tick->tk_aw_ohead.error)) {
1027		SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error,
1028			"IPC: setting status", fdip->tick->tk_aw_ohead.error);
1029		/*
1030	         * This means a "proper" fuse syscall error.
1031	         * We record this value so the caller will
1032	         * be able to know it's not a boring messaging
1033	         * failure, if she wishes so (and if not, she can
1034	         * just simply propagate the return value of this routine).
1035	         * [XXX Maybe a bitflag would do the job too,
1036	         * if other flags needed, this will be converted thusly.]
1037	         */
1038		fdip->answ_stat = err;
1039		goto out;
1040	}
1041	fdip->answ = fticket_resp(fdip->tick)->base;
1042	fdip->iosize = fticket_resp(fdip->tick)->len;
1043
1044	return 0;
1045
1046out:
1047	return err;
1048}
1049
1050void
1051fuse_ipc_init(void)
1052{
1053	ticket_zone = uma_zcreate("fuse_ticket", sizeof(struct fuse_ticket),
1054	    fticket_ctor, fticket_dtor, fticket_init, fticket_fini,
1055	    UMA_ALIGN_PTR, 0);
1056	fuse_ticket_count = counter_u64_alloc(M_WAITOK);
1057}
1058
1059void
1060fuse_ipc_destroy(void)
1061{
1062	counter_u64_free(fuse_ticket_count);
1063	uma_zdestroy(ticket_zone);
1064}
1065
1066SDT_PROBE_DEFINE3(fusefs,, ipc, warn, "struct fuse_data*", "unsigned", "char*");
1067void
1068fuse_warn(struct fuse_data *data, unsigned flag, const char *msg)
1069{
1070	SDT_PROBE3(fusefs, , ipc, warn, data, flag, msg);
1071	if (!(data->dataflags & flag)) {
1072		printf("WARNING: FUSE protocol violation for server mounted at "
1073		    "%s: %s  "
1074		    "This warning will not be repeated.\n",
1075		    data->mp->mnt_stat.f_mntonname, msg);
1076		data->dataflags |= flag;
1077	}
1078}
1079