1/*
2  FUSE: Filesystem in Userspace
3  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
4
5  This program can be distributed under the terms of the GNU GPL.
6  See the file COPYING.
7*/
8
9#include "fuse_i.h"
10
11#include <linux/init.h>
12#include <linux/module.h>
13#include <linux/poll.h>
14#include <linux/uio.h>
15#include <linux/miscdevice.h>
16#include <linux/pagemap.h>
17#include <linux/file.h>
18#include <linux/slab.h>
19#include <linux/pipe_fs_i.h>
20#include <linux/swap.h>
21#include <linux/splice.h>
22
23MODULE_ALIAS_MISCDEV(FUSE_MINOR);
24MODULE_ALIAS("devname:fuse");
25
26static struct kmem_cache *fuse_req_cachep;
27
28static struct fuse_conn *fuse_get_conn(struct file *file)
29{
	/*
	 * Lockless access is OK, because file->private_data is set
	 * once during mount and is valid until the file is released.
	 */
34	return file->private_data;
35}
36
37static void fuse_request_init(struct fuse_req *req)
38{
39	memset(req, 0, sizeof(*req));
40	INIT_LIST_HEAD(&req->list);
41	INIT_LIST_HEAD(&req->intr_entry);
42	init_waitqueue_head(&req->waitq);
43	atomic_set(&req->count, 1);
44}
45
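/*
 * Allocate a request from the slab cache.  The new request starts out
 * with a single reference, which is dropped again with
 * fuse_put_request().
 */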
46struct fuse_req *fuse_request_alloc(void)
47{
48	struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_KERNEL);
49	if (req)
50		fuse_request_init(req);
51	return req;
52}
53EXPORT_SYMBOL_GPL(fuse_request_alloc);
54
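/*
 * Same as fuse_request_alloc(), but with GFP_NOFS so that allocations
 * on writeback paths cannot recurse back into the filesystem.
 */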
55struct fuse_req *fuse_request_alloc_nofs(void)
56{
57	struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_NOFS);
58	if (req)
59		fuse_request_init(req);
60	return req;
61}
62
63void fuse_request_free(struct fuse_req *req)
64{
65	kmem_cache_free(fuse_req_cachep, req);
66}
67
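/*
 * Block every signal except SIGKILL for the current task, saving the
 * previous mask so that restore_sigs() can put it back.
 */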
68static void block_sigs(sigset_t *oldset)
69{
70	sigset_t mask;
71
72	siginitsetinv(&mask, sigmask(SIGKILL));
73	sigprocmask(SIG_BLOCK, &mask, oldset);
74}
75
76static void restore_sigs(sigset_t *oldset)
77{
78	sigprocmask(SIG_SETMASK, oldset, NULL);
79}
80
81static void __fuse_get_request(struct fuse_req *req)
82{
83	atomic_inc(&req->count);
84}
85
86/* Must be called with > 1 refcount */
87static void __fuse_put_request(struct fuse_req *req)
88{
89	BUG_ON(atomic_read(&req->count) < 2);
90	atomic_dec(&req->count);
91}
92
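/*
 * Record the credentials and pid of the current task in the request;
 * userspace sees these in the fuse_in_header of every request.
 */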
93static void fuse_req_init_context(struct fuse_req *req)
94{
95	req->in.h.uid = current_fsuid();
96	req->in.h.gid = current_fsgid();
97	req->in.h.pid = current->pid;
98}
99
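/*
 * Allocate a request for a normal operation.  Waits (killably, since
 * only SIGKILL is left unblocked) while the connection is blocked, and
 * returns ERR_PTR(-EINTR), ERR_PTR(-ENOTCONN) or ERR_PTR(-ENOMEM) on
 * failure.
 *
 * Typical caller pattern (sketch only, error handling trimmed):
 *
 *	req = fuse_get_req(fc);
 *	if (IS_ERR(req))
 *		return PTR_ERR(req);
 *	... fill in req->in ...
 *	fuse_request_send(fc, req);
 *	err = req->out.h.error;
 *	fuse_put_request(fc, req);
 */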
100struct fuse_req *fuse_get_req(struct fuse_conn *fc)
101{
102	struct fuse_req *req;
103	sigset_t oldset;
104	int intr;
105	int err;
106
107	atomic_inc(&fc->num_waiting);
108	block_sigs(&oldset);
109	intr = wait_event_interruptible(fc->blocked_waitq, !fc->blocked);
110	restore_sigs(&oldset);
111	err = -EINTR;
112	if (intr)
113		goto out;
114
115	err = -ENOTCONN;
116	if (!fc->connected)
117		goto out;
118
119	req = fuse_request_alloc();
120	err = -ENOMEM;
121	if (!req)
122		goto out;
123
124	fuse_req_init_context(req);
125	req->waiting = 1;
126	return req;
127
128 out:
129	atomic_dec(&fc->num_waiting);
130	return ERR_PTR(err);
131}
132EXPORT_SYMBOL_GPL(fuse_get_req);
133
/*
 * Get the request stored in fuse_file->reserved_req.  It may currently
 * be in use; if that is the case, wait for it to become available.
 */
139static struct fuse_req *get_reserved_req(struct fuse_conn *fc,
140					 struct file *file)
141{
142	struct fuse_req *req = NULL;
143	struct fuse_file *ff = file->private_data;
144
145	do {
146		wait_event(fc->reserved_req_waitq, ff->reserved_req);
147		spin_lock(&fc->lock);
148		if (ff->reserved_req) {
149			req = ff->reserved_req;
150			ff->reserved_req = NULL;
151			get_file(file);
152			req->stolen_file = file;
153		}
154		spin_unlock(&fc->lock);
155	} while (!req);
156
157	return req;
158}
159
160/*
161 * Put stolen request back into fuse_file->reserved_req
162 */
163static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
164{
165	struct file *file = req->stolen_file;
166	struct fuse_file *ff = file->private_data;
167
168	spin_lock(&fc->lock);
169	fuse_request_init(req);
170	BUG_ON(ff->reserved_req);
171	ff->reserved_req = req;
172	wake_up_all(&fc->reserved_req_waitq);
173	spin_unlock(&fc->lock);
174	fput(file);
175}
176
/*
 * Get a request for a file operation; always succeeds
 *
 * This is used for sending the FLUSH request, which must get to
 * userspace, due to POSIX locks which may need to be unlocked.
 *
 * If allocation fails due to OOM, use the reserved request in
 * fuse_file.
 *
 * This is very unlikely to deadlock accidentally, since the
 * filesystem should not have its own file open.  If deadlock is
 * intentional, it can still be broken by "aborting" the filesystem.
 */
190struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file)
191{
192	struct fuse_req *req;
193
194	atomic_inc(&fc->num_waiting);
195	wait_event(fc->blocked_waitq, !fc->blocked);
196	req = fuse_request_alloc();
197	if (!req)
198		req = get_reserved_req(fc, file);
199
200	fuse_req_init_context(req);
201	req->waiting = 1;
202	return req;
203}
204
205void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
206{
207	if (atomic_dec_and_test(&req->count)) {
208		if (req->waiting)
209			atomic_dec(&fc->num_waiting);
210
211		if (req->stolen_file)
212			put_reserved_req(fc, req);
213		else
214			fuse_request_free(req);
215	}
216}
217EXPORT_SYMBOL_GPL(fuse_put_request);
218
219static unsigned len_args(unsigned numargs, struct fuse_arg *args)
220{
221	unsigned nbytes = 0;
222	unsigned i;
223
224	for (i = 0; i < numargs; i++)
225		nbytes += args[i].size;
226
227	return nbytes;
228}
229
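/*
 * Allocate the next unique request ID.  The caller must hold fc->lock,
 * since reqctr is not otherwise protected.  Zero is skipped because a
 * zero 'unique' is treated specially (see the notification handling in
 * fuse_dev_do_write()).
 */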
230static u64 fuse_get_unique(struct fuse_conn *fc)
231{
232	fc->reqctr++;
233	/* zero is special */
234	if (fc->reqctr == 0)
235		fc->reqctr = 1;
236
237	return fc->reqctr;
238}
239
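/*
 * Add a request to the end of the pending queue and notify anyone
 * waiting on the device (readers, poll, SIGIO).  The caller must hold
 * fc->lock.
 */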
240static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
241{
242	req->in.h.len = sizeof(struct fuse_in_header) +
243		len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
244	list_add_tail(&req->list, &fc->pending);
245	req->state = FUSE_REQ_PENDING;
246	if (!req->waiting) {
247		req->waiting = 1;
248		atomic_inc(&fc->num_waiting);
249	}
250	wake_up(&fc->waitq);
251	kill_fasync(&fc->fasync, SIGIO, POLL_IN);
252}
253
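/*
 * Move requests from the background queue to the pending queue while
 * fewer than max_background requests are active.  The caller must hold
 * fc->lock.
 */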
254static void flush_bg_queue(struct fuse_conn *fc)
255{
256	while (fc->active_background < fc->max_background &&
257	       !list_empty(&fc->bg_queue)) {
258		struct fuse_req *req;
259
260		req = list_entry(fc->bg_queue.next, struct fuse_req, list);
261		list_del(&req->list);
262		fc->active_background++;
263		req->in.h.unique = fuse_get_unique(fc);
264		queue_request(fc, req);
265	}
266}
267
/*
 * This function is called when a request is finished.  Either a reply
 * has arrived or it was aborted (and not yet sent) or some error
 * occurred during communication with userspace, or the device file
 * was closed.  The requester thread is woken up (if still waiting),
 * the 'end' callback is called if given, else the reference to the
 * request is released.
 *
 * Called with fc->lock held, unlocks it
 */
278static void request_end(struct fuse_conn *fc, struct fuse_req *req)
279__releases(fc->lock)
280{
281	void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
282	req->end = NULL;
283	list_del(&req->list);
284	list_del(&req->intr_entry);
285	req->state = FUSE_REQ_FINISHED;
286	if (req->background) {
287		if (fc->num_background == fc->max_background) {
288			fc->blocked = 0;
289			wake_up_all(&fc->blocked_waitq);
290		}
291		if (fc->num_background == fc->congestion_threshold &&
292		    fc->connected && fc->bdi_initialized) {
293			clear_bdi_congested(&fc->bdi, BLK_RW_SYNC);
294			clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
295		}
296		fc->num_background--;
297		fc->active_background--;
298		flush_bg_queue(fc);
299	}
300	spin_unlock(&fc->lock);
301	wake_up(&req->waitq);
302	if (end)
303		end(fc, req);
304	fuse_put_request(fc, req);
305}
306
307static void wait_answer_interruptible(struct fuse_conn *fc,
308				      struct fuse_req *req)
309__releases(fc->lock)
310__acquires(fc->lock)
311{
312	if (signal_pending(current))
313		return;
314
315	spin_unlock(&fc->lock);
316	wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED);
317	spin_lock(&fc->lock);
318}
319
320static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req)
321{
322	list_add_tail(&req->intr_entry, &fc->interrupts);
323	wake_up(&fc->waitq);
324	kill_fasync(&fc->fasync, SIGIO, POLL_IN);
325}
326
327static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
328__releases(fc->lock)
329__acquires(fc->lock)
330{
331	if (!fc->no_interrupt) {
332		/* Any signal may interrupt this */
333		wait_answer_interruptible(fc, req);
334
335		if (req->aborted)
336			goto aborted;
337		if (req->state == FUSE_REQ_FINISHED)
338			return;
339
340		req->interrupted = 1;
341		if (req->state == FUSE_REQ_SENT)
342			queue_interrupt(fc, req);
343	}
344
345	if (!req->force) {
346		sigset_t oldset;
347
348		/* Only fatal signals may interrupt this */
349		block_sigs(&oldset);
350		wait_answer_interruptible(fc, req);
351		restore_sigs(&oldset);
352
353		if (req->aborted)
354			goto aborted;
355		if (req->state == FUSE_REQ_FINISHED)
356			return;
357
358		/* Request is not yet in userspace, bail out */
359		if (req->state == FUSE_REQ_PENDING) {
360			list_del(&req->list);
361			__fuse_put_request(req);
362			req->out.h.error = -EINTR;
363			return;
364		}
365	}
366
367	/*
368	 * Either request is already in userspace, or it was forced.
369	 * Wait it out.
370	 */
371	spin_unlock(&fc->lock);
372	wait_event(req->waitq, req->state == FUSE_REQ_FINISHED);
373	spin_lock(&fc->lock);
374
375	if (!req->aborted)
376		return;
377
378 aborted:
379	BUG_ON(req->state != FUSE_REQ_FINISHED);
380	if (req->locked) {
381		/* This is uninterruptible sleep, because data is
382		   being copied to/from the buffers of req.  During
383		   locked state, there mustn't be any filesystem
384		   operation (e.g. page fault), since that could lead
385		   to deadlock */
386		spin_unlock(&fc->lock);
387		wait_event(req->waitq, !req->locked);
388		spin_lock(&fc->lock);
389	}
390}
391
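/*
 * Send a request and wait for the answer.  On return the result of the
 * operation is in req->out.h.error; if the connection is gone or in an
 * error state, that error is set without queuing anything.
 */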
392void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
393{
394	req->isreply = 1;
395	spin_lock(&fc->lock);
396	if (!fc->connected)
397		req->out.h.error = -ENOTCONN;
398	else if (fc->conn_error)
399		req->out.h.error = -ECONNREFUSED;
400	else {
401		req->in.h.unique = fuse_get_unique(fc);
402		queue_request(fc, req);
403		/* acquire extra reference, since request is still needed
404		   after request_end() */
405		__fuse_get_request(req);
406
407		request_wait_answer(fc, req);
408	}
409	spin_unlock(&fc->lock);
410}
411EXPORT_SYMBOL_GPL(fuse_request_send);
412
413static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
414					    struct fuse_req *req)
415{
416	req->background = 1;
417	fc->num_background++;
418	if (fc->num_background == fc->max_background)
419		fc->blocked = 1;
420	if (fc->num_background == fc->congestion_threshold &&
421	    fc->bdi_initialized) {
422		set_bdi_congested(&fc->bdi, BLK_RW_SYNC);
423		set_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
424	}
425	list_add_tail(&req->list, &fc->bg_queue);
426	flush_bg_queue(fc);
427}
428
429static void fuse_request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
430{
431	spin_lock(&fc->lock);
432	if (fc->connected) {
433		fuse_request_send_nowait_locked(fc, req);
434		spin_unlock(&fc->lock);
435	} else {
436		req->out.h.error = -ENOTCONN;
437		request_end(fc, req);
438	}
439}
440
441void fuse_request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
442{
443	req->isreply = 0;
444	fuse_request_send_nowait(fc, req);
445}
446
447void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
448{
449	req->isreply = 1;
450	fuse_request_send_nowait(fc, req);
451}
452EXPORT_SYMBOL_GPL(fuse_request_send_background);
453
454static int fuse_request_send_notify_reply(struct fuse_conn *fc,
455					  struct fuse_req *req, u64 unique)
456{
457	int err = -ENODEV;
458
459	req->isreply = 0;
460	req->in.h.unique = unique;
461	spin_lock(&fc->lock);
462	if (fc->connected) {
463		queue_request(fc, req);
464		err = 0;
465	}
466	spin_unlock(&fc->lock);
467
468	return err;
469}
470
471/*
472 * Called under fc->lock
473 *
474 * fc->connected must have been checked previously
475 */
476void fuse_request_send_background_locked(struct fuse_conn *fc,
477					 struct fuse_req *req)
478{
479	req->isreply = 1;
480	fuse_request_send_nowait_locked(fc, req);
481}
482
/*
 * Lock the request.  Up to the next unlock_request() there mustn't be
 * anything that could cause a page-fault.  If the request was already
 * aborted, bail out.
 */
488static int lock_request(struct fuse_conn *fc, struct fuse_req *req)
489{
490	int err = 0;
491	if (req) {
492		spin_lock(&fc->lock);
493		if (req->aborted)
494			err = -ENOENT;
495		else
496			req->locked = 1;
497		spin_unlock(&fc->lock);
498	}
499	return err;
500}
501
/*
 * Unlock request.  If it was aborted while locked, the requester
 * thread is currently waiting for it to be unlocked, so wake it up.
 */
507static void unlock_request(struct fuse_conn *fc, struct fuse_req *req)
508{
509	if (req) {
510		spin_lock(&fc->lock);
511		req->locked = 0;
512		if (req->aborted)
513			wake_up(&req->waitq);
514		spin_unlock(&fc->lock);
515	}
516}
517
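/*
 * State for copying data between a request and userspace.  The
 * userspace side is either an iovec (plain read/write on the device)
 * or an array of pipe buffers (splice).  'write' is nonzero when data
 * flows from the kernel request into the userspace buffer, i.e. the
 * device-read direction.
 */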
518struct fuse_copy_state {
519	struct fuse_conn *fc;
520	int write;
521	struct fuse_req *req;
522	const struct iovec *iov;
523	struct pipe_buffer *pipebufs;
524	struct pipe_buffer *currbuf;
525	struct pipe_inode_info *pipe;
526	unsigned long nr_segs;
527	unsigned long seglen;
528	unsigned long addr;
529	struct page *pg;
530	void *mapaddr;
531	void *buf;
532	unsigned len;
533	unsigned move_pages:1;
534};
535
536static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc,
537			   int write,
538			   const struct iovec *iov, unsigned long nr_segs)
539{
540	memset(cs, 0, sizeof(*cs));
541	cs->fc = fc;
542	cs->write = write;
543	cs->iov = iov;
544	cs->nr_segs = nr_segs;
545}
546
547/* Unmap and put previous page of userspace buffer */
548static void fuse_copy_finish(struct fuse_copy_state *cs)
549{
550	if (cs->currbuf) {
551		struct pipe_buffer *buf = cs->currbuf;
552
553		if (!cs->write) {
554			buf->ops->unmap(cs->pipe, buf, cs->mapaddr);
555		} else {
556			kunmap(buf->page);
557			buf->len = PAGE_SIZE - cs->len;
558		}
559		cs->currbuf = NULL;
560		cs->mapaddr = NULL;
561	} else if (cs->mapaddr) {
562		kunmap(cs->pg);
563		if (cs->write) {
564			flush_dcache_page(cs->pg);
565			set_page_dirty_lock(cs->pg);
566		}
567		put_page(cs->pg);
568		cs->mapaddr = NULL;
569	}
570}
571
/*
 * Get another page's worth of the userspace buffer, map it into kernel
 * address space, and lock the request
 */
576static int fuse_copy_fill(struct fuse_copy_state *cs)
577{
578	unsigned long offset;
579	int err;
580
581	unlock_request(cs->fc, cs->req);
582	fuse_copy_finish(cs);
583	if (cs->pipebufs) {
584		struct pipe_buffer *buf = cs->pipebufs;
585
586		if (!cs->write) {
587			err = buf->ops->confirm(cs->pipe, buf);
588			if (err)
589				return err;
590
591			BUG_ON(!cs->nr_segs);
592			cs->currbuf = buf;
593			cs->mapaddr = buf->ops->map(cs->pipe, buf, 0);
594			cs->len = buf->len;
595			cs->buf = cs->mapaddr + buf->offset;
596			cs->pipebufs++;
597			cs->nr_segs--;
598		} else {
599			struct page *page;
600
601			if (cs->nr_segs == cs->pipe->buffers)
602				return -EIO;
603
604			page = alloc_page(GFP_HIGHUSER);
605			if (!page)
606				return -ENOMEM;
607
608			buf->page = page;
609			buf->offset = 0;
610			buf->len = 0;
611
612			cs->currbuf = buf;
613			cs->mapaddr = kmap(page);
614			cs->buf = cs->mapaddr;
615			cs->len = PAGE_SIZE;
616			cs->pipebufs++;
617			cs->nr_segs++;
618		}
619	} else {
620		if (!cs->seglen) {
621			BUG_ON(!cs->nr_segs);
622			cs->seglen = cs->iov[0].iov_len;
623			cs->addr = (unsigned long) cs->iov[0].iov_base;
624			cs->iov++;
625			cs->nr_segs--;
626		}
627		err = get_user_pages_fast(cs->addr, 1, cs->write, &cs->pg);
628		if (err < 0)
629			return err;
630		BUG_ON(err != 1);
631		offset = cs->addr % PAGE_SIZE;
632		cs->mapaddr = kmap(cs->pg);
633		cs->buf = cs->mapaddr + offset;
634		cs->len = min(PAGE_SIZE - offset, cs->seglen);
635		cs->seglen -= cs->len;
636		cs->addr += cs->len;
637	}
638
639	return lock_request(cs->fc, cs->req);
640}
641
642/* Do as much copy to/from userspace buffer as we can */
643static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
644{
645	unsigned ncpy = min(*size, cs->len);
646	if (val) {
647		if (cs->write)
648			memcpy(cs->buf, *val, ncpy);
649		else
650			memcpy(*val, cs->buf, ncpy);
651		*val += ncpy;
652	}
653	*size -= ncpy;
654	cs->len -= ncpy;
655	cs->buf += ncpy;
656	return ncpy;
657}
658
659static int fuse_check_page(struct page *page)
660{
661	if (page_mapcount(page) ||
662	    page->mapping != NULL ||
663	    page_count(page) != 1 ||
664	    (page->flags & PAGE_FLAGS_CHECK_AT_PREP &
665	     ~(1 << PG_locked |
666	       1 << PG_referenced |
667	       1 << PG_uptodate |
668	       1 << PG_lru |
669	       1 << PG_active |
670	       1 << PG_reclaim))) {
671		printk(KERN_WARNING "fuse: trying to steal weird page\n");
672		printk(KERN_WARNING "  page=%p index=%li flags=%08lx, count=%i, mapcount=%i, mapping=%p\n", page, page->index, page->flags, page_count(page), page_mapcount(page), page->mapping);
673		return 1;
674	}
675	return 0;
676}
677
678static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
679{
680	int err;
681	struct page *oldpage = *pagep;
682	struct page *newpage;
683	struct pipe_buffer *buf = cs->pipebufs;
684	struct address_space *mapping;
685	pgoff_t index;
686
687	unlock_request(cs->fc, cs->req);
688	fuse_copy_finish(cs);
689
690	err = buf->ops->confirm(cs->pipe, buf);
691	if (err)
692		return err;
693
694	BUG_ON(!cs->nr_segs);
695	cs->currbuf = buf;
696	cs->len = buf->len;
697	cs->pipebufs++;
698	cs->nr_segs--;
699
700	if (cs->len != PAGE_SIZE)
701		goto out_fallback;
702
703	if (buf->ops->steal(cs->pipe, buf) != 0)
704		goto out_fallback;
705
706	newpage = buf->page;
707
708	if (WARN_ON(!PageUptodate(newpage)))
709		return -EIO;
710
711	ClearPageMappedToDisk(newpage);
712
713	if (fuse_check_page(newpage) != 0)
714		goto out_fallback_unlock;
715
716	mapping = oldpage->mapping;
717	index = oldpage->index;
718
	/*
	 * This is a new and locked page; it shouldn't be mapped or
	 * have any special flags on it
	 */
723	if (WARN_ON(page_mapped(oldpage)))
724		goto out_fallback_unlock;
725	if (WARN_ON(page_has_private(oldpage)))
726		goto out_fallback_unlock;
727	if (WARN_ON(PageDirty(oldpage) || PageWriteback(oldpage)))
728		goto out_fallback_unlock;
729	if (WARN_ON(PageMlocked(oldpage)))
730		goto out_fallback_unlock;
731
732	remove_from_page_cache(oldpage);
733	page_cache_release(oldpage);
734
735	err = add_to_page_cache_locked(newpage, mapping, index, GFP_KERNEL);
736	if (err) {
737		printk(KERN_WARNING "fuse_try_move_page: failed to add page");
738		goto out_fallback_unlock;
739	}
740	page_cache_get(newpage);
741
742	if (!(buf->flags & PIPE_BUF_FLAG_LRU))
743		lru_cache_add_file(newpage);
744
745	err = 0;
746	spin_lock(&cs->fc->lock);
747	if (cs->req->aborted)
748		err = -ENOENT;
749	else
750		*pagep = newpage;
751	spin_unlock(&cs->fc->lock);
752
753	if (err) {
754		unlock_page(newpage);
755		page_cache_release(newpage);
756		return err;
757	}
758
759	unlock_page(oldpage);
760	page_cache_release(oldpage);
761	cs->len = 0;
762
763	return 0;
764
765out_fallback_unlock:
766	unlock_page(newpage);
767out_fallback:
768	cs->mapaddr = buf->ops->map(cs->pipe, buf, 1);
769	cs->buf = cs->mapaddr + buf->offset;
770
771	err = lock_request(cs->fc, cs->req);
772	if (err)
773		return err;
774
775	return 1;
776}
777
778static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
779			 unsigned offset, unsigned count)
780{
781	struct pipe_buffer *buf;
782
783	if (cs->nr_segs == cs->pipe->buffers)
784		return -EIO;
785
786	unlock_request(cs->fc, cs->req);
787	fuse_copy_finish(cs);
788
789	buf = cs->pipebufs;
790	page_cache_get(page);
791	buf->page = page;
792	buf->offset = offset;
793	buf->len = count;
794
795	cs->pipebufs++;
796	cs->nr_segs++;
797	cs->len = 0;
798
799	return 0;
800}
801
802/*
803 * Copy a page in the request to/from the userspace buffer.  Must be
804 * done atomically
805 */
806static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
807			  unsigned offset, unsigned count, int zeroing)
808{
809	int err;
810	struct page *page = *pagep;
811
812	if (page && zeroing && count < PAGE_SIZE) {
813		void *mapaddr = kmap_atomic(page, KM_USER1);
814		memset(mapaddr, 0, PAGE_SIZE);
815		kunmap_atomic(mapaddr, KM_USER1);
816	}
817	while (count) {
818		if (cs->write && cs->pipebufs && page) {
819			return fuse_ref_page(cs, page, offset, count);
820		} else if (!cs->len) {
821			if (cs->move_pages && page &&
822			    offset == 0 && count == PAGE_SIZE) {
823				err = fuse_try_move_page(cs, pagep);
824				if (err <= 0)
825					return err;
826			} else {
827				err = fuse_copy_fill(cs);
828				if (err)
829					return err;
830			}
831		}
832		if (page) {
833			void *mapaddr = kmap_atomic(page, KM_USER1);
834			void *buf = mapaddr + offset;
835			offset += fuse_copy_do(cs, &buf, &count);
836			kunmap_atomic(mapaddr, KM_USER1);
837		} else
838			offset += fuse_copy_do(cs, NULL, &count);
839	}
840	if (page && !cs->write)
841		flush_dcache_page(page);
842	return 0;
843}
844
845/* Copy pages in the request to/from userspace buffer */
846static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
847			   int zeroing)
848{
849	unsigned i;
850	struct fuse_req *req = cs->req;
851	unsigned offset = req->page_offset;
852	unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);
853
854	for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
855		int err;
856
857		err = fuse_copy_page(cs, &req->pages[i], offset, count,
858				     zeroing);
859		if (err)
860			return err;
861
862		nbytes -= count;
863		count = min(nbytes, (unsigned) PAGE_SIZE);
864		offset = 0;
865	}
866	return 0;
867}
868
869/* Copy a single argument in the request to/from userspace buffer */
870static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
871{
872	while (size) {
873		if (!cs->len) {
874			int err = fuse_copy_fill(cs);
875			if (err)
876				return err;
877		}
878		fuse_copy_do(cs, &val, &size);
879	}
880	return 0;
881}
882
883/* Copy request arguments to/from userspace buffer */
884static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
885			  unsigned argpages, struct fuse_arg *args,
886			  int zeroing)
887{
888	int err = 0;
889	unsigned i;
890
891	for (i = 0; !err && i < numargs; i++)  {
892		struct fuse_arg *arg = &args[i];
893		if (i == numargs - 1 && argpages)
894			err = fuse_copy_pages(cs, arg->size, zeroing);
895		else
896			err = fuse_copy_one(cs, arg->value, arg->size);
897	}
898	return err;
899}
900
901static int request_pending(struct fuse_conn *fc)
902{
903	return !list_empty(&fc->pending) || !list_empty(&fc->interrupts);
904}
905
906/* Wait until a request is available on the pending list */
907static void request_wait(struct fuse_conn *fc)
908__releases(fc->lock)
909__acquires(fc->lock)
910{
911	DECLARE_WAITQUEUE(wait, current);
912
913	add_wait_queue_exclusive(&fc->waitq, &wait);
914	while (fc->connected && !request_pending(fc)) {
915		set_current_state(TASK_INTERRUPTIBLE);
916		if (signal_pending(current))
917			break;
918
919		spin_unlock(&fc->lock);
920		schedule();
921		spin_lock(&fc->lock);
922	}
923	set_current_state(TASK_RUNNING);
924	remove_wait_queue(&fc->waitq, &wait);
925}
926
927/*
928 * Transfer an interrupt request to userspace
929 *
930 * Unlike other requests this is assembled on demand, without a need
931 * to allocate a separate fuse_req structure.
932 *
933 * Called with fc->lock held, releases it
934 */
935static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_copy_state *cs,
936			       size_t nbytes, struct fuse_req *req)
937__releases(fc->lock)
938{
939	struct fuse_in_header ih;
940	struct fuse_interrupt_in arg;
941	unsigned reqsize = sizeof(ih) + sizeof(arg);
942	int err;
943
944	list_del_init(&req->intr_entry);
945	req->intr_unique = fuse_get_unique(fc);
946	memset(&ih, 0, sizeof(ih));
947	memset(&arg, 0, sizeof(arg));
948	ih.len = reqsize;
949	ih.opcode = FUSE_INTERRUPT;
950	ih.unique = req->intr_unique;
951	arg.unique = req->in.h.unique;
952
953	spin_unlock(&fc->lock);
954	if (nbytes < reqsize)
955		return -EINVAL;
956
957	err = fuse_copy_one(cs, &ih, sizeof(ih));
958	if (!err)
959		err = fuse_copy_one(cs, &arg, sizeof(arg));
960	fuse_copy_finish(cs);
961
962	return err ? err : reqsize;
963}
964
/*
 * Read a single request into the userspace filesystem's buffer.  This
 * function waits until a request is available, then removes it from
 * the pending list and copies request data to the userspace buffer.  If
 * no reply is needed (FORGET), the request has been aborted, or there
 * was an error during the copying, then it is finished by calling
 * request_end().  Otherwise add it to the processing list and set
 * the 'sent' flag.
 */
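/*
 * Illustrative sketch of the matching userspace side (not part of this
 * file): a daemon thread typically loops doing
 *
 *	n = read(fuse_fd, buf, bufsize);	// one whole request per read
 *	in = (struct fuse_in_header *) buf;	// args follow the header
 *	... dispatch on in->opcode ...
 *
 * where bufsize must be large enough to hold the largest possible
 * request; otherwise the request is failed with an error here and the
 * read is restarted.
 */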
974static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file,
975				struct fuse_copy_state *cs, size_t nbytes)
976{
977	int err;
978	struct fuse_req *req;
979	struct fuse_in *in;
980	unsigned reqsize;
981
982 restart:
983	spin_lock(&fc->lock);
984	err = -EAGAIN;
985	if ((file->f_flags & O_NONBLOCK) && fc->connected &&
986	    !request_pending(fc))
987		goto err_unlock;
988
989	request_wait(fc);
990	err = -ENODEV;
991	if (!fc->connected)
992		goto err_unlock;
993	err = -ERESTARTSYS;
994	if (!request_pending(fc))
995		goto err_unlock;
996
997	if (!list_empty(&fc->interrupts)) {
998		req = list_entry(fc->interrupts.next, struct fuse_req,
999				 intr_entry);
1000		return fuse_read_interrupt(fc, cs, nbytes, req);
1001	}
1002
1003	req = list_entry(fc->pending.next, struct fuse_req, list);
1004	req->state = FUSE_REQ_READING;
1005	list_move(&req->list, &fc->io);
1006
1007	in = &req->in;
1008	reqsize = in->h.len;
1009	/* If request is too large, reply with an error and restart the read */
1010	if (nbytes < reqsize) {
1011		req->out.h.error = -EIO;
		/* SETXATTR is special, since it may carry data larger than the read buffer */
1013		if (in->h.opcode == FUSE_SETXATTR)
1014			req->out.h.error = -E2BIG;
1015		request_end(fc, req);
1016		goto restart;
1017	}
1018	spin_unlock(&fc->lock);
1019	cs->req = req;
1020	err = fuse_copy_one(cs, &in->h, sizeof(in->h));
1021	if (!err)
1022		err = fuse_copy_args(cs, in->numargs, in->argpages,
1023				     (struct fuse_arg *) in->args, 0);
1024	fuse_copy_finish(cs);
1025	spin_lock(&fc->lock);
1026	req->locked = 0;
1027	if (req->aborted) {
1028		request_end(fc, req);
1029		return -ENODEV;
1030	}
1031	if (err) {
1032		req->out.h.error = -EIO;
1033		request_end(fc, req);
1034		return err;
1035	}
1036	if (!req->isreply)
1037		request_end(fc, req);
1038	else {
1039		req->state = FUSE_REQ_SENT;
1040		list_move_tail(&req->list, &fc->processing);
1041		if (req->interrupted)
1042			queue_interrupt(fc, req);
1043		spin_unlock(&fc->lock);
1044	}
1045	return reqsize;
1046
1047 err_unlock:
1048	spin_unlock(&fc->lock);
1049	return err;
1050}
1051
1052static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
1053			      unsigned long nr_segs, loff_t pos)
1054{
1055	struct fuse_copy_state cs;
1056	struct file *file = iocb->ki_filp;
1057	struct fuse_conn *fc = fuse_get_conn(file);
1058	if (!fc)
1059		return -EPERM;
1060
1061	fuse_copy_init(&cs, fc, 1, iov, nr_segs);
1062
1063	return fuse_dev_do_read(fc, file, &cs, iov_length(iov, nr_segs));
1064}
1065
1066static int fuse_dev_pipe_buf_steal(struct pipe_inode_info *pipe,
1067				   struct pipe_buffer *buf)
1068{
1069	return 1;
1070}
1071
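/*
 * Pipe buffer operations for pages spliced out of the device.  The
 * steal callback reports failure, so downstream consumers of the pipe
 * can never steal these pages; everything else uses the generic
 * helpers.
 */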
1072static const struct pipe_buf_operations fuse_dev_pipe_buf_ops = {
1073	.can_merge = 0,
1074	.map = generic_pipe_buf_map,
1075	.unmap = generic_pipe_buf_unmap,
1076	.confirm = generic_pipe_buf_confirm,
1077	.release = generic_pipe_buf_release,
1078	.steal = fuse_dev_pipe_buf_steal,
1079	.get = generic_pipe_buf_get,
1080};
1081
1082static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
1083				    struct pipe_inode_info *pipe,
1084				    size_t len, unsigned int flags)
1085{
1086	int ret;
1087	int page_nr = 0;
1088	int do_wakeup = 0;
1089	struct pipe_buffer *bufs;
1090	struct fuse_copy_state cs;
1091	struct fuse_conn *fc = fuse_get_conn(in);
1092	if (!fc)
1093		return -EPERM;
1094
1095	bufs = kmalloc(pipe->buffers * sizeof (struct pipe_buffer), GFP_KERNEL);
1096	if (!bufs)
1097		return -ENOMEM;
1098
1099	fuse_copy_init(&cs, fc, 1, NULL, 0);
1100	cs.pipebufs = bufs;
1101	cs.pipe = pipe;
1102	ret = fuse_dev_do_read(fc, in, &cs, len);
1103	if (ret < 0)
1104		goto out;
1105
1106	ret = 0;
1107	pipe_lock(pipe);
1108
1109	if (!pipe->readers) {
1110		send_sig(SIGPIPE, current, 0);
1111		if (!ret)
1112			ret = -EPIPE;
1113		goto out_unlock;
1114	}
1115
1116	if (pipe->nrbufs + cs.nr_segs > pipe->buffers) {
1117		ret = -EIO;
1118		goto out_unlock;
1119	}
1120
1121	while (page_nr < cs.nr_segs) {
1122		int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
1123		struct pipe_buffer *buf = pipe->bufs + newbuf;
1124
1125		buf->page = bufs[page_nr].page;
1126		buf->offset = bufs[page_nr].offset;
1127		buf->len = bufs[page_nr].len;
1128		buf->ops = &fuse_dev_pipe_buf_ops;
1129
1130		pipe->nrbufs++;
1131		page_nr++;
1132		ret += buf->len;
1133
1134		if (pipe->inode)
1135			do_wakeup = 1;
1136	}
1137
1138out_unlock:
1139	pipe_unlock(pipe);
1140
1141	if (do_wakeup) {
1142		smp_mb();
1143		if (waitqueue_active(&pipe->wait))
1144			wake_up_interruptible(&pipe->wait);
1145		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
1146	}
1147
1148out:
1149	for (; page_nr < cs.nr_segs; page_nr++)
1150		page_cache_release(bufs[page_nr].page);
1151
1152	kfree(bufs);
1153	return ret;
1154}
1155
1156static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size,
1157			    struct fuse_copy_state *cs)
1158{
1159	struct fuse_notify_poll_wakeup_out outarg;
1160	int err = -EINVAL;
1161
1162	if (size != sizeof(outarg))
1163		goto err;
1164
1165	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1166	if (err)
1167		goto err;
1168
1169	fuse_copy_finish(cs);
1170	return fuse_notify_poll_wakeup(fc, &outarg);
1171
1172err:
1173	fuse_copy_finish(cs);
1174	return err;
1175}
1176
1177static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
1178				   struct fuse_copy_state *cs)
1179{
1180	struct fuse_notify_inval_inode_out outarg;
1181	int err = -EINVAL;
1182
1183	if (size != sizeof(outarg))
1184		goto err;
1185
1186	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1187	if (err)
1188		goto err;
1189	fuse_copy_finish(cs);
1190
1191	down_read(&fc->killsb);
1192	err = -ENOENT;
1193	if (fc->sb) {
1194		err = fuse_reverse_inval_inode(fc->sb, outarg.ino,
1195					       outarg.off, outarg.len);
1196	}
1197	up_read(&fc->killsb);
1198	return err;
1199
1200err:
1201	fuse_copy_finish(cs);
1202	return err;
1203}
1204
1205static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
1206				   struct fuse_copy_state *cs)
1207{
1208	struct fuse_notify_inval_entry_out outarg;
1209	int err = -ENOMEM;
1210	char *buf;
1211	struct qstr name;
1212
1213	buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
1214	if (!buf)
1215		goto err;
1216
1217	err = -EINVAL;
1218	if (size < sizeof(outarg))
1219		goto err;
1220
1221	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1222	if (err)
1223		goto err;
1224
1225	err = -ENAMETOOLONG;
1226	if (outarg.namelen > FUSE_NAME_MAX)
1227		goto err;
1228
1229	name.name = buf;
1230	name.len = outarg.namelen;
1231	err = fuse_copy_one(cs, buf, outarg.namelen + 1);
1232	if (err)
1233		goto err;
1234	fuse_copy_finish(cs);
1235	buf[outarg.namelen] = 0;
1236	name.hash = full_name_hash(name.name, name.len);
1237
1238	down_read(&fc->killsb);
1239	err = -ENOENT;
1240	if (fc->sb)
1241		err = fuse_reverse_inval_entry(fc->sb, outarg.parent, &name);
1242	up_read(&fc->killsb);
1243	kfree(buf);
1244	return err;
1245
1246err:
1247	kfree(buf);
1248	fuse_copy_finish(cs);
1249	return err;
1250}
1251
1252static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
1253			     struct fuse_copy_state *cs)
1254{
1255	struct fuse_notify_store_out outarg;
1256	struct inode *inode;
1257	struct address_space *mapping;
1258	u64 nodeid;
1259	int err;
1260	pgoff_t index;
1261	unsigned int offset;
1262	unsigned int num;
1263	loff_t file_size;
1264	loff_t end;
1265
1266	err = -EINVAL;
1267	if (size < sizeof(outarg))
1268		goto out_finish;
1269
1270	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1271	if (err)
1272		goto out_finish;
1273
1274	err = -EINVAL;
1275	if (size - sizeof(outarg) != outarg.size)
1276		goto out_finish;
1277
1278	nodeid = outarg.nodeid;
1279
1280	down_read(&fc->killsb);
1281
1282	err = -ENOENT;
1283	if (!fc->sb)
1284		goto out_up_killsb;
1285
1286	inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
1287	if (!inode)
1288		goto out_up_killsb;
1289
1290	mapping = inode->i_mapping;
1291	index = outarg.offset >> PAGE_CACHE_SHIFT;
1292	offset = outarg.offset & ~PAGE_CACHE_MASK;
1293	file_size = i_size_read(inode);
1294	end = outarg.offset + outarg.size;
1295	if (end > file_size) {
1296		file_size = end;
1297		fuse_write_update_size(inode, file_size);
1298	}
1299
1300	num = outarg.size;
1301	while (num) {
1302		struct page *page;
1303		unsigned int this_num;
1304
1305		err = -ENOMEM;
1306		page = find_or_create_page(mapping, index,
1307					   mapping_gfp_mask(mapping));
1308		if (!page)
1309			goto out_iput;
1310
1311		this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
1312		err = fuse_copy_page(cs, &page, offset, this_num, 0);
1313		if (!err && offset == 0 && (num != 0 || file_size == end))
1314			SetPageUptodate(page);
1315		unlock_page(page);
1316		page_cache_release(page);
1317
1318		if (err)
1319			goto out_iput;
1320
1321		num -= this_num;
1322		offset = 0;
1323		index++;
1324	}
1325
1326	err = 0;
1327
1328out_iput:
1329	iput(inode);
1330out_up_killsb:
1331	up_read(&fc->killsb);
1332out_finish:
1333	fuse_copy_finish(cs);
1334	return err;
1335}
1336
1337static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
1338{
1339	int i;
1340
1341	for (i = 0; i < req->num_pages; i++) {
1342		struct page *page = req->pages[i];
1343		page_cache_release(page);
1344	}
1345}
1346
1347static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
1348			 struct fuse_notify_retrieve_out *outarg)
1349{
1350	int err;
1351	struct address_space *mapping = inode->i_mapping;
1352	struct fuse_req *req;
1353	pgoff_t index;
1354	loff_t file_size;
1355	unsigned int num;
1356	unsigned int offset;
1357	size_t total_len = 0;
1358
1359	req = fuse_get_req(fc);
1360	if (IS_ERR(req))
1361		return PTR_ERR(req);
1362
1363	offset = outarg->offset & ~PAGE_CACHE_MASK;
1364
1365	req->in.h.opcode = FUSE_NOTIFY_REPLY;
1366	req->in.h.nodeid = outarg->nodeid;
1367	req->in.numargs = 2;
1368	req->in.argpages = 1;
1369	req->page_offset = offset;
1370	req->end = fuse_retrieve_end;
1371
1372	index = outarg->offset >> PAGE_CACHE_SHIFT;
1373	file_size = i_size_read(inode);
1374	num = outarg->size;
1375	if (outarg->offset > file_size)
1376		num = 0;
1377	else if (outarg->offset + num > file_size)
1378		num = file_size - outarg->offset;
1379
	while (num && req->num_pages < FUSE_MAX_PAGES_PER_REQ) {
		struct page *page;
		unsigned int this_num;

		page = find_get_page(mapping, index);
		if (!page)
			break;

		this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
		req->pages[req->num_pages] = page;
		req->num_pages++;

		/* only the first page is copied at a non-zero offset */
		offset = 0;
		num -= this_num;
		total_len += this_num;
		index++;
	}
1395	req->misc.retrieve_in.offset = outarg->offset;
1396	req->misc.retrieve_in.size = total_len;
1397	req->in.args[0].size = sizeof(req->misc.retrieve_in);
1398	req->in.args[0].value = &req->misc.retrieve_in;
1399	req->in.args[1].size = total_len;
1400
1401	err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
1402	if (err)
1403		fuse_retrieve_end(fc, req);
1404
1405	return err;
1406}
1407
1408static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
1409				struct fuse_copy_state *cs)
1410{
1411	struct fuse_notify_retrieve_out outarg;
1412	struct inode *inode;
1413	int err;
1414
1415	err = -EINVAL;
1416	if (size != sizeof(outarg))
1417		goto copy_finish;
1418
1419	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1420	if (err)
1421		goto copy_finish;
1422
1423	fuse_copy_finish(cs);
1424
1425	down_read(&fc->killsb);
1426	err = -ENOENT;
1427	if (fc->sb) {
1428		u64 nodeid = outarg.nodeid;
1429
1430		inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
1431		if (inode) {
1432			err = fuse_retrieve(fc, inode, &outarg);
1433			iput(inode);
1434		}
1435	}
1436	up_read(&fc->killsb);
1437
1438	return err;
1439
1440copy_finish:
1441	fuse_copy_finish(cs);
1442	return err;
1443}
1444
1445static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
1446		       unsigned int size, struct fuse_copy_state *cs)
1447{
1448	switch (code) {
1449	case FUSE_NOTIFY_POLL:
1450		return fuse_notify_poll(fc, size, cs);
1451
1452	case FUSE_NOTIFY_INVAL_INODE:
1453		return fuse_notify_inval_inode(fc, size, cs);
1454
1455	case FUSE_NOTIFY_INVAL_ENTRY:
1456		return fuse_notify_inval_entry(fc, size, cs);
1457
1458	case FUSE_NOTIFY_STORE:
1459		return fuse_notify_store(fc, size, cs);
1460
1461	case FUSE_NOTIFY_RETRIEVE:
1462		return fuse_notify_retrieve(fc, size, cs);
1463
1464	default:
1465		fuse_copy_finish(cs);
1466		return -EINVAL;
1467	}
1468}
1469
/* Look up a request on the processing list by its unique ID */
1471static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
1472{
1473	struct list_head *entry;
1474
1475	list_for_each(entry, &fc->processing) {
1476		struct fuse_req *req;
1477		req = list_entry(entry, struct fuse_req, list);
1478		if (req->in.h.unique == unique || req->intr_unique == unique)
1479			return req;
1480	}
1481	return NULL;
1482}
1483
1484static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
1485			 unsigned nbytes)
1486{
1487	unsigned reqsize = sizeof(struct fuse_out_header);
1488
1489	if (out->h.error)
1490		return nbytes != reqsize ? -EINVAL : 0;
1491
1492	reqsize += len_args(out->numargs, out->args);
1493
1494	if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
1495		return -EINVAL;
1496	else if (reqsize > nbytes) {
1497		struct fuse_arg *lastarg = &out->args[out->numargs-1];
1498		unsigned diffsize = reqsize - nbytes;
1499		if (diffsize > lastarg->size)
1500			return -EINVAL;
1501		lastarg->size -= diffsize;
1502	}
1503	return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
1504			      out->page_zeroing);
1505}
1506
/*
 * Write a single reply to a request.  First the header is copied from
 * the write buffer.  The request is then looked up on the processing
 * list by the unique ID found in the header.  If found, remove it from
 * the list and copy the rest of the buffer to the request.  The request
 * is finished by calling request_end().
 */
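/*
 * Illustrative sketch of the matching userspace side (not part of this
 * file, values are hypothetical): a daemon answers a request by writing
 * a single buffer that starts with a fuse_out_header whose 'unique' is
 * copied from the request it is answering:
 *
 *	struct fuse_out_header oh = {
 *		.len    = sizeof(oh) + payload_size,
 *		.error  = 0,
 *		.unique = in->unique,
 *	};
 *	// ...followed immediately by the opcode-specific reply payload
 */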
1514static ssize_t fuse_dev_do_write(struct fuse_conn *fc,
1515				 struct fuse_copy_state *cs, size_t nbytes)
1516{
1517	int err;
1518	struct fuse_req *req;
1519	struct fuse_out_header oh;
1520
1521	if (nbytes < sizeof(struct fuse_out_header))
1522		return -EINVAL;
1523
1524	err = fuse_copy_one(cs, &oh, sizeof(oh));
1525	if (err)
1526		goto err_finish;
1527
1528	err = -EINVAL;
1529	if (oh.len != nbytes)
1530		goto err_finish;
1531
	/*
	 * A zero oh.unique indicates an unsolicited notification message,
	 * and the error field contains the notification code.
	 */
1536	if (!oh.unique) {
1537		err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), cs);
1538		return err ? err : nbytes;
1539	}
1540
1541	err = -EINVAL;
1542	if (oh.error <= -1000 || oh.error > 0)
1543		goto err_finish;
1544
1545	spin_lock(&fc->lock);
1546	err = -ENOENT;
1547	if (!fc->connected)
1548		goto err_unlock;
1549
1550	req = request_find(fc, oh.unique);
1551	if (!req)
1552		goto err_unlock;
1553
1554	if (req->aborted) {
1555		spin_unlock(&fc->lock);
1556		fuse_copy_finish(cs);
1557		spin_lock(&fc->lock);
1558		request_end(fc, req);
1559		return -ENOENT;
1560	}
1561	/* Is it an interrupt reply? */
1562	if (req->intr_unique == oh.unique) {
1563		err = -EINVAL;
1564		if (nbytes != sizeof(struct fuse_out_header))
1565			goto err_unlock;
1566
1567		if (oh.error == -ENOSYS)
1568			fc->no_interrupt = 1;
1569		else if (oh.error == -EAGAIN)
1570			queue_interrupt(fc, req);
1571
1572		spin_unlock(&fc->lock);
1573		fuse_copy_finish(cs);
1574		return nbytes;
1575	}
1576
1577	req->state = FUSE_REQ_WRITING;
1578	list_move(&req->list, &fc->io);
1579	req->out.h = oh;
1580	req->locked = 1;
1581	cs->req = req;
1582	if (!req->out.page_replace)
1583		cs->move_pages = 0;
1584	spin_unlock(&fc->lock);
1585
1586	err = copy_out_args(cs, &req->out, nbytes);
1587	fuse_copy_finish(cs);
1588
1589	spin_lock(&fc->lock);
1590	req->locked = 0;
1591	if (!err) {
1592		if (req->aborted)
1593			err = -ENOENT;
1594	} else if (!req->aborted)
1595		req->out.h.error = -EIO;
1596	request_end(fc, req);
1597
1598	return err ? err : nbytes;
1599
1600 err_unlock:
1601	spin_unlock(&fc->lock);
1602 err_finish:
1603	fuse_copy_finish(cs);
1604	return err;
1605}
1606
1607static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
1608			      unsigned long nr_segs, loff_t pos)
1609{
1610	struct fuse_copy_state cs;
1611	struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp);
1612	if (!fc)
1613		return -EPERM;
1614
1615	fuse_copy_init(&cs, fc, 0, iov, nr_segs);
1616
1617	return fuse_dev_do_write(fc, &cs, iov_length(iov, nr_segs));
1618}
1619
1620static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
1621				     struct file *out, loff_t *ppos,
1622				     size_t len, unsigned int flags)
1623{
1624	unsigned nbuf;
1625	unsigned idx;
1626	struct pipe_buffer *bufs;
1627	struct fuse_copy_state cs;
1628	struct fuse_conn *fc;
1629	size_t rem;
1630	ssize_t ret;
1631
1632	fc = fuse_get_conn(out);
1633	if (!fc)
1634		return -EPERM;
1635
1636	bufs = kmalloc(pipe->buffers * sizeof (struct pipe_buffer), GFP_KERNEL);
1637	if (!bufs)
1638		return -ENOMEM;
1639
1640	pipe_lock(pipe);
1641	nbuf = 0;
1642	rem = 0;
1643	for (idx = 0; idx < pipe->nrbufs && rem < len; idx++)
1644		rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len;
1645
1646	ret = -EINVAL;
1647	if (rem < len) {
1648		pipe_unlock(pipe);
1649		goto out;
1650	}
1651
1652	rem = len;
1653	while (rem) {
1654		struct pipe_buffer *ibuf;
1655		struct pipe_buffer *obuf;
1656
1657		BUG_ON(nbuf >= pipe->buffers);
1658		BUG_ON(!pipe->nrbufs);
1659		ibuf = &pipe->bufs[pipe->curbuf];
1660		obuf = &bufs[nbuf];
1661
1662		if (rem >= ibuf->len) {
1663			*obuf = *ibuf;
1664			ibuf->ops = NULL;
1665			pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
1666			pipe->nrbufs--;
1667		} else {
1668			ibuf->ops->get(pipe, ibuf);
1669			*obuf = *ibuf;
1670			obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
1671			obuf->len = rem;
1672			ibuf->offset += obuf->len;
1673			ibuf->len -= obuf->len;
1674		}
1675		nbuf++;
1676		rem -= obuf->len;
1677	}
1678	pipe_unlock(pipe);
1679
1680	fuse_copy_init(&cs, fc, 0, NULL, nbuf);
1681	cs.pipebufs = bufs;
1682	cs.pipe = pipe;
1683
1684	if (flags & SPLICE_F_MOVE)
1685		cs.move_pages = 1;
1686
1687	ret = fuse_dev_do_write(fc, &cs, len);
1688
1689	for (idx = 0; idx < nbuf; idx++) {
1690		struct pipe_buffer *buf = &bufs[idx];
1691		buf->ops->release(pipe, buf);
1692	}
1693out:
1694	kfree(bufs);
1695	return ret;
1696}
1697
1698static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
1699{
1700	unsigned mask = POLLOUT | POLLWRNORM;
1701	struct fuse_conn *fc = fuse_get_conn(file);
1702	if (!fc)
1703		return POLLERR;
1704
1705	poll_wait(file, &fc->waitq, wait);
1706
1707	spin_lock(&fc->lock);
1708	if (!fc->connected)
1709		mask = POLLERR;
1710	else if (request_pending(fc))
1711		mask |= POLLIN | POLLRDNORM;
1712	spin_unlock(&fc->lock);
1713
1714	return mask;
1715}
1716
1717/*
1718 * Abort all requests on the given list (pending or processing)
1719 *
1720 * This function releases and reacquires fc->lock
1721 */
1722static void end_requests(struct fuse_conn *fc, struct list_head *head)
1723__releases(fc->lock)
1724__acquires(fc->lock)
1725{
1726	while (!list_empty(head)) {
1727		struct fuse_req *req;
1728		req = list_entry(head->next, struct fuse_req, list);
1729		req->out.h.error = -ECONNABORTED;
1730		request_end(fc, req);
1731		spin_lock(&fc->lock);
1732	}
1733}
1734
1735/*
1736 * Abort requests under I/O
1737 *
1738 * The requests are set to aborted and finished, and the request
1739 * waiter is woken up.  This will make request_wait_answer() wait
1740 * until the request is unlocked and then return.
1741 *
1742 * If the request is asynchronous, then the end function needs to be
1743 * called after waiting for the request to be unlocked (if it was
1744 * locked).
1745 */
1746static void end_io_requests(struct fuse_conn *fc)
1747__releases(fc->lock)
1748__acquires(fc->lock)
1749{
1750	while (!list_empty(&fc->io)) {
1751		struct fuse_req *req =
1752			list_entry(fc->io.next, struct fuse_req, list);
1753		void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
1754
1755		req->aborted = 1;
1756		req->out.h.error = -ECONNABORTED;
1757		req->state = FUSE_REQ_FINISHED;
1758		list_del_init(&req->list);
1759		wake_up(&req->waitq);
1760		if (end) {
1761			req->end = NULL;
1762			__fuse_get_request(req);
1763			spin_unlock(&fc->lock);
1764			wait_event(req->waitq, !req->locked);
1765			end(fc, req);
1766			fuse_put_request(fc, req);
1767			spin_lock(&fc->lock);
1768		}
1769	}
1770}
1771
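/*
 * Fail every request that has been queued but not yet read by
 * userspace.  Raising max_background first lets flush_bg_queue() push
 * all backgrounded requests onto the pending list, so that they are
 * aborted as well.
 */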
1772static void end_queued_requests(struct fuse_conn *fc)
1773__releases(fc->lock)
1774__acquires(fc->lock)
1775{
1776	fc->max_background = UINT_MAX;
1777	flush_bg_queue(fc);
1778	end_requests(fc, &fc->pending);
1779	end_requests(fc, &fc->processing);
1780}
1781
1782/*
1783 * Abort all requests.
1784 *
1785 * Emergency exit in case of a malicious or accidental deadlock, or
1786 * just a hung filesystem.
1787 *
1788 * The same effect is usually achievable through killing the
1789 * filesystem daemon and all users of the filesystem.  The exception
1790 * is the combination of an asynchronous request and the tricky
1791 * deadlock (see Documentation/filesystems/fuse.txt).
1792 *
1793 * During the aborting, progression of requests from the pending and
1794 * processing lists onto the io list, and progression of new requests
 * onto the pending list is prevented by fc->connected being false.
1796 *
1797 * Progression of requests under I/O to the processing list is
1798 * prevented by the req->aborted flag being true for these requests.
1799 * For this reason requests on the io list must be aborted first.
1800 */
1801void fuse_abort_conn(struct fuse_conn *fc)
1802{
1803	spin_lock(&fc->lock);
1804	if (fc->connected) {
1805		fc->connected = 0;
1806		fc->blocked = 0;
1807		end_io_requests(fc);
1808		end_queued_requests(fc);
1809		wake_up_all(&fc->waitq);
1810		wake_up_all(&fc->blocked_waitq);
1811		kill_fasync(&fc->fasync, SIGIO, POLL_IN);
1812	}
1813	spin_unlock(&fc->lock);
1814}
1815EXPORT_SYMBOL_GPL(fuse_abort_conn);
1816
1817int fuse_dev_release(struct inode *inode, struct file *file)
1818{
1819	struct fuse_conn *fc = fuse_get_conn(file);
1820	if (fc) {
1821		spin_lock(&fc->lock);
1822		fc->connected = 0;
1823		fc->blocked = 0;
1824		end_queued_requests(fc);
1825		wake_up_all(&fc->blocked_waitq);
1826		spin_unlock(&fc->lock);
1827		fuse_conn_put(fc);
1828	}
1829
1830	return 0;
1831}
1832EXPORT_SYMBOL_GPL(fuse_dev_release);
1833
1834static int fuse_dev_fasync(int fd, struct file *file, int on)
1835{
1836	struct fuse_conn *fc = fuse_get_conn(file);
1837	if (!fc)
1838		return -EPERM;
1839
1840	/* No locking - fasync_helper does its own locking */
1841	return fasync_helper(fd, file, on, &fc->fasync);
1842}
1843
1844const struct file_operations fuse_dev_operations = {
1845	.owner		= THIS_MODULE,
1846	.llseek		= no_llseek,
1847	.read		= do_sync_read,
1848	.aio_read	= fuse_dev_read,
1849	.splice_read	= fuse_dev_splice_read,
1850	.write		= do_sync_write,
1851	.aio_write	= fuse_dev_write,
1852	.splice_write	= fuse_dev_splice_write,
1853	.poll		= fuse_dev_poll,
1854	.release	= fuse_dev_release,
1855	.fasync		= fuse_dev_fasync,
1856};
1857EXPORT_SYMBOL_GPL(fuse_dev_operations);
1858
1859static struct miscdevice fuse_miscdevice = {
1860	.minor = FUSE_MINOR,
1861	.name  = "fuse",
1862	.fops = &fuse_dev_operations,
1863};
1864
1865int __init fuse_dev_init(void)
1866{
1867	int err = -ENOMEM;
1868	fuse_req_cachep = kmem_cache_create("fuse_request",
1869					    sizeof(struct fuse_req),
1870					    0, 0, NULL);
1871	if (!fuse_req_cachep)
1872		goto out;
1873
1874	err = misc_register(&fuse_miscdevice);
1875	if (err)
1876		goto out_cache_clean;
1877
1878	return 0;
1879
1880 out_cache_clean:
1881	kmem_cache_destroy(fuse_req_cachep);
1882 out:
1883	return err;
1884}
1885
1886void fuse_dev_cleanup(void)
1887{
1888	misc_deregister(&fuse_miscdevice);
1889	kmem_cache_destroy(fuse_req_cachep);
1890}
1891