/* Modified by Broadcom Corp. Portions Copyright (c) Broadcom Corp, 2012. */
/*
 * "splice": joining two ropes together by interweaving their strands.
 *
 * This is the "extended pipe" functionality, where a pipe is used as
 * an arbitrary in-memory buffer. Think of a pipe as a small kernel
 * buffer that you can use to transfer data from one end to the other.
 *
 * The traditional unix read/write is extended with a "splice()" operation
 * that transfers data buffers to or from a pipe buffer.
 *
 * Named by Larry McVoy, original implementation from Linus, extended by
 * Jens to support splicing to files, network, direct splicing, etc and
 * fixing lots of bugs.
 *
 * Copyright (C) 2005-2006 Jens Axboe <axboe@kernel.dk>
 * Copyright (C) 2005-2006 Linus Torvalds <torvalds@osdl.org>
 * Copyright (C) 2006 Ingo Molnar <mingo@elte.hu>
 *
 */
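
/*
 * Typical userspace data flow, for orientation (illustrative sketch only,
 * not part of this file): move a file to a socket through a pipe without
 * copying through userspace. Error handling omitted; 'in_fd' and 'sock_fd'
 * are assumed to be set up by the caller.
 *
 *	int pfd[2];
 *	pipe(pfd);
 *	for (;;) {
 *		ssize_t n = splice(in_fd, NULL, pfd[1], NULL, 65536,
 *				   SPLICE_F_MOVE | SPLICE_F_MORE);
 *		if (n <= 0)
 *			break;
 *		while (n > 0)
 *			n -= splice(pfd[0], NULL, sock_fd, NULL, n,
 *				    SPLICE_F_MOVE | SPLICE_F_MORE);
 *	}
 */
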
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/splice.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/buffer_head.h>
#include <linux/module.h>
#include <linux/syscalls.h>
#include <linux/uio.h>
#include <linux/security.h>
#include <linux/gfp.h>

#include <typedefs.h>
#include <bcmdefs.h>
#if defined(CONFIG_BCM_RECVFILE)
#include <net/sock.h>
#endif /* CONFIG_BCM_RECVFILE */

/*
 * Attempt to steal a page from a pipe buffer. This should perhaps go into
 * a vm helper function, it's already simplified quite a bit by the
 * addition of remove_mapping(). If success is returned, the caller may
 * attempt to reuse this page for another destination.
 */
static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe,
				     struct pipe_buffer *buf)
{
	struct page *page = buf->page;
	struct address_space *mapping;

	lock_page(page);

	mapping = page_mapping(page);
	if (mapping) {
		WARN_ON(!PageUptodate(page));

		/*
		 * At least for ext2 with nobh option, we need to wait on
		 * writeback completing on this page, since we'll remove it
		 * from the pagecache.  Otherwise truncate won't wait on the
		 * page, allowing the disk blocks to be reused by someone else
		 * before we actually wrote our data to them. fs corruption
		 * ensues.
		 */
		wait_on_page_writeback(page);

		if (page_has_private(page) &&
		    !try_to_release_page(page, GFP_KERNEL))
			goto out_unlock;

		/*
		 * If we succeeded in removing the mapping, set LRU flag
		 * and return good.
		 */
		if (remove_mapping(mapping, page)) {
			buf->flags |= PIPE_BUF_FLAG_LRU;
			return 0;
		}
	}

	/*
	 * Raced with truncate or failed to remove page from current
	 * address space, unlock and return failure.
	 */
out_unlock:
	unlock_page(page);
	return 1;
}

static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe,
					struct pipe_buffer *buf)
{
	page_cache_release(buf->page);
	buf->flags &= ~PIPE_BUF_FLAG_LRU;
}

/*
 * Check whether the contents of buf are OK to access. Since the content
 * is a page cache page, IO may be in flight.
 */
static int page_cache_pipe_buf_confirm(struct pipe_inode_info *pipe,
				       struct pipe_buffer *buf)
{
	struct page *page = buf->page;
	int err;

	if (!PageUptodate(page)) {
		lock_page(page);

		/*
		 * Page got truncated/unhashed. This will cause a 0-byte
		 * splice, if this is the first page.
		 */
		if (!page->mapping) {
			err = -ENODATA;
			goto error;
		}

		/*
		 * Uh oh, read-error from disk.
		 */
		if (!PageUptodate(page)) {
			err = -EIO;
			goto error;
		}

		/*
		 * Page is ok after all, we are done.
		 */
		unlock_page(page);
	}

	return 0;
error:
	unlock_page(page);
	return err;
}

static const struct pipe_buf_operations page_cache_pipe_buf_ops = {
	.can_merge = 0,
	.map = generic_pipe_buf_map,
	.unmap = generic_pipe_buf_unmap,
	.confirm = page_cache_pipe_buf_confirm,
	.release = page_cache_pipe_buf_release,
	.steal = page_cache_pipe_buf_steal,
	.get = generic_pipe_buf_get,
};

static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe,
				    struct pipe_buffer *buf)
{
	if (!(buf->flags & PIPE_BUF_FLAG_GIFT))
		return 1;

	buf->flags |= PIPE_BUF_FLAG_LRU;
	return generic_pipe_buf_steal(pipe, buf);
}

static const struct pipe_buf_operations user_page_pipe_buf_ops = {
	.can_merge = 0,
	.map = generic_pipe_buf_map,
	.unmap = generic_pipe_buf_unmap,
	.confirm = generic_pipe_buf_confirm,
	.release = page_cache_pipe_buf_release,
	.steal = user_page_pipe_buf_steal,
	.get = generic_pipe_buf_get,
};

/**
 * splice_to_pipe - fill passed data into a pipe
 * @pipe:	pipe to fill
 * @spd:	data to fill
 *
 * Description:
 *    @spd contains a map of pages and len/offset tuples, along with
 *    the struct pipe_buf_operations associated with these pages. This
 *    function will link that data to the pipe.
 *
 */
ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
		       struct splice_pipe_desc *spd)
{
	unsigned int spd_pages = spd->nr_pages;
	int ret, do_wakeup, page_nr;

	ret = 0;
	do_wakeup = 0;
	page_nr = 0;

	pipe_lock(pipe);

	for (;;) {
		if (!pipe->readers) {
			send_sig(SIGPIPE, current, 0);
			if (!ret)
				ret = -EPIPE;
			break;
		}

		if (pipe->nrbufs < pipe->buffers) {
			int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
			struct pipe_buffer *buf = pipe->bufs + newbuf;

			buf->page = spd->pages[page_nr];
			buf->offset = spd->partial[page_nr].offset;
			buf->len = spd->partial[page_nr].len;
			buf->private = spd->partial[page_nr].private;
			buf->ops = spd->ops;
			if (spd->flags & SPLICE_F_GIFT)
				buf->flags |= PIPE_BUF_FLAG_GIFT;

			pipe->nrbufs++;
			page_nr++;
			ret += buf->len;

			if (pipe->inode)
				do_wakeup = 1;

			if (!--spd->nr_pages)
				break;
			if (pipe->nrbufs < pipe->buffers)
				continue;

			break;
		}

		if (spd->flags & SPLICE_F_NONBLOCK) {
			if (!ret)
				ret = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			if (!ret)
				ret = -ERESTARTSYS;
			break;
		}

		if (do_wakeup) {
			smp_mb();
			if (waitqueue_active(&pipe->wait))
				wake_up_interruptible_sync(&pipe->wait);
			kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
			do_wakeup = 0;
		}

		pipe->waiting_writers++;
		pipe_wait(pipe);
		pipe->waiting_writers--;
	}

	pipe_unlock(pipe);

	if (do_wakeup) {
		smp_mb();
		if (waitqueue_active(&pipe->wait))
			wake_up_interruptible(&pipe->wait);
		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
	}

	while (page_nr < spd_pages)
		spd->spd_release(spd, page_nr++);

	return ret;
}
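
/*
 * Note on the ring arithmetic above: free slots are picked with
 * (curbuf + nrbufs) & (buffers - 1), which relies on pipe->buffers
 * always being a power of two (PIPE_DEF_BUFFERS by default), a property
 * the pipe code maintains when the ring is resized.
 */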

static void spd_release_page(struct splice_pipe_desc *spd, unsigned int i)
{
	page_cache_release(spd->pages[i]);
}

/*
 * Check if we need to grow the arrays holding pages and partial page
 * descriptions.
 */
int splice_grow_spd(struct pipe_inode_info *pipe, struct splice_pipe_desc *spd)
{
	if (pipe->buffers <= PIPE_DEF_BUFFERS)
		return 0;

	spd->pages = kmalloc(pipe->buffers * sizeof(struct page *), GFP_KERNEL);
	spd->partial = kmalloc(pipe->buffers * sizeof(struct partial_page), GFP_KERNEL);

	if (spd->pages && spd->partial)
		return 0;

	kfree(spd->pages);
	kfree(spd->partial);
	return -ENOMEM;
}

void splice_shrink_spd(struct pipe_inode_info *pipe,
		       struct splice_pipe_desc *spd)
{
	if (pipe->buffers <= PIPE_DEF_BUFFERS)
		return;

	kfree(spd->pages);
	kfree(spd->partial);
}

static int BCMFASTPATH_HOST
__generic_file_splice_read(struct file *in, loff_t *ppos,
			   struct pipe_inode_info *pipe, size_t len,
			   unsigned int flags)
{
	struct address_space *mapping = in->f_mapping;
	unsigned int loff, nr_pages, req_pages;
	struct page *pages[PIPE_DEF_BUFFERS];
	struct partial_page partial[PIPE_DEF_BUFFERS];
	struct page *page;
	pgoff_t index, end_index;
	loff_t isize;
	int error, page_nr;
	struct splice_pipe_desc spd = {
		.pages = pages,
		.partial = partial,
		.flags = flags,
		.ops = &page_cache_pipe_buf_ops,
		.spd_release = spd_release_page,
	};

	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

	index = *ppos >> PAGE_CACHE_SHIFT;
	loff = *ppos & ~PAGE_CACHE_MASK;
	req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	nr_pages = min(req_pages, pipe->buffers);

	/*
	 * Lookup the (hopefully) full range of pages we need.
	 */
	spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, spd.pages);
	index += spd.nr_pages;

	/*
	 * If find_get_pages_contig() returned fewer pages than we needed,
	 * readahead/allocate the rest and fill in the holes.
	 */
	if (spd.nr_pages < nr_pages)
		page_cache_sync_readahead(mapping, &in->f_ra, in,
				index, req_pages - spd.nr_pages);

	error = 0;
	while (spd.nr_pages < nr_pages) {
		/*
		 * Page could be there, find_get_pages_contig() breaks on
		 * the first hole.
		 */
		page = find_get_page(mapping, index);
		if (!page) {
			/*
			 * page didn't exist, allocate one.
			 */
			page = page_cache_alloc_cold(mapping);
			if (!page)
				break;

			error = add_to_page_cache_lru(page, mapping, index,
						GFP_KERNEL);
			if (unlikely(error)) {
				page_cache_release(page);
				if (error == -EEXIST)
					continue;
				break;
			}
			/*
			 * add_to_page_cache() locks the page, unlock it
			 * to avoid convoluting the logic below even more.
			 */
			unlock_page(page);
		}

		spd.pages[spd.nr_pages++] = page;
		index++;
	}

	/*
	 * Now loop over the map and see if we need to start IO on any
	 * pages, fill in the partial map, etc.
	 */
	index = *ppos >> PAGE_CACHE_SHIFT;
	nr_pages = spd.nr_pages;
	spd.nr_pages = 0;
	for (page_nr = 0; page_nr < nr_pages; page_nr++) {
		unsigned int this_len;

		if (!len)
			break;

		/*
		 * this_len is the max we'll use from this page
		 */
		this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff);
		page = spd.pages[page_nr];

		if (PageReadahead(page))
			page_cache_async_readahead(mapping, &in->f_ra, in,
					page, index, req_pages - page_nr);

		/*
		 * If the page isn't uptodate, we may need to start io on it
		 */
		if (!PageUptodate(page)) {
			lock_page(page);

			/*
			 * Page was truncated, or invalidated by the
			 * filesystem.  Redo the find/create, but this time the
			 * page is kept locked, so there's no chance of another
			 * race with truncate/invalidate.
			 */
			if (!page->mapping) {
				unlock_page(page);
				page = find_or_create_page(mapping, index,
						mapping_gfp_mask(mapping));

				if (!page) {
					error = -ENOMEM;
					break;
				}
				page_cache_release(spd.pages[page_nr]);
				spd.pages[page_nr] = page;
			}
			/*
			 * page was already under io and is now done, great
			 */
			if (PageUptodate(page)) {
				unlock_page(page);
				goto fill_it;
			}

			/*
			 * need to read in the page
			 */
			error = mapping->a_ops->readpage(in, page);
			if (unlikely(error)) {
				/*
				 * We really should re-lookup the page here,
				 * but it complicates things a lot. Instead
				 * let's just do what we already stored, and
				 * we'll get it the next time we are called.
				 */
				if (error == AOP_TRUNCATED_PAGE)
					error = 0;

				break;
			}
		}
fill_it:
		/*
		 * i_size must be checked after PageUptodate.
		 */
		isize = i_size_read(mapping->host);
		end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
		if (unlikely(!isize || index > end_index))
			break;

		/*
		 * if this is the last page, see if we need to shrink
		 * the length and stop
		 */
		if (end_index == index) {
			unsigned int plen;

			/*
			 * max good bytes in this page
			 */
			plen = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
			if (plen <= loff)
				break;

			/*
			 * force quit after adding this page
			 */
			this_len = min(this_len, plen - loff);
			len = this_len;
		}

		spd.partial[page_nr].offset = loff;
		spd.partial[page_nr].len = this_len;
		len -= this_len;
		loff = 0;
		spd.nr_pages++;
		index++;
	}

	/*
	 * Release any pages at the end, if we quit early. 'page_nr' is how far
	 * we got, 'nr_pages' is how many pages are in the map.
	 */
	while (page_nr < nr_pages)
		page_cache_release(spd.pages[page_nr++]);
	in->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;

	if (spd.nr_pages)
		error = splice_to_pipe(pipe, &spd);

	splice_shrink_spd(pipe, &spd);
	return error;
}

/**
 * generic_file_splice_read - splice data from file to a pipe
 * @in:		file to splice from
 * @ppos:	position in @in
 * @pipe:	pipe to splice to
 * @len:	number of bytes to splice
 * @flags:	splice modifier flags
 *
 * Description:
 *    Will read pages from given file and fill them into a pipe. Can be
 *    used as long as the address_space operations for the source implement
 *    a readpage() hook.
 *
 */
ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
				 struct pipe_inode_info *pipe, size_t len,
				 unsigned int flags)
{
	loff_t isize, left;
	int ret;

	isize = i_size_read(in->f_mapping->host);
	if (unlikely(*ppos >= isize))
		return 0;

	left = isize - *ppos;
	if (unlikely(left < len))
		len = left;

	ret = __generic_file_splice_read(in, ppos, pipe, len, flags);
	if (ret > 0) {
		*ppos += ret;
		file_accessed(in);
	}

	return ret;
}
EXPORT_SYMBOL(generic_file_splice_read);
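
/*
 * Example wiring (illustrative; mainline filesystems such as ext2 do
 * exactly this): a filesystem exposes the generic helpers through its
 * file_operations, e.g.
 *
 *	const struct file_operations ext2_file_operations = {
 *		...
 *		.splice_read	= generic_file_splice_read,
 *		.splice_write	= generic_file_splice_write,
 *	};
 *
 * Files lacking a ->splice_read fall back to default_file_splice_read()
 * below.
 */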

static const struct pipe_buf_operations default_pipe_buf_ops = {
	.can_merge = 0,
	.map = generic_pipe_buf_map,
	.unmap = generic_pipe_buf_unmap,
	.confirm = generic_pipe_buf_confirm,
	.release = generic_pipe_buf_release,
	.steal = generic_pipe_buf_steal,
	.get = generic_pipe_buf_get,
};

static ssize_t kernel_readv(struct file *file, const struct iovec *vec,
			    unsigned long vlen, loff_t offset)
{
	mm_segment_t old_fs;
	loff_t pos = offset;
	ssize_t res;

	old_fs = get_fs();
	set_fs(get_ds());
	/* The cast to a user pointer is valid due to the set_fs() */
	res = vfs_readv(file, (const struct iovec __user *)vec, vlen, &pos);
	set_fs(old_fs);

	return res;
}

static ssize_t kernel_write(struct file *file, const char *buf, size_t count,
			    loff_t pos)
{
	mm_segment_t old_fs;
	ssize_t res;

	old_fs = get_fs();
	set_fs(get_ds());
	/* The cast to a user pointer is valid due to the set_fs() */
	res = vfs_write(file, (const char __user *)buf, count, &pos);
	set_fs(old_fs);

	return res;
}

ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
				 struct pipe_inode_info *pipe, size_t len,
				 unsigned int flags)
{
	unsigned int nr_pages;
	unsigned int nr_freed;
	size_t offset;
	struct page *pages[PIPE_DEF_BUFFERS];
	struct partial_page partial[PIPE_DEF_BUFFERS];
	struct iovec *vec, __vec[PIPE_DEF_BUFFERS];
	ssize_t res;
	size_t this_len;
	int error;
	int i;
	struct splice_pipe_desc spd = {
		.pages = pages,
		.partial = partial,
		.flags = flags,
		.ops = &default_pipe_buf_ops,
		.spd_release = spd_release_page,
	};

	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

	res = -ENOMEM;
	vec = __vec;
	if (pipe->buffers > PIPE_DEF_BUFFERS) {
		vec = kmalloc(pipe->buffers * sizeof(struct iovec), GFP_KERNEL);
		if (!vec)
			goto shrink_ret;
	}

	offset = *ppos & ~PAGE_CACHE_MASK;
	nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;

	for (i = 0; i < nr_pages && i < pipe->buffers && len; i++) {
		struct page *page;

		page = alloc_page(GFP_USER);
		error = -ENOMEM;
		if (!page)
			goto err;

		this_len = min_t(size_t, len, PAGE_CACHE_SIZE - offset);
		vec[i].iov_base = (void __user *) page_address(page);
		vec[i].iov_len = this_len;
		spd.pages[i] = page;
		spd.nr_pages++;
		len -= this_len;
		offset = 0;
	}

	res = kernel_readv(in, vec, spd.nr_pages, *ppos);
	if (res < 0) {
		error = res;
		goto err;
	}

	error = 0;
	if (!res)
		goto err;

	nr_freed = 0;
	for (i = 0; i < spd.nr_pages; i++) {
		this_len = min_t(size_t, vec[i].iov_len, res);
		spd.partial[i].offset = 0;
		spd.partial[i].len = this_len;
		if (!this_len) {
			__free_page(spd.pages[i]);
			spd.pages[i] = NULL;
			nr_freed++;
		}
		res -= this_len;
	}
	spd.nr_pages -= nr_freed;

	res = splice_to_pipe(pipe, &spd);
	if (res > 0)
		*ppos += res;

shrink_ret:
	if (vec != __vec)
		kfree(vec);
	splice_shrink_spd(pipe, &spd);
	return res;

err:
	for (i = 0; i < spd.nr_pages; i++)
		__free_page(spd.pages[i]);

	res = error;
	goto shrink_ret;
}
EXPORT_SYMBOL(default_file_splice_read);
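
/*
 * Summary of the fallback above (descriptive note): when a file lacks a
 * ->splice_read, data is bounced once through freshly allocated pages --
 * build an iovec over them, fill it with kernel_readv(), trim and free
 * the unused tail pages, then hand the rest to splice_to_pipe(). One
 * extra copy, but it works for any file that supports read().
 */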

/*
 * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos'
 * using sendpage(). Return the number of bytes sent.
 */
static int pipe_to_sendpage(struct pipe_inode_info *pipe,
			    struct pipe_buffer *buf, struct splice_desc *sd)
{
	struct file *file = sd->u.file;
	loff_t pos = sd->pos;
	int ret, more;

	ret = buf->ops->confirm(pipe, buf);
	if (!ret) {
		more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
		if (file->f_op && file->f_op->sendpage)
			ret = file->f_op->sendpage(file, buf->page, buf->offset,
						   sd->len, &pos, more);
		else
			ret = -EINVAL;
	}

	return ret;
}
/*
 * This is a little more tricky than the file -> pipe splicing. There are
 * basically three cases:
 *
 *	- Destination page already exists in the address space and there
 *	  are users of it. For that case we have no option other than
 *	  copying the data. Tough luck.
 *	- Destination page already exists in the address space, but there
 *	  are no users of it. Make sure it's uptodate, then drop it. Fall
 *	  through to last case.
 *	- Destination page does not exist, we can add the pipe page to
 *	  the page cache and avoid the copy.
 *
 * If asked to move pages to the output file (SPLICE_F_MOVE is set in
 * sd->flags), we attempt to migrate pages from the pipe to the output
 * file address space page cache. This is possible if no one else has
 * the pipe page referenced outside of the pipe and page cache. If
 * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create
 * a new page in the output file page cache and fill/dirty that.
 */
int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
		 struct splice_desc *sd)
{
	struct file *file = sd->u.file;
	struct address_space *mapping = file->f_mapping;
	unsigned int offset, this_len;
	struct page *page;
	void *fsdata;
	int ret;

	/*
	 * make sure the data in this buffer is uptodate
	 */
	ret = buf->ops->confirm(pipe, buf);
	if (unlikely(ret))
		return ret;

	offset = sd->pos & ~PAGE_CACHE_MASK;

	this_len = sd->len;
	if (this_len + offset > PAGE_CACHE_SIZE)
		this_len = PAGE_CACHE_SIZE - offset;

	ret = pagecache_write_begin(file, mapping, sd->pos, this_len,
				AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
	if (unlikely(ret))
		goto out;

	if (buf->page != page) {
		/*
		 * Careful, ->map() uses KM_USER0!
		 */
		char *src = buf->ops->map(pipe, buf, 1);
		char *dst = kmap_atomic(page, KM_USER1);

		memcpy(dst + offset, src + buf->offset, this_len);
		flush_dcache_page(page);
		kunmap_atomic(dst, KM_USER1);
		buf->ops->unmap(pipe, buf, src);
	}
	ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len,
				page, fsdata);
out:
	return ret;
}
EXPORT_SYMBOL(pipe_to_file);
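
/*
 * Usage sketch (illustrative): a filesystem that needs no locking beyond
 * what splice_from_pipe() provides could implement ->splice_write with
 * pipe_to_file as the actor:
 *
 *	static ssize_t myfs_splice_write(struct pipe_inode_info *pipe,
 *					 struct file *out, loff_t *ppos,
 *					 size_t len, unsigned int flags)
 *	{
 *		return splice_from_pipe(pipe, out, ppos, len, flags,
 *					pipe_to_file);
 *	}
 *
 * 'myfs_splice_write' is a hypothetical name; generic_file_splice_write()
 * below is the real-world variant, which also handles i_mutex, suid
 * removal and syncing.
 */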

static void wakeup_pipe_writers(struct pipe_inode_info *pipe)
{
	smp_mb();
	if (waitqueue_active(&pipe->wait))
		wake_up_interruptible(&pipe->wait);
	kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
}

/**
 * splice_from_pipe_feed - feed available data from a pipe to a file
 * @pipe:	pipe to splice from
 * @sd:		information to @actor
 * @actor:	handler that splices the data
 *
 * Description:
 *    This function loops over the pipe and calls @actor to do the
 *    actual moving of a single struct pipe_buffer to the desired
 *    destination.  It returns when there are no more buffers left in
 *    the pipe or when the requested number of bytes (@sd->total_len)
 *    have been copied.  It returns a positive number (one) if the
 *    pipe needs to be filled with more data, zero if the required
 *    number of bytes have been copied and -errno on error.
 *
 *    This, together with splice_from_pipe_{begin,end,next}, may be
 *    used to implement the functionality of __splice_from_pipe() when
 *    locking is required around copying the pipe buffers to the
 *    destination.
 */
int BCMFASTPATH_HOST splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
			  splice_actor *actor)
{
	int ret;

	while (pipe->nrbufs) {
		struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
		const struct pipe_buf_operations *ops = buf->ops;

		sd->len = buf->len;
		if (sd->len > sd->total_len)
			sd->len = sd->total_len;

		ret = actor(pipe, buf, sd);
		if (ret <= 0) {
			if (ret == -ENODATA)
				ret = 0;
			return ret;
		}
		buf->offset += ret;
		buf->len -= ret;

		sd->num_spliced += ret;
		sd->len -= ret;
		sd->pos += ret;
		sd->total_len -= ret;

		if (!buf->len) {
			buf->ops = NULL;
			ops->release(pipe, buf);
			pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
			pipe->nrbufs--;
			if (pipe->inode)
				sd->need_wakeup = true;
		}

		if (!sd->total_len)
			return 0;
	}

	return 1;
}
EXPORT_SYMBOL(splice_from_pipe_feed);

/**
 * splice_from_pipe_next - wait for some data to splice from
 * @pipe:	pipe to splice from
 * @sd:		information about the splice operation
 *
 * Description:
 *    This function will wait for some data and return a positive
 *    value (one) if pipe buffers are available.  It will return zero
 *    or -errno if no more data needs to be spliced.
 */
int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd)
{
	while (!pipe->nrbufs) {
		if (!pipe->writers)
			return 0;

		if (!pipe->waiting_writers && sd->num_spliced)
			return 0;

		if (sd->flags & SPLICE_F_NONBLOCK)
			return -EAGAIN;

		if (signal_pending(current))
			return -ERESTARTSYS;

		if (sd->need_wakeup) {
			wakeup_pipe_writers(pipe);
			sd->need_wakeup = false;
		}

		pipe_wait(pipe);
	}

	return 1;
}
EXPORT_SYMBOL(splice_from_pipe_next);

/**
 * splice_from_pipe_begin - start splicing from pipe
 * @sd:		information about the splice operation
 *
 * Description:
 *    This function should be called before a loop containing
 *    splice_from_pipe_next() and splice_from_pipe_feed() to
 *    initialize the necessary fields of @sd.
 */
void splice_from_pipe_begin(struct splice_desc *sd)
{
	sd->num_spliced = 0;
	sd->need_wakeup = false;
}
EXPORT_SYMBOL(splice_from_pipe_begin);

/**
 * splice_from_pipe_end - finish splicing from pipe
 * @pipe:	pipe to splice from
 * @sd:		information about the splice operation
 *
 * Description:
 *    This function will wake up pipe writers if necessary.  It should
 *    be called after a loop containing splice_from_pipe_next() and
 *    splice_from_pipe_feed().
 */
void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd)
{
	if (sd->need_wakeup)
		wakeup_pipe_writers(pipe);
}
EXPORT_SYMBOL(splice_from_pipe_end);
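
/*
 * The begin/next/feed/end helpers above compose into the canonical loop;
 * this is literally what __splice_from_pipe() below does. Callers that
 * need their own locking around the feed step open-code it, see
 * generic_file_splice_write():
 *
 *	splice_from_pipe_begin(&sd);
 *	do {
 *		ret = splice_from_pipe_next(pipe, &sd);
 *		if (ret > 0)
 *			ret = splice_from_pipe_feed(pipe, &sd, actor);
 *	} while (ret > 0);
 *	splice_from_pipe_end(pipe, &sd);
 */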

/**
 * __splice_from_pipe - splice data from a pipe to given actor
 * @pipe:	pipe to splice from
 * @sd:		information to @actor
 * @actor:	handler that splices the data
 *
 * Description:
 *    This function does little more than loop over the pipe and call
 *    @actor to do the actual moving of a single struct pipe_buffer to
 *    the desired destination. See pipe_to_file, pipe_to_sendpage, or
 *    pipe_to_user.
 *
 */
ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd,
			   splice_actor *actor)
{
	int ret;

	splice_from_pipe_begin(sd);
	do {
		ret = splice_from_pipe_next(pipe, sd);
		if (ret > 0)
			ret = splice_from_pipe_feed(pipe, sd, actor);
	} while (ret > 0);
	splice_from_pipe_end(pipe, sd);

	return sd->num_spliced ? sd->num_spliced : ret;
}
EXPORT_SYMBOL(__splice_from_pipe);

/**
 * splice_from_pipe - splice data from a pipe to a file
 * @pipe:	pipe to splice from
 * @out:	file to splice to
 * @ppos:	position in @out
 * @len:	how many bytes to splice
 * @flags:	splice modifier flags
 * @actor:	handler that splices the data
 *
 * Description:
 *    See __splice_from_pipe. This function locks the pipe inode,
 *    otherwise it's identical to __splice_from_pipe().
 *
 */
ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
			 loff_t *ppos, size_t len, unsigned int flags,
			 splice_actor *actor)
{
	ssize_t ret;
	struct splice_desc sd = {
		.total_len = len,
		.flags = flags,
		.pos = *ppos,
		.u.file = out,
	};

	pipe_lock(pipe);
	ret = __splice_from_pipe(pipe, &sd, actor);
	pipe_unlock(pipe);

	return ret;
}

/**
 * generic_file_splice_write - splice data from a pipe to a file
 * @pipe:	pipe info
 * @out:	file to write to
 * @ppos:	position in @out
 * @len:	number of bytes to splice
 * @flags:	splice modifier flags
 *
 * Description:
 *    Will either move or copy pages (determined by @flags options) from
 *    the given pipe inode to the given file.
 *
 */
ssize_t
generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
			  loff_t *ppos, size_t len, unsigned int flags)
{
	struct address_space *mapping = out->f_mapping;
	struct inode *inode = mapping->host;
	struct splice_desc sd = {
		.total_len = len,
		.flags = flags,
		.pos = *ppos,
		.u.file = out,
	};
	ssize_t ret;

	pipe_lock(pipe);

	splice_from_pipe_begin(&sd);
	do {
		ret = splice_from_pipe_next(pipe, &sd);
		if (ret <= 0)
			break;

		mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
		ret = file_remove_suid(out);
		if (!ret) {
			file_update_time(out);
			ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file);
		}
		mutex_unlock(&inode->i_mutex);
	} while (ret > 0);
	splice_from_pipe_end(pipe, &sd);

	pipe_unlock(pipe);

	if (sd.num_spliced)
		ret = sd.num_spliced;

	if (ret > 0) {
		unsigned long nr_pages;
		int err;

		nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;

		err = generic_write_sync(out, *ppos, ret);
		if (err)
			ret = err;
		else
			*ppos += ret;
		balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
	}

	return ret;
}

EXPORT_SYMBOL(generic_file_splice_write);

static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
			  struct splice_desc *sd)
{
	int ret;
	void *data;

	ret = buf->ops->confirm(pipe, buf);
	if (ret)
		return ret;

	data = buf->ops->map(pipe, buf, 0);
	ret = kernel_write(sd->u.file, data + buf->offset, sd->len, sd->pos);
	buf->ops->unmap(pipe, buf, data);

	return ret;
}

static ssize_t default_file_splice_write(struct pipe_inode_info *pipe,
					 struct file *out, loff_t *ppos,
					 size_t len, unsigned int flags)
{
	ssize_t ret;

	ret = splice_from_pipe(pipe, out, ppos, len, flags, write_pipe_buf);
	if (ret > 0)
		*ppos += ret;

	return ret;
}

/**
 * generic_splice_sendpage - splice data from a pipe to a socket
 * @pipe:	pipe to splice from
 * @out:	socket to write to
 * @ppos:	position in @out
 * @len:	number of bytes to splice
 * @flags:	splice modifier flags
 *
 * Description:
 *    Will send @len bytes from the pipe to a network socket. No data copying
 *    is involved.
 *
 */
ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out,
				loff_t *ppos, size_t len, unsigned int flags)
{
	return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_sendpage);
}

EXPORT_SYMBOL(generic_splice_sendpage);

/*
 * Attempt to initiate a splice from pipe to file.
 */
static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
			   loff_t *ppos, size_t len, unsigned int flags)
{
	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *,
				loff_t *, size_t, unsigned int);
	int ret;

	if (unlikely(!(out->f_mode & FMODE_WRITE)))
		return -EBADF;

	if (unlikely(out->f_flags & O_APPEND))
		return -EINVAL;

	ret = rw_verify_area(WRITE, out, ppos, len);
	if (unlikely(ret < 0))
		return ret;

	if (out->f_op && out->f_op->splice_write)
		splice_write = out->f_op->splice_write;
	else
		splice_write = default_file_splice_write;

	return splice_write(pipe, out, ppos, len, flags);
}

/*
 * Attempt to initiate a splice from a file to a pipe.
 */
static long do_splice_to(struct file *in, loff_t *ppos,
			 struct pipe_inode_info *pipe, size_t len,
			 unsigned int flags)
{
	ssize_t (*splice_read)(struct file *, loff_t *,
			       struct pipe_inode_info *, size_t, unsigned int);
	int ret;

	if (unlikely(!(in->f_mode & FMODE_READ)))
		return -EBADF;

	ret = rw_verify_area(READ, in, ppos, len);
	if (unlikely(ret < 0))
		return ret;

	if (in->f_op && in->f_op->splice_read)
		splice_read = in->f_op->splice_read;
	else
		splice_read = default_file_splice_read;

	return splice_read(in, ppos, pipe, len, flags);
}

/**
 * splice_direct_to_actor - splices data directly between two non-pipes
 * @in:		file to splice from
 * @sd:		actor information on where to splice to
 * @actor:	handles the data splicing
 *
 * Description:
 *    This is a special case helper to splice directly between two
 *    points, without requiring an explicit pipe. Internally an allocated
 *    pipe is cached in the process, and reused during the lifetime of
 *    that process.
 *
 */
ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
			       splice_direct_actor *actor)
{
	struct pipe_inode_info *pipe;
	long ret, bytes;
	umode_t i_mode;
	size_t len;
	int i, flags;

	/*
	 * We require the input to be a regular file, as we don't want to
	 * randomly drop data for e.g. socket -> socket splicing. Use the
	 * piped splicing for that!
	 */
	i_mode = in->f_path.dentry->d_inode->i_mode;
	if (unlikely(!S_ISREG(i_mode) && !S_ISBLK(i_mode)))
		return -EINVAL;

	/*
	 * neither in nor out is a pipe, setup an internal pipe attached to
	 * 'out' and transfer the wanted data from 'in' to 'out' through that
	 */
	pipe = current->splice_pipe;
	if (unlikely(!pipe)) {
		pipe = alloc_pipe_info(NULL);
		if (!pipe)
			return -ENOMEM;

		/*
		 * We don't have an immediate reader, but we'll read the stuff
		 * out of the pipe right after the splice_to_pipe(). So set
		 * PIPE_READERS appropriately.
		 */
		pipe->readers = 1;

		current->splice_pipe = pipe;
	}

	/*
	 * Do the splice.
	 */
	ret = 0;
	bytes = 0;
	len = sd->total_len;
	flags = sd->flags;

	/*
	 * Don't block on output, we have to drain the direct pipe.
	 */
	sd->flags &= ~SPLICE_F_NONBLOCK;

	while (len) {
		size_t read_len;
		loff_t pos = sd->pos, prev_pos = pos;

		ret = do_splice_to(in, &pos, pipe, len, flags);
		if (unlikely(ret <= 0))
			goto out_release;

		read_len = ret;
		sd->total_len = read_len;

		/*
		 * NOTE: nonblocking mode only applies to the input. We
		 * must not do the output in nonblocking mode as then we
		 * could get stuck data in the internal pipe:
		 */
		ret = actor(pipe, sd);
		if (unlikely(ret <= 0)) {
			sd->pos = prev_pos;
			goto out_release;
		}

		bytes += ret;
		len -= ret;
		sd->pos = pos;

		if (ret < read_len) {
			sd->pos = prev_pos + ret;
			goto out_release;
		}
	}

done:
	pipe->nrbufs = pipe->curbuf = 0;
	file_accessed(in);
	return bytes;

out_release:
	/*
	 * If we did an incomplete transfer we must release
	 * the pipe buffers in question:
	 */
	for (i = 0; i < pipe->buffers; i++) {
		struct pipe_buffer *buf = pipe->bufs + i;

		if (buf->ops) {
			buf->ops->release(pipe, buf);
			buf->ops = NULL;
		}
	}

	if (!bytes)
		bytes = ret;

	goto done;
}
EXPORT_SYMBOL(splice_direct_to_actor);

static int direct_splice_actor(struct pipe_inode_info *pipe,
			       struct splice_desc *sd)
{
	struct file *file = sd->u.file;

	return do_splice_from(pipe, file, &file->f_pos, sd->total_len,
			      sd->flags);
}

/**
 * do_splice_direct - splices data directly between two files
 * @in:		file to splice from
 * @ppos:	input file offset
 * @out:	file to splice to
 * @len:	number of bytes to splice
 * @flags:	splice modifier flags
 *
 * Description:
 *    For use by do_sendfile(). splice can easily emulate sendfile, but
 *    doing it in the application would incur an extra system call
 *    (splice in + splice out, as compared to just sendfile()). So this helper
 *    can splice directly through a process-private pipe.
 *
 */
long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
		      size_t len, unsigned int flags)
{
	struct splice_desc sd = {
		.len		= len,
		.total_len	= len,
		.flags		= flags,
		.pos		= *ppos,
		.u.file		= out,
	};
	long ret;

	ret = splice_direct_to_actor(in, &sd, direct_splice_actor);
	if (ret > 0)
		*ppos = sd.pos;

	return ret;
}
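
/*
 * Usage sketch (illustrative): do_sendfile() is the in-kernel caller of
 * do_splice_direct(); from userspace, the equivalent of
 *
 *	sendfile(out_fd, in_fd, &offset, count);
 *
 * ends up here, reusing one per-task pipe (current->splice_pipe) instead
 * of allocating a fresh pipe on every call.
 */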

static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
			       struct pipe_inode_info *opipe,
			       size_t len, unsigned int flags);

/*
 * Determine where to splice to/from.
 */
static long do_splice(struct file *in, loff_t __user *off_in,
		      struct file *out, loff_t __user *off_out,
		      size_t len, unsigned int flags)
{
	struct pipe_inode_info *ipipe;
	struct pipe_inode_info *opipe;
	loff_t offset, *off;
	long ret;

	ipipe = get_pipe_info(in);
	opipe = get_pipe_info(out);

	if (ipipe && opipe) {
		if (off_in || off_out)
			return -ESPIPE;

		if (!(in->f_mode & FMODE_READ))
			return -EBADF;

		if (!(out->f_mode & FMODE_WRITE))
			return -EBADF;

		/* Splicing to self would be fun, but... */
		if (ipipe == opipe)
			return -EINVAL;

		return splice_pipe_to_pipe(ipipe, opipe, len, flags);
	}

	if (ipipe) {
		if (off_in)
			return -ESPIPE;
		if (off_out) {
			if (!(out->f_mode & FMODE_PWRITE))
				return -EINVAL;
			if (copy_from_user(&offset, off_out, sizeof(loff_t)))
				return -EFAULT;
			off = &offset;
		} else
			off = &out->f_pos;

		ret = do_splice_from(ipipe, out, off, len, flags);

		if (off_out && copy_to_user(off_out, off, sizeof(loff_t)))
			ret = -EFAULT;

		return ret;
	}

	if (opipe) {
		if (off_out)
			return -ESPIPE;
		if (off_in) {
			if (!(in->f_mode & FMODE_PREAD))
				return -EINVAL;
			if (copy_from_user(&offset, off_in, sizeof(loff_t)))
				return -EFAULT;
			off = &offset;
		} else
			off = &in->f_pos;

		ret = do_splice_to(in, off, opipe, len, flags);

		if (off_in && copy_to_user(off_in, off, sizeof(loff_t)))
			ret = -EFAULT;

		return ret;
	}

	return -EINVAL;
}

#if defined(CONFIG_BCM_RECVFILE)
/* Copy data directly from socket to file (pagecache) */
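/*
 * Outline (descriptive note): the Broadcom fast path below runs in three
 * phases -- (1) reserve and map up to MAX_PAGES_PER_RECVFILE pagecache
 * pages with ->write_begin(), building a kvec over them; (2) pull the
 * socket payload straight into those pages with a single kernel_recvmsg()
 * call; (3) commit with ->write_end(), trimming any pages the socket did
 * not fill. This skips the pipe and the intermediate copy of the regular
 * splice path.
 */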
static ssize_t BCMFASTPATH_HOST do_splice_from_socket(struct file *file, struct socket *sock,
				     loff_t __user *off_out, size_t count)
{
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	loff_t pos, start_pos;
	int count_tmp, copied_bytes;
	int err = 0;
	int idx;
	int cPagesAllocated = 0;
	struct recvfile_ctl_blk *rv_cb;
	struct kvec *iov;
	struct msghdr msg;
	long rcvtimeo;
	int ret;

	if (count > MAX_PAGES_PER_RECVFILE * PAGE_SIZE) {
		printk(KERN_WARNING "%s: count(%zu) exceeds maximum\n", __func__, count);
		return -EINVAL;
	}

	if (off_out) {
		if (copy_from_user(&start_pos, off_out, sizeof(loff_t)))
			return -EFAULT;
	} else {
		return -EINVAL;
	}

	pos = start_pos;

	rv_cb = kmalloc(MAX_PAGES_PER_RECVFILE * sizeof(struct recvfile_ctl_blk), GFP_KERNEL);
	if (!rv_cb) {
		printk(KERN_WARNING "%s: memory allocation for rv_cb failed\n", __func__);
		return -ENOMEM;
	}

	iov = kmalloc(MAX_PAGES_PER_RECVFILE * sizeof(struct kvec), GFP_KERNEL);
	if (!iov) {
		kfree(rv_cb);
		printk(KERN_WARNING "%s: memory allocation for iov failed\n", __func__);
		return -ENOMEM;
	}

	mutex_lock(&inode->i_mutex);

	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);

	/* We can write back this queue in page reclaim */
	current->backing_dev_info = mapping->backing_dev_info;

	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
	if (err != 0 || count == 0)
		goto done;

	file_remove_suid(file);
	file_update_time(file);

	count_tmp = count;
	do {
		unsigned long bytes;	/* Bytes to write to page */
		unsigned long offset;	/* Offset into pagecache page */
		struct page *pageP;
		void *fsdata;

		offset = (pos & (PAGE_CACHE_SIZE - 1));
		bytes = PAGE_CACHE_SIZE - offset;
		if (bytes > count_tmp)
			bytes = count_tmp;

		ret = mapping->a_ops->write_begin(file, mapping, pos, bytes,
				AOP_FLAG_UNINTERRUPTIBLE,
				&pageP, &fsdata);

		if (unlikely(ret)) {
			err = ret;
			for (idx = 0; idx < cPagesAllocated; idx++) {
				kunmap(rv_cb[idx].rv_page);
				ret = mapping->a_ops->write_end(file, mapping,
						rv_cb[idx].rv_pos,
						rv_cb[idx].rv_count,
						0,
						rv_cb[idx].rv_page,
						rv_cb[idx].rv_fsdata);
			}
			goto done;
		}
		rv_cb[cPagesAllocated].rv_page = pageP;
		rv_cb[cPagesAllocated].rv_pos = pos;
		rv_cb[cPagesAllocated].rv_count = bytes;
		rv_cb[cPagesAllocated].rv_fsdata = fsdata;
		iov[cPagesAllocated].iov_base = kmap(pageP) + offset;
		iov[cPagesAllocated].iov_len = bytes;
		cPagesAllocated++;
		count_tmp -= bytes;
		pos += bytes;
	} while (count_tmp);

	/* IOV is ready, receive the data from socket now */
	msg.msg_name = NULL;
	msg.msg_namelen = 0;
	msg.msg_iov = (struct iovec *)&iov[0];
	msg.msg_iovlen = cPagesAllocated;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_flags = MSG_KERNSPACE;
	rcvtimeo = sock->sk->sk_rcvtimeo;
	sock->sk->sk_rcvtimeo = 8 * HZ;

	ret = kernel_recvmsg(sock, &msg, &iov[0], cPagesAllocated, count,
			MSG_WAITALL | MSG_NOCATCHSIG);

	sock->sk->sk_rcvtimeo = rcvtimeo;

	if (unlikely(ret != count)) {
		if (ret < 0) {
			err = -EPIPE;
			count = 0;
		} else {
			/* We have read some data from socket */
			count = ret;
		}
	} else {
		err = 0;
	}

	/*
	 * Adjust the pagecache page lengths based on the amount of data
	 * copied; truncate the pages which were not used.
	 */
	count_tmp = count;

	for (idx = 0; idx < cPagesAllocated; idx++) {
		if (count_tmp) {
			copied_bytes = min(rv_cb[idx].rv_count, (unsigned int)count_tmp);
			count_tmp -= copied_bytes;
		} else {
			copied_bytes = 0;
		}

		kunmap(rv_cb[idx].rv_page);
		ret = mapping->a_ops->write_end(file, mapping,
				rv_cb[idx].rv_pos,
				rv_cb[idx].rv_count,
				copied_bytes,
				rv_cb[idx].rv_page,
				rv_cb[idx].rv_fsdata);

		if (unlikely(ret < 0)) {
			printk(KERN_WARNING "%s: write_end failed, ret = %d\n", __func__, ret);
		}
	}

	if (count) {
		balance_dirty_pages_ratelimited_nr(mapping, cPagesAllocated);
	}

	/* Fix pos based on returned bytes from recvmsg */
	pos = start_pos + count;
	if (off_out && copy_to_user(off_out, &pos, sizeof(loff_t)))
		ret = -EFAULT;

done:
	current->backing_dev_info = NULL;
	mutex_unlock(&inode->i_mutex);

	kfree(rv_cb);
	kfree(iov);

	if (err)
		return err;
	else
		return count;
}
#endif /* CONFIG_BCM_RECVFILE */

/*
 * Map an iov into an array of pages and offset/length tuples. With the
 * partial_page structure, we can map several non-contiguous ranges into
 * our one pages[] map instead of splitting that operation into pieces.
 * Could easily be exported as a generic helper for other users, in which
 * case one would probably want to add a 'max_nr_pages' parameter as well.
 */
static int get_iovec_page_array(const struct iovec __user *iov,
				unsigned int nr_vecs, struct page **pages,
				struct partial_page *partial, int aligned,
				unsigned int pipe_buffers)
{
	int buffers = 0, error = 0;

	while (nr_vecs) {
		unsigned long off, npages;
		struct iovec entry;
		void __user *base;
		size_t len;
		int i;

		error = -EFAULT;
		if (copy_from_user(&entry, iov, sizeof(entry)))
			break;

		base = entry.iov_base;
		len = entry.iov_len;

		/*
		 * Sanity check this iovec. 0 read succeeds.
		 */
		error = 0;
		if (unlikely(!len))
			break;
		error = -EFAULT;
		if (!access_ok(VERIFY_READ, base, len))
			break;

		/*
		 * Get this base offset and number of pages, then map
		 * in the user pages.
		 */
		off = (unsigned long) base & ~PAGE_MASK;

		/*
		 * If asked for alignment, the offset must be zero and the
		 * length a multiple of the PAGE_SIZE.
		 */
		error = -EINVAL;
		if (aligned && (off || len & ~PAGE_MASK))
			break;

		npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
		if (npages > pipe_buffers - buffers)
			npages = pipe_buffers - buffers;

		error = get_user_pages_fast((unsigned long)base, npages,
					0, &pages[buffers]);

		if (unlikely(error <= 0))
			break;

		/*
		 * Fill this contiguous range into the partial page map.
		 */
		for (i = 0; i < error; i++) {
			const int plen = min_t(size_t, len, PAGE_SIZE - off);

			partial[buffers].offset = off;
			partial[buffers].len = plen;

			off = 0;
			len -= plen;
			buffers++;
		}

		/*
		 * We didn't complete this iov, stop here since it probably
		 * means we have to move some of this into a pipe to
		 * be able to continue.
		 */
		if (len)
			break;

		/*
		 * Don't continue if we mapped fewer pages than we asked for,
		 * or if we mapped the max number of pages that we have
		 * room for.
		 */
		if (error < npages || buffers == pipe_buffers)
			break;

		nr_vecs--;
		iov++;
	}

	if (buffers)
		return buffers;

	return error;
}

static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
			struct splice_desc *sd)
{
	char *src;
	int ret;

	ret = buf->ops->confirm(pipe, buf);
	if (unlikely(ret))
		return ret;

	/*
	 * See if we can use the atomic maps, by prefaulting in the
	 * pages and doing an atomic copy
	 */
	if (!fault_in_pages_writeable(sd->u.userptr, sd->len)) {
		src = buf->ops->map(pipe, buf, 1);
		ret = __copy_to_user_inatomic(sd->u.userptr, src + buf->offset,
							sd->len);
		buf->ops->unmap(pipe, buf, src);
		if (!ret) {
			ret = sd->len;
			goto out;
		}
	}

	/*
	 * No dice, use slow non-atomic map and copy
	 */
	src = buf->ops->map(pipe, buf, 0);

	ret = sd->len;
	if (copy_to_user(sd->u.userptr, src + buf->offset, sd->len))
		ret = -EFAULT;

	buf->ops->unmap(pipe, buf, src);
out:
	if (ret > 0)
		sd->u.userptr += ret;
	return ret;
}

/*
 * For lack of a better implementation, implement vmsplice() to userspace
 * as a simple copy of the pipe's pages to the user iov.
 */
static long vmsplice_to_user(struct file *file, const struct iovec __user *iov,
			     unsigned long nr_segs, unsigned int flags)
{
	struct pipe_inode_info *pipe;
	struct splice_desc sd;
	ssize_t size;
	int error;
	long ret;

	pipe = get_pipe_info(file);
	if (!pipe)
		return -EBADF;

	pipe_lock(pipe);

	error = ret = 0;
	while (nr_segs) {
		void __user *base;
		size_t len;

		/*
		 * Get user address base and length for this iovec.
		 */
		error = get_user(base, &iov->iov_base);
		if (unlikely(error))
			break;
		error = get_user(len, &iov->iov_len);
		if (unlikely(error))
			break;

		/*
		 * Sanity check this iovec. 0 read succeeds.
		 */
		if (unlikely(!len))
			break;
		if (unlikely(!base)) {
			error = -EFAULT;
			break;
		}

		if (unlikely(!access_ok(VERIFY_WRITE, base, len))) {
			error = -EFAULT;
			break;
		}

		sd.len = 0;
		sd.total_len = len;
		sd.flags = flags;
		sd.u.userptr = base;
		sd.pos = 0;

		size = __splice_from_pipe(pipe, &sd, pipe_to_user);
		if (size < 0) {
			if (!ret)
				ret = size;

			break;
		}

		ret += size;

		if (size < len)
			break;

		nr_segs--;
		iov++;
	}

	pipe_unlock(pipe);

	if (!ret)
		ret = error;

	return ret;
}

/*
 * vmsplice splices a user address range into a pipe. It can be thought of
 * as splice-from-memory, where the regular splice is splice-from-file (or
 * to file). In both cases the output is a pipe, naturally.
 */
static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
			     unsigned long nr_segs, unsigned int flags)
{
	struct pipe_inode_info *pipe;
	struct page *pages[PIPE_DEF_BUFFERS];
	struct partial_page partial[PIPE_DEF_BUFFERS];
	struct splice_pipe_desc spd = {
		.pages = pages,
		.partial = partial,
		.flags = flags,
		.ops = &user_page_pipe_buf_ops,
		.spd_release = spd_release_page,
	};
	long ret;

	pipe = get_pipe_info(file);
	if (!pipe)
		return -EBADF;

	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

	spd.nr_pages = get_iovec_page_array(iov, nr_segs, spd.pages,
					    spd.partial, flags & SPLICE_F_GIFT,
					    pipe->buffers);
	if (spd.nr_pages <= 0)
		ret = spd.nr_pages;
	else
		ret = splice_to_pipe(pipe, &spd);

	splice_shrink_spd(pipe, &spd);
	return ret;
}

/*
 * Note that vmsplice only really supports true splicing _from_ user memory
 * to a pipe, not the other way around. Splicing from user memory is a simple
 * operation that can be supported without any funky alignment restrictions
 * or nasty vm tricks. We simply map in the user memory and fill it into
 * a pipe. The reverse isn't quite as easy, though. There are two possible
 * solutions for that:
 *
 *	- memcpy() the data internally, at which point we might as well just
 *	  do a regular read() on the buffer anyway.
 *	- Lots of nasty vm tricks, that are neither fast nor flexible (it
 *	  has restrictive limitations on both ends of the pipe).
 *
 * Currently we punt and implement it as a normal copy, see pipe_to_user().
 *
 */
SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov,
		unsigned long, nr_segs, unsigned int, flags)
{
	struct file *file;
	long error;
	int fput;

	if (unlikely(nr_segs > UIO_MAXIOV))
		return -EINVAL;
	else if (unlikely(!nr_segs))
		return 0;

	error = -EBADF;
	file = fget_light(fd, &fput);
	if (file) {
		if (file->f_mode & FMODE_WRITE)
			error = vmsplice_to_pipe(file, iov, nr_segs, flags);
		else if (file->f_mode & FMODE_READ)
			error = vmsplice_to_user(file, iov, nr_segs, flags);

		fput_light(file, fput);
	}

	return error;
}
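
/*
 * Userspace sketch (illustrative): gifting a buffer into a pipe with
 * vmsplice(2). SPLICE_F_GIFT only enables page stealing if 'buf' is
 * page-aligned and a multiple of the page size; error handling omitted.
 *
 *	struct iovec iov = { .iov_base = buf, .iov_len = buflen };
 *	ssize_t n = vmsplice(pfd[1], &iov, 1, SPLICE_F_GIFT);
 */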

SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in,
		int, fd_out, loff_t __user *, off_out,
		size_t, len, unsigned int, flags)
{
	long error;
	struct file *in, *out;
	int fput_in, fput_out;

	if (unlikely(!len))
		return 0;

	error = -EBADF;

#if defined(CONFIG_BCM_RECVFILE)
	/* If the input is a socket and the output is a file, try to copy
	 * from the socket to the file directly.
	 */
	{
		struct socket *sock = NULL;

		/* Check if fd_in is a socket */
		sock = sockfd_lookup(fd_in, (int *)&error);
		if (sock) {
			out = NULL;
			if (!sock->sk)
				goto done;

			out = fget_light(fd_out, &fput_out);

			if (out) {
				struct pipe_inode_info *opipe;

				opipe = get_pipe_info(out);
				if (opipe) {
					/* Output is a pipe, fall back to regular processing */
					printk(KERN_WARNING "out_fd is a pipe\n");
					goto regular_proc;
				}

				if (!(out->f_mode & FMODE_WRITE))
					goto done;

				if ((out->f_op && out->f_op->splice_write)) {
					ssize_t (*splice_from_socket)(struct file *, struct socket *, loff_t __user *, size_t);

					splice_from_socket = out->f_op->splice_write_from_socket;
					if (!splice_from_socket)
						splice_from_socket = do_splice_from_socket;
					error = splice_from_socket(out, sock, off_out, len);
				} else {
					/* Splice from socket->file not supported */
					error = -EBADF;
				}
			}
done:
			if (out)
				fput_light(out, fput_out);
			fput(sock->file);
			return error;

regular_proc:
			if (out)
				fput_light(out, fput_out);
			fput(sock->file);
		}
	}
#endif /* CONFIG_BCM_RECVFILE */

	in = fget_light(fd_in, &fput_in);
	if (in) {
		if (in->f_mode & FMODE_READ) {
			out = fget_light(fd_out, &fput_out);
			if (out) {
				if (out->f_mode & FMODE_WRITE)
					error = do_splice(in, off_in,
							  out, off_out,
							  len, flags);
				fput_light(out, fput_out);
			}
		}

		fput_light(in, fput_in);
	}

	return error;
}
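
/*
 * Userspace sketch (illustrative): splicing from a specific file offset
 * into a pipe. When an offset pointer is passed, the kernel updates it
 * and leaves the fd's own file position untouched.
 *
 *	loff_t off = 4096;
 *	ssize_t n = splice(file_fd, &off, pfd[1], NULL, 8192, 0);
 */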

/*
 * Make sure there's data to read. Wait for input if we can, otherwise
 * return an appropriate error.
 */
static int ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
{
	int ret;

	/*
	 * Check ->nrbufs without the inode lock first. This function
	 * is speculative anyways, so missing one is ok.
	 */
	if (pipe->nrbufs)
		return 0;

	ret = 0;
	pipe_lock(pipe);

	while (!pipe->nrbufs) {
		if (signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}
		if (!pipe->writers)
			break;
		if (!pipe->waiting_writers) {
			if (flags & SPLICE_F_NONBLOCK) {
				ret = -EAGAIN;
				break;
			}
		}
		pipe_wait(pipe);
	}

	pipe_unlock(pipe);
	return ret;
}

/*
 * Make sure there's writeable room. Wait for room if we can, otherwise
 * return an appropriate error.
 */
static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
{
	int ret;

	/*
	 * Check ->nrbufs without the inode lock first. This function
	 * is speculative anyways, so missing one is ok.
	 */
	if (pipe->nrbufs < pipe->buffers)
		return 0;

	ret = 0;
	pipe_lock(pipe);

	while (pipe->nrbufs >= pipe->buffers) {
		if (!pipe->readers) {
			send_sig(SIGPIPE, current, 0);
			ret = -EPIPE;
			break;
		}
		if (flags & SPLICE_F_NONBLOCK) {
			ret = -EAGAIN;
			break;
		}
		if (signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}
		pipe->waiting_writers++;
		pipe_wait(pipe);
		pipe->waiting_writers--;
	}

	pipe_unlock(pipe);
	return ret;
}

/*
 * Splice contents of ipipe to opipe.
 */
static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
			       struct pipe_inode_info *opipe,
			       size_t len, unsigned int flags)
{
	struct pipe_buffer *ibuf, *obuf;
	int ret = 0, nbuf;
	bool input_wakeup = false;

retry:
	ret = ipipe_prep(ipipe, flags);
	if (ret)
		return ret;

	ret = opipe_prep(opipe, flags);
	if (ret)
		return ret;

	pipe_double_lock(ipipe, opipe);

	do {
		if (!opipe->readers) {
			send_sig(SIGPIPE, current, 0);
			if (!ret)
				ret = -EPIPE;
			break;
		}

		if (!ipipe->nrbufs && !ipipe->writers)
			break;

		/*
		 * Cannot make any progress, because either the input
		 * pipe is empty or the output pipe is full.
		 */
		if (!ipipe->nrbufs || opipe->nrbufs >= opipe->buffers) {
			/* Already processed some buffers, break */
			if (ret)
				break;

			if (flags & SPLICE_F_NONBLOCK) {
				ret = -EAGAIN;
				break;
			}

			/*
			 * We raced with another reader/writer and haven't
			 * managed to process any buffers.  A zero return
			 * value means EOF, so retry instead.
			 */
			pipe_unlock(ipipe);
			pipe_unlock(opipe);
			goto retry;
		}

		ibuf = ipipe->bufs + ipipe->curbuf;
		nbuf = (opipe->curbuf + opipe->nrbufs) & (opipe->buffers - 1);
		obuf = opipe->bufs + nbuf;

		if (len >= ibuf->len) {
			/*
			 * Simply move the whole buffer from ipipe to opipe
			 */
			*obuf = *ibuf;
			ibuf->ops = NULL;
			opipe->nrbufs++;
			ipipe->curbuf = (ipipe->curbuf + 1) & (ipipe->buffers - 1);
			ipipe->nrbufs--;
			input_wakeup = true;
		} else {
			/*
			 * Get a reference to this pipe buffer,
			 * so we can copy the contents over.
			 */
			ibuf->ops->get(ipipe, ibuf);
			*obuf = *ibuf;

			/*
			 * Don't inherit the gift flag, we need to
			 * prevent multiple steals of this page.
			 */
			obuf->flags &= ~PIPE_BUF_FLAG_GIFT;

			obuf->len = len;
			opipe->nrbufs++;
			ibuf->offset += obuf->len;
			ibuf->len -= obuf->len;
		}
		ret += obuf->len;
		len -= obuf->len;
	} while (len);

	pipe_unlock(ipipe);
	pipe_unlock(opipe);

	/*
	 * If we put data in the output pipe, wakeup any potential readers.
	 */
	if (ret > 0) {
		smp_mb();
		if (waitqueue_active(&opipe->wait))
			wake_up_interruptible(&opipe->wait);
		kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
	}
	if (input_wakeup)
		wakeup_pipe_writers(ipipe);

	return ret;
}

/*
 * Link contents of ipipe to opipe.
 */
static int link_pipe(struct pipe_inode_info *ipipe,
		     struct pipe_inode_info *opipe,
		     size_t len, unsigned int flags)
{
	struct pipe_buffer *ibuf, *obuf;
	int ret = 0, i = 0, nbuf;

	pipe_double_lock(ipipe, opipe);

	do {
		if (!opipe->readers) {
			send_sig(SIGPIPE, current, 0);
			if (!ret)
				ret = -EPIPE;
			break;
		}

		/*
		 * If we have iterated all input buffers or ran out of
		 * output room, break.
		 */
		if (i >= ipipe->nrbufs || opipe->nrbufs >= opipe->buffers)
			break;

		ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (ipipe->buffers-1));
		nbuf = (opipe->curbuf + opipe->nrbufs) & (opipe->buffers - 1);

		/*
		 * Get a reference to this pipe buffer,
		 * so we can copy the contents over.
		 */
		ibuf->ops->get(ipipe, ibuf);

		obuf = opipe->bufs + nbuf;
		*obuf = *ibuf;

		/*
		 * Don't inherit the gift flag, we need to
		 * prevent multiple steals of this page.
		 */
		obuf->flags &= ~PIPE_BUF_FLAG_GIFT;

		if (obuf->len > len)
			obuf->len = len;

		opipe->nrbufs++;
		ret += obuf->len;
		len -= obuf->len;
		i++;
	} while (len);

	/*
	 * return EAGAIN if we have the potential of some data in the
	 * future, otherwise just return 0
	 */
	if (!ret && ipipe->waiting_writers && (flags & SPLICE_F_NONBLOCK))
		ret = -EAGAIN;

	pipe_unlock(ipipe);
	pipe_unlock(opipe);

	/*
	 * If we put data in the output pipe, wakeup any potential readers.
	 */
	if (ret > 0) {
		smp_mb();
		if (waitqueue_active(&opipe->wait))
			wake_up_interruptible(&opipe->wait);
		kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
	}

	return ret;
}

/*
 * This is a tee(1) implementation that works on pipes. It doesn't copy
 * any data, it simply references the 'in' pages on the 'out' pipe.
 * The 'flags' used are the SPLICE_F_* variants, currently the only
 * applicable one is SPLICE_F_NONBLOCK.
 */
static long do_tee(struct file *in, struct file *out, size_t len,
		   unsigned int flags)
{
	struct pipe_inode_info *ipipe = get_pipe_info(in);
	struct pipe_inode_info *opipe = get_pipe_info(out);
	int ret = -EINVAL;

	/*
	 * Duplicate the contents of ipipe to opipe without actually
	 * copying the data.
	 */
	if (ipipe && opipe && ipipe != opipe) {
		/*
		 * Keep going, unless we encounter an error. The ipipe/opipe
		 * ordering doesn't really matter.
		 */
		ret = ipipe_prep(ipipe, flags);
		if (!ret) {
			ret = opipe_prep(opipe, flags);
			if (!ret)
				ret = link_pipe(ipipe, opipe, len, flags);
		}
	}

	return ret;
}

SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags)
{
	struct file *in;
	int error, fput_in;

	if (unlikely(!len))
		return 0;

	error = -EBADF;
	in = fget_light(fdin, &fput_in);
	if (in) {
		if (in->f_mode & FMODE_READ) {
			int fput_out;
			struct file *out = fget_light(fdout, &fput_out);

			if (out) {
				if (out->f_mode & FMODE_WRITE)
					error = do_tee(in, out, len, flags);
				fput_light(out, fput_out);
			}
		}
		fput_light(in, fput_in);
	}

	return error;
}
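
/*
 * Userspace sketch (illustrative): duplicating pipe contents with tee(2),
 * then consuming the original with splice(2). Both fds must be pipes; no
 * data is copied, the pages are just referenced on both pipes. Error
 * handling omitted; 'pfd_in', 'pfd_log' and 'out_fd' are assumed to be
 * set up by the caller.
 *
 *	ssize_t n = tee(pfd_in[0], pfd_log[1], 65536, SPLICE_F_NONBLOCK);
 *	if (n > 0)
 *		splice(pfd_in[0], NULL, out_fd, NULL, n, 0);
 */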