1/*
2 *  linux/fs/read_write.c
3 *
4 *  Copyright (C) 1991, 1992  Linus Torvalds
5 */
6
7#include <linux/slab.h>
8#include <linux/stat.h>
9#include <linux/fcntl.h>
10#include <linux/file.h>
11#include <linux/uio.h>
12#include <linux/smp_lock.h>
13#include <linux/fsnotify.h>
14#include <linux/security.h>
15#include <linux/module.h>
16#include <linux/syscalls.h>
17#include <linux/pagemap.h>
18#include <linux/splice.h>
19#include "read_write.h"
20
21#include <asm/uaccess.h>
22#include <asm/unistd.h>
23
24const struct file_operations generic_ro_fops = {
25	.llseek		= generic_file_llseek,
26	.read		= do_sync_read,
27	.aio_read	= generic_file_aio_read,
28	.mmap		= generic_file_readonly_mmap,
29	.splice_read	= generic_file_splice_read,
30};
31
32EXPORT_SYMBOL(generic_ro_fops);
33
34/**
35 * generic_file_llseek_unlocked - lockless generic llseek implementation
36 * @file:	file structure to seek on
37 * @offset:	file offset to seek to
38 * @origin:	type of seek
39 *
40 * Updates the file offset to the value specified by @offset and @origin.
41 * Locking must be provided by the caller.
42 */
43loff_t
44generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin)
45{
46	struct inode *inode = file->f_mapping->host;
47
48	switch (origin) {
49	case SEEK_END:
50		offset += inode->i_size;
51		break;
52	case SEEK_CUR:
53		/*
54		 * Here we special-case the lseek(fd, 0, SEEK_CUR)
55		 * position-querying operation.  Avoid rewriting the "same"
56		 * f_pos value back to the file because a concurrent read(),
57		 * write() or lseek() might have altered it
58		 */
59		if (offset == 0)
60			return file->f_pos;
61		offset += file->f_pos;
62		break;
63	}
64
65	if (offset < 0 || offset > inode->i_sb->s_maxbytes)
66		return -EINVAL;
67
68	/* Special lock needed here? */
69	if (offset != file->f_pos) {
70		file->f_pos = offset;
71		file->f_version = 0;
72	}
73
74	return offset;
75}
76EXPORT_SYMBOL(generic_file_llseek_unlocked);
77
78/**
79 * generic_file_llseek - generic llseek implementation for regular files
80 * @file:	file structure to seek on
81 * @offset:	file offset to seek to
82 * @origin:	type of seek
83 *
84 * This is a generic implemenation of ->llseek useable for all normal local
85 * filesystems.  It just updates the file offset to the value specified by
86 * @offset and @origin under i_mutex.
87 */
88loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
89{
90	loff_t rval;
91
92	mutex_lock(&file->f_dentry->d_inode->i_mutex);
93	rval = generic_file_llseek_unlocked(file, offset, origin);
94	mutex_unlock(&file->f_dentry->d_inode->i_mutex);
95
96	return rval;
97}
98EXPORT_SYMBOL(generic_file_llseek);
99
100/**
101 * noop_llseek - No Operation Performed llseek implementation
102 * @file:	file structure to seek on
103 * @offset:	file offset to seek to
104 * @origin:	type of seek
105 *
106 * This is an implementation of ->llseek useable for the rare special case when
107 * userspace expects the seek to succeed but the (device) file is actually not
108 * able to perform the seek. In this case you use noop_llseek() instead of
109 * falling back to the default implementation of ->llseek.
110 */
111loff_t noop_llseek(struct file *file, loff_t offset, int origin)
112{
113	return file->f_pos;
114}
115EXPORT_SYMBOL(noop_llseek);
116
117loff_t no_llseek(struct file *file, loff_t offset, int origin)
118{
119	return -ESPIPE;
120}
121EXPORT_SYMBOL(no_llseek);
122
123loff_t default_llseek(struct file *file, loff_t offset, int origin)
124{
125	loff_t retval;
126
127	lock_kernel();
128	switch (origin) {
129		case SEEK_END:
130			offset += i_size_read(file->f_path.dentry->d_inode);
131			break;
132		case SEEK_CUR:
133			if (offset == 0) {
134				retval = file->f_pos;
135				goto out;
136			}
137			offset += file->f_pos;
138	}
139	retval = -EINVAL;
140	if (offset >= 0) {
141		if (offset != file->f_pos) {
142			file->f_pos = offset;
143			file->f_version = 0;
144		}
145		retval = offset;
146	}
147out:
148	unlock_kernel();
149	return retval;
150}
151EXPORT_SYMBOL(default_llseek);
152
153loff_t vfs_llseek(struct file *file, loff_t offset, int origin)
154{
155	loff_t (*fn)(struct file *, loff_t, int);
156
157	fn = no_llseek;
158	if (file->f_mode & FMODE_LSEEK) {
159		fn = default_llseek;
160		if (file->f_op && file->f_op->llseek)
161			fn = file->f_op->llseek;
162	}
163	return fn(file, offset, origin);
164}
165EXPORT_SYMBOL(vfs_llseek);
166
167SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin)
168{
169	off_t retval;
170	struct file * file;
171	int fput_needed;
172
173	retval = -EBADF;
174	file = fget_light(fd, &fput_needed);
175	if (!file)
176		goto bad;
177
178	retval = -EINVAL;
179	if (origin <= SEEK_MAX) {
180		loff_t res = vfs_llseek(file, offset, origin);
181		retval = res;
182		if (res != (loff_t)retval)
183			retval = -EOVERFLOW;	/* LFS: should only happen on 32 bit platforms */
184	}
185	fput_light(file, fput_needed);
186bad:
187	return retval;
188}
189
#ifdef __ARCH_WANT_SYS_LLSEEK
/*
 * llseek(2): seek with a 64-bit offset passed as two halves.
 *
 * The offset is assembled as (@offset_high << 32) | @offset_low.  On success
 * the new position is copied to @result and 0 is returned.  Errors: -EBADF
 * for a bad @fd, -EINVAL for @origin above SEEK_MAX, -EFAULT if @result
 * cannot be written, or the negative value returned by vfs_llseek().
 */
SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
		unsigned long, offset_low, loff_t __user *, result,
		unsigned int, origin)
{
	struct file *file;
	loff_t offset;
	int retval;
	int fput_needed;

	file = fget_light(fd, &fput_needed);
	if (!file)
		return -EBADF;

	if (origin > SEEK_MAX) {
		retval = -EINVAL;
		goto out_putf;
	}

	offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low,
			origin);
	if (offset < 0) {
		/* A negative result is handed back as the return code. */
		retval = (int)offset;
		goto out_putf;
	}

	retval = copy_to_user(result, &offset, sizeof(offset)) ? -EFAULT : 0;

out_putf:
	fput_light(file, fput_needed);
	return retval;
}
#endif
224
225/*
226 * rw_verify_area doesn't like huge counts. We limit
227 * them to something that fits in "int" so that others
228 * won't have to do range checks all the time.
229 */
230#define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK)
231
232int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count)
233{
234	struct inode *inode;
235	loff_t pos;
236	int retval = -EINVAL;
237
238	inode = file->f_path.dentry->d_inode;
239	if (unlikely((ssize_t) count < 0))
240		return retval;
241	pos = *ppos;
242	if (unlikely((pos < 0) || (loff_t) (pos + count) < 0))
243		return retval;
244
245	if (unlikely(inode->i_flock && mandatory_lock(inode))) {
246		retval = locks_mandatory_area(
247			read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE,
248			inode, file, pos, count);
249		if (retval < 0)
250			return retval;
251	}
252	retval = security_file_permission(file,
253				read_write == READ ? MAY_READ : MAY_WRITE);
254	if (retval)
255		return retval;
256	return count > MAX_RW_COUNT ? MAX_RW_COUNT : count;
257}
258
259static void wait_on_retry_sync_kiocb(struct kiocb *iocb)
260{
261	set_current_state(TASK_UNINTERRUPTIBLE);
262	if (!kiocbIsKicked(iocb))
263		schedule();
264	else
265		kiocbClearKicked(iocb);
266	__set_current_state(TASK_RUNNING);
267}
268
269ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
270{
271	struct iovec iov = { .iov_base = buf, .iov_len = len };
272	struct kiocb kiocb;
273	ssize_t ret;
274
275	init_sync_kiocb(&kiocb, filp);
276	kiocb.ki_pos = *ppos;
277	kiocb.ki_left = len;
278	kiocb.ki_nbytes = len;
279
280	for (;;) {
281		ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
282		if (ret != -EIOCBRETRY)
283			break;
284		wait_on_retry_sync_kiocb(&kiocb);
285	}
286
287	if (-EIOCBQUEUED == ret)
288		ret = wait_on_sync_kiocb(&kiocb);
289	*ppos = kiocb.ki_pos;
290	return ret;
291}
292
293EXPORT_SYMBOL(do_sync_read);
294
295ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
296{
297	ssize_t ret;
298
299	if (!(file->f_mode & FMODE_READ))
300		return -EBADF;
301	if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read))
302		return -EINVAL;
303	if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
304		return -EFAULT;
305
306	ret = rw_verify_area(READ, file, pos, count);
307	if (ret >= 0) {
308		count = ret;
309		if (file->f_op->read)
310			ret = file->f_op->read(file, buf, count, pos);
311		else
312			ret = do_sync_read(file, buf, count, pos);
313		if (ret > 0) {
314			fsnotify_access(file);
315			add_rchar(current, ret);
316		}
317		inc_syscr(current);
318	}
319
320	return ret;
321}
322
323EXPORT_SYMBOL(vfs_read);
324
325ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
326{
327	struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
328	struct kiocb kiocb;
329	ssize_t ret;
330
331	init_sync_kiocb(&kiocb, filp);
332	kiocb.ki_pos = *ppos;
333	kiocb.ki_left = len;
334	kiocb.ki_nbytes = len;
335
336	for (;;) {
337		ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
338		if (ret != -EIOCBRETRY)
339			break;
340		wait_on_retry_sync_kiocb(&kiocb);
341	}
342
343	if (-EIOCBQUEUED == ret)
344		ret = wait_on_sync_kiocb(&kiocb);
345	*ppos = kiocb.ki_pos;
346	return ret;
347}
348
349EXPORT_SYMBOL(do_sync_write);
350
351ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
352{
353	ssize_t ret;
354
355	if (!(file->f_mode & FMODE_WRITE))
356		return -EBADF;
357	if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write))
358		return -EINVAL;
359	if (unlikely(!access_ok(VERIFY_READ, buf, count)))
360		return -EFAULT;
361
362	ret = rw_verify_area(WRITE, file, pos, count);
363	if (ret >= 0) {
364		count = ret;
365		if (file->f_op->write)
366			ret = file->f_op->write(file, buf, count, pos);
367		else
368			ret = do_sync_write(file, buf, count, pos);
369		if (ret > 0) {
370			fsnotify_modify(file);
371			add_wchar(current, ret);
372		}
373		inc_syscw(current);
374	}
375
376	return ret;
377}
378
379EXPORT_SYMBOL(vfs_write);
380
381static inline loff_t file_pos_read(struct file *file)
382{
383	return file->f_pos;
384}
385
386static inline void file_pos_write(struct file *file, loff_t pos)
387{
388	file->f_pos = pos;
389}
390
391SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
392{
393	struct file *file;
394	ssize_t ret = -EBADF;
395	int fput_needed;
396
397	file = fget_light(fd, &fput_needed);
398	if (file) {
399		loff_t pos = file_pos_read(file);
400		ret = vfs_read(file, buf, count, &pos);
401		file_pos_write(file, pos);
402		fput_light(file, fput_needed);
403	}
404
405	return ret;
406}
407
408SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
409		size_t, count)
410{
411	struct file *file;
412	ssize_t ret = -EBADF;
413	int fput_needed;
414
415	file = fget_light(fd, &fput_needed);
416	if (file) {
417		loff_t pos = file_pos_read(file);
418		ret = vfs_write(file, buf, count, &pos);
419		file_pos_write(file, pos);
420		fput_light(file, fput_needed);
421	}
422
423	return ret;
424}
425
426SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf,
427			size_t count, loff_t pos)
428{
429	struct file *file;
430	ssize_t ret = -EBADF;
431	int fput_needed;
432
433	if (pos < 0)
434		return -EINVAL;
435
436	file = fget_light(fd, &fput_needed);
437	if (file) {
438		ret = -ESPIPE;
439		if (file->f_mode & FMODE_PREAD)
440			ret = vfs_read(file, buf, count, &pos);
441		fput_light(file, fput_needed);
442	}
443
444	return ret;
445}
446#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
447asmlinkage long SyS_pread64(long fd, long buf, long count, loff_t pos)
448{
449	return SYSC_pread64((unsigned int) fd, (char __user *) buf,
450			    (size_t) count, pos);
451}
452SYSCALL_ALIAS(sys_pread64, SyS_pread64);
453#endif
454
455SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf,
456			 size_t count, loff_t pos)
457{
458	struct file *file;
459	ssize_t ret = -EBADF;
460	int fput_needed;
461
462	if (pos < 0)
463		return -EINVAL;
464
465	file = fget_light(fd, &fput_needed);
466	if (file) {
467		ret = -ESPIPE;
468		if (file->f_mode & FMODE_PWRITE)
469			ret = vfs_write(file, buf, count, &pos);
470		fput_light(file, fput_needed);
471	}
472
473	return ret;
474}
475#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
476asmlinkage long SyS_pwrite64(long fd, long buf, long count, loff_t pos)
477{
478	return SYSC_pwrite64((unsigned int) fd, (const char __user *) buf,
479			     (size_t) count, pos);
480}
481SYSCALL_ALIAS(sys_pwrite64, SyS_pwrite64);
482#endif
483
/*
 * Reduce an iovec's length in-place.  Return the resulting number of segments
 *
 * Segments are walked in order, summing their lengths.  The first segment
 * at which the running total reaches or exceeds @to is trimmed so the total
 * is exactly @to, and the count of segments up to and including it is
 * returned.  If the total never reaches @to, nothing is modified and
 * @nr_segs is returned.
 */
unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
{
	size_t consumed = 0;
	unsigned long idx;

	for (idx = 0; idx < nr_segs; idx++) {
		struct iovec *cur = &iov[idx];

		if (consumed + cur->iov_len >= to) {
			cur->iov_len = to - consumed;
			return idx + 1;
		}
		consumed += cur->iov_len;
	}

	return nr_segs;
}
503EXPORT_SYMBOL(iov_shorten);
504
505ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
506		unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
507{
508	struct kiocb kiocb;
509	ssize_t ret;
510
511	init_sync_kiocb(&kiocb, filp);
512	kiocb.ki_pos = *ppos;
513	kiocb.ki_left = len;
514	kiocb.ki_nbytes = len;
515
516	for (;;) {
517		ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos);
518		if (ret != -EIOCBRETRY)
519			break;
520		wait_on_retry_sync_kiocb(&kiocb);
521	}
522
523	if (ret == -EIOCBQUEUED)
524		ret = wait_on_sync_kiocb(&kiocb);
525	*ppos = kiocb.ki_pos;
526	return ret;
527}
528
529/* Do it by hand, with file-ops */
530ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
531		unsigned long nr_segs, loff_t *ppos, io_fn_t fn)
532{
533	struct iovec *vector = iov;
534	ssize_t ret = 0;
535
536	while (nr_segs > 0) {
537		void __user *base;
538		size_t len;
539		ssize_t nr;
540
541		base = vector->iov_base;
542		len = vector->iov_len;
543		vector++;
544		nr_segs--;
545
546		nr = fn(filp, base, len, ppos);
547
548		if (nr < 0) {
549			if (!ret)
550				ret = nr;
551			break;
552		}
553		ret += nr;
554		if (nr != len)
555			break;
556	}
557
558	return ret;
559}
560
561/* A write operation does a read from user space and vice versa */
562#define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)
563
564ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
565			      unsigned long nr_segs, unsigned long fast_segs,
566			      struct iovec *fast_pointer,
567			      struct iovec **ret_pointer)
568  {
569	unsigned long seg;
570  	ssize_t ret;
571	struct iovec *iov = fast_pointer;
572
573  	/*
574  	 * SuS says "The readv() function *may* fail if the iovcnt argument
575  	 * was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
576  	 * traditionally returned zero for zero segments, so...
577  	 */
578	if (nr_segs == 0) {
579		ret = 0;
580  		goto out;
581	}
582
583  	/*
584  	 * First get the "struct iovec" from user memory and
585  	 * verify all the pointers
586  	 */
587	if (nr_segs > UIO_MAXIOV) {
588		ret = -EINVAL;
589  		goto out;
590	}
591	if (nr_segs > fast_segs) {
592  		iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
593		if (iov == NULL) {
594			ret = -ENOMEM;
595  			goto out;
596		}
597  	}
598	if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
599		ret = -EFAULT;
600  		goto out;
601	}
602
603  	/*
604	 * According to the Single Unix Specification we should return EINVAL
605	 * if an element length is < 0 when cast to ssize_t or if the
606	 * total length would overflow the ssize_t return value of the
607	 * system call.
608  	 */
609	ret = 0;
610  	for (seg = 0; seg < nr_segs; seg++) {
611  		void __user *buf = iov[seg].iov_base;
612  		ssize_t len = (ssize_t)iov[seg].iov_len;
613
614		/* see if we we're about to use an invalid len or if
615		 * it's about to overflow ssize_t */
616		if (len < 0 || (ret + len < ret)) {
617			ret = -EINVAL;
618  			goto out;
619		}
620		if (unlikely(!access_ok(vrfy_dir(type), buf, len))) {
621			ret = -EFAULT;
622  			goto out;
623		}
624
625		ret += len;
626  	}
627out:
628	*ret_pointer = iov;
629	return ret;
630}
631
632static ssize_t do_readv_writev(int type, struct file *file,
633			       const struct iovec __user * uvector,
634			       unsigned long nr_segs, loff_t *pos)
635{
636	size_t tot_len;
637	struct iovec iovstack[UIO_FASTIOV];
638	struct iovec *iov = iovstack;
639	ssize_t ret;
640	io_fn_t fn;
641	iov_fn_t fnv;
642
643	if (!file->f_op) {
644		ret = -EINVAL;
645		goto out;
646	}
647
648	ret = rw_copy_check_uvector(type, uvector, nr_segs,
649			ARRAY_SIZE(iovstack), iovstack, &iov);
650	if (ret <= 0)
651		goto out;
652
653	tot_len = ret;
654	ret = rw_verify_area(type, file, pos, tot_len);
655	if (ret < 0)
656		goto out;
657
658	fnv = NULL;
659	if (type == READ) {
660		fn = file->f_op->read;
661		fnv = file->f_op->aio_read;
662	} else {
663		fn = (io_fn_t)file->f_op->write;
664		fnv = file->f_op->aio_write;
665	}
666
667	if (fnv)
668		ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
669						pos, fnv);
670	else
671		ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
672
673out:
674	if (iov != iovstack)
675		kfree(iov);
676	if ((ret + (type == READ)) > 0) {
677		if (type == READ)
678			fsnotify_access(file);
679		else
680			fsnotify_modify(file);
681	}
682	return ret;
683}
684
685ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
686		  unsigned long vlen, loff_t *pos)
687{
688	if (!(file->f_mode & FMODE_READ))
689		return -EBADF;
690	if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
691		return -EINVAL;
692
693	return do_readv_writev(READ, file, vec, vlen, pos);
694}
695
696EXPORT_SYMBOL(vfs_readv);
697
698ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
699		   unsigned long vlen, loff_t *pos)
700{
701	if (!(file->f_mode & FMODE_WRITE))
702		return -EBADF;
703	if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
704		return -EINVAL;
705
706	return do_readv_writev(WRITE, file, vec, vlen, pos);
707}
708
709EXPORT_SYMBOL(vfs_writev);
710
711SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
712		unsigned long, vlen)
713{
714	struct file *file;
715	ssize_t ret = -EBADF;
716	int fput_needed;
717
718	file = fget_light(fd, &fput_needed);
719	if (file) {
720		loff_t pos = file_pos_read(file);
721		ret = vfs_readv(file, vec, vlen, &pos);
722		file_pos_write(file, pos);
723		fput_light(file, fput_needed);
724	}
725
726	if (ret > 0)
727		add_rchar(current, ret);
728	inc_syscr(current);
729	return ret;
730}
731
732SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
733		unsigned long, vlen)
734{
735	struct file *file;
736	ssize_t ret = -EBADF;
737	int fput_needed;
738
739	file = fget_light(fd, &fput_needed);
740	if (file) {
741		loff_t pos = file_pos_read(file);
742		ret = vfs_writev(file, vec, vlen, &pos);
743		file_pos_write(file, pos);
744		fput_light(file, fput_needed);
745	}
746
747	if (ret > 0)
748		add_wchar(current, ret);
749	inc_syscw(current);
750	return ret;
751}
752
753static inline loff_t pos_from_hilo(unsigned long high, unsigned long low)
754{
755#define HALF_LONG_BITS (BITS_PER_LONG / 2)
756	return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low;
757}
758
759SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
760		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
761{
762	loff_t pos = pos_from_hilo(pos_h, pos_l);
763	struct file *file;
764	ssize_t ret = -EBADF;
765	int fput_needed;
766
767	if (pos < 0)
768		return -EINVAL;
769
770	file = fget_light(fd, &fput_needed);
771	if (file) {
772		ret = -ESPIPE;
773		if (file->f_mode & FMODE_PREAD)
774			ret = vfs_readv(file, vec, vlen, &pos);
775		fput_light(file, fput_needed);
776	}
777
778	if (ret > 0)
779		add_rchar(current, ret);
780	inc_syscr(current);
781	return ret;
782}
783
784SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
785		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
786{
787	loff_t pos = pos_from_hilo(pos_h, pos_l);
788	struct file *file;
789	ssize_t ret = -EBADF;
790	int fput_needed;
791
792	if (pos < 0)
793		return -EINVAL;
794
795	file = fget_light(fd, &fput_needed);
796	if (file) {
797		ret = -ESPIPE;
798		if (file->f_mode & FMODE_PWRITE)
799			ret = vfs_writev(file, vec, vlen, &pos);
800		fput_light(file, fput_needed);
801	}
802
803	if (ret > 0)
804		add_wchar(current, ret);
805	inc_syscw(current);
806	return ret;
807}
808
809static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
810			   size_t count, loff_t max)
811{
812	struct file * in_file, * out_file;
813	struct inode * in_inode, * out_inode;
814	loff_t pos;
815	ssize_t retval;
816	int fput_needed_in, fput_needed_out, fl;
817
818	/*
819	 * Get input file, and verify that it is ok..
820	 */
821	retval = -EBADF;
822	in_file = fget_light(in_fd, &fput_needed_in);
823	if (!in_file)
824		goto out;
825	if (!(in_file->f_mode & FMODE_READ))
826		goto fput_in;
827	retval = -ESPIPE;
828	if (!ppos)
829		ppos = &in_file->f_pos;
830	else
831		if (!(in_file->f_mode & FMODE_PREAD))
832			goto fput_in;
833	retval = rw_verify_area(READ, in_file, ppos, count);
834	if (retval < 0)
835		goto fput_in;
836	count = retval;
837
838	/*
839	 * Get output file, and verify that it is ok..
840	 */
841	retval = -EBADF;
842	out_file = fget_light(out_fd, &fput_needed_out);
843	if (!out_file)
844		goto fput_in;
845	if (!(out_file->f_mode & FMODE_WRITE))
846		goto fput_out;
847	retval = -EINVAL;
848	in_inode = in_file->f_path.dentry->d_inode;
849	out_inode = out_file->f_path.dentry->d_inode;
850	retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count);
851	if (retval < 0)
852		goto fput_out;
853	count = retval;
854
855	if (!max)
856		max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);
857
858	pos = *ppos;
859	if (unlikely(pos + count > max)) {
860		retval = -EOVERFLOW;
861		if (pos >= max)
862			goto fput_out;
863		count = max - pos;
864	}
865
866	fl = 0;
867	retval = do_splice_direct(in_file, ppos, out_file, count, fl);
868
869	if (retval > 0) {
870		add_rchar(current, retval);
871		add_wchar(current, retval);
872	}
873
874	inc_syscr(current);
875	inc_syscw(current);
876	if (*ppos > max)
877		retval = -EOVERFLOW;
878
879fput_out:
880	fput_light(out_file, fput_needed_out);
881fput_in:
882	fput_light(in_file, fput_needed_in);
883out:
884	return retval;
885}
886
887SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count)
888{
889	loff_t pos;
890	off_t off;
891	ssize_t ret;
892
893	if (offset) {
894		if (unlikely(get_user(off, offset)))
895			return -EFAULT;
896		pos = off;
897		ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
898		if (unlikely(put_user(pos, offset)))
899			return -EFAULT;
900		return ret;
901	}
902
903	return do_sendfile(out_fd, in_fd, NULL, count, 0);
904}
905
906SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count)
907{
908	loff_t pos;
909	ssize_t ret;
910
911	if (offset) {
912		if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
913			return -EFAULT;
914		ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
915		if (unlikely(put_user(pos, offset)))
916			return -EFAULT;
917		return ret;
918	}
919
920	return do_sendfile(out_fd, in_fd, NULL, count, 0);
921}
922