1/*
2 *  linux/fs/open.c
3 *
4 *  Copyright (C) 1991, 1992  Linus Torvalds
5 */
6
7#include <linux/string.h>
8#include <linux/mm.h>
9#include <linux/file.h>
10#include <linux/fdtable.h>
11#include <linux/fsnotify.h>
12#include <linux/module.h>
13#include <linux/tty.h>
14#include <linux/namei.h>
15#include <linux/backing-dev.h>
16#include <linux/capability.h>
17#include <linux/securebits.h>
18#include <linux/security.h>
19#include <linux/mount.h>
20#include <linux/fcntl.h>
21#include <linux/slab.h>
22#include <asm/uaccess.h>
23#include <linux/fs.h>
24#include <linux/personality.h>
25#include <linux/pagemap.h>
26#include <linux/syscalls.h>
27#include <linux/rcupdate.h>
28#include <linux/audit.h>
29#include <linux/falloc.h>
30#include <linux/fs_struct.h>
31#include <linux/ima.h>
32#include <linux/dnotify.h>
33
34#include "internal.h"
35
36int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
37	struct file *filp)
38{
39	int ret;
40	struct iattr newattrs;
41
42	/* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */
43	if (length < 0)
44		return -EINVAL;
45
46	newattrs.ia_size = length;
47	newattrs.ia_valid = ATTR_SIZE | time_attrs;
48	if (filp) {
49		newattrs.ia_file = filp;
50		newattrs.ia_valid |= ATTR_FILE;
51	}
52
53	/* Remove suid/sgid on truncate too */
54	ret = should_remove_suid(dentry);
55	if (ret)
56		newattrs.ia_valid |= ret | ATTR_FORCE;
57
58	mutex_lock(&dentry->d_inode->i_mutex);
59	ret = notify_change(dentry, &newattrs);
60	mutex_unlock(&dentry->d_inode->i_mutex);
61	return ret;
62}
63
64static long do_sys_truncate(const char __user *pathname, loff_t length)
65{
66	struct path path;
67	struct inode *inode;
68	int error;
69
70	error = -EINVAL;
71	if (length < 0)	/* sorry, but loff_t says... */
72		goto out;
73
74	error = user_path(pathname, &path);
75	if (error)
76		goto out;
77	inode = path.dentry->d_inode;
78
79	/* For directories it's -EISDIR, for other non-regulars - -EINVAL */
80	error = -EISDIR;
81	if (S_ISDIR(inode->i_mode))
82		goto dput_and_out;
83
84	error = -EINVAL;
85	if (!S_ISREG(inode->i_mode))
86		goto dput_and_out;
87
88	error = mnt_want_write(path.mnt);
89	if (error)
90		goto dput_and_out;
91
92	error = inode_permission(inode, MAY_WRITE);
93	if (error)
94		goto mnt_drop_write_and_out;
95
96	error = -EPERM;
97	if (IS_APPEND(inode))
98		goto mnt_drop_write_and_out;
99
100	error = get_write_access(inode);
101	if (error)
102		goto mnt_drop_write_and_out;
103
104	/*
105	 * Make sure that there are no leases.  get_write_access() protects
106	 * against the truncate racing with a lease-granting setlease().
107	 */
108	error = break_lease(inode, O_WRONLY);
109	if (error)
110		goto put_write_and_out;
111
112	error = locks_verify_truncate(inode, NULL, length);
113	if (!error)
114		error = security_path_truncate(&path);
115	if (!error)
116		error = do_truncate(path.dentry, length, 0, NULL);
117
118put_write_and_out:
119	put_write_access(inode);
120mnt_drop_write_and_out:
121	mnt_drop_write(path.mnt);
122dput_and_out:
123	path_put(&path);
124out:
125	return error;
126}
127
128SYSCALL_DEFINE2(truncate, const char __user *, path, long, length)
129{
130	return do_sys_truncate(path, length);
131}
132
133static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
134{
135	struct inode * inode;
136	struct dentry *dentry;
137	struct file * file;
138	int error;
139
140	error = -EINVAL;
141	if (length < 0)
142		goto out;
143	error = -EBADF;
144	file = fget(fd);
145	if (!file)
146		goto out;
147
148	/* explicitly opened as large or we are on 64-bit box */
149	if (file->f_flags & O_LARGEFILE)
150		small = 0;
151
152	dentry = file->f_path.dentry;
153	inode = dentry->d_inode;
154	error = -EINVAL;
155	if (!S_ISREG(inode->i_mode) || !(file->f_mode & FMODE_WRITE))
156		goto out_putf;
157
158	error = -EINVAL;
159
160    /* Foxconn modified start pling 12/04/2009 */
161    /* Remove large file limitation */
162#if (!defined SAMBA_ENABLE)
163	/* Cannot ftruncate over 2^31 bytes without large file support */
164	if (small && length > MAX_NON_LFS)
165		goto out_putf;
166#endif
167    /* Foxconn modified end pling 12/04/2009 */
168
169	error = -EPERM;
170	if (IS_APPEND(inode))
171		goto out_putf;
172
173	error = locks_verify_truncate(inode, file, length);
174	if (!error)
175		error = security_path_truncate(&file->f_path);
176	if (!error)
177		error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file);
178out_putf:
179	fput(file);
180out:
181	return error;
182}
183
184SYSCALL_DEFINE2(ftruncate, unsigned int, fd, unsigned long, length)
185{
186	long ret = do_sys_ftruncate(fd, length, 1);
187	/* avoid REGPARM breakage on x86: */
188	asmlinkage_protect(2, ret, fd, length);
189	return ret;
190}
191
192/* LFS versions of truncate are only needed on 32 bit machines */
193#if BITS_PER_LONG == 32
194SYSCALL_DEFINE(truncate64)(const char __user * path, loff_t length)
195{
196	return do_sys_truncate(path, length);
197}
198#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
199asmlinkage long SyS_truncate64(long path, loff_t length)
200{
201	return SYSC_truncate64((const char __user *) path, length);
202}
203SYSCALL_ALIAS(sys_truncate64, SyS_truncate64);
204#endif
205
206SYSCALL_DEFINE(ftruncate64)(unsigned int fd, loff_t length)
207{
208	long ret = do_sys_ftruncate(fd, length, 0);
209	/* avoid REGPARM breakage on x86: */
210	asmlinkage_protect(2, ret, fd, length);
211	return ret;
212}
213#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
214asmlinkage long SyS_ftruncate64(long fd, loff_t length)
215{
216	return SYSC_ftruncate64((unsigned int) fd, length);
217}
218SYSCALL_ALIAS(sys_ftruncate64, SyS_ftruncate64);
219#endif
220#endif /* BITS_PER_LONG == 32 */
221
222
223int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
224{
225	struct inode *inode = file->f_path.dentry->d_inode;
226	long ret;
227
228	if (offset < 0 || len <= 0)
229		return -EINVAL;
230
231	/* Return error if mode is not supported */
232	if (mode && !(mode & FALLOC_FL_KEEP_SIZE))
233		return -EOPNOTSUPP;
234
235	if (!(file->f_mode & FMODE_WRITE))
236		return -EBADF;
237	/*
238	 * Revalidate the write permissions, in case security policy has
239	 * changed since the files were opened.
240	 */
241	ret = security_file_permission(file, MAY_WRITE);
242	if (ret)
243		return ret;
244
245	if (S_ISFIFO(inode->i_mode))
246		return -ESPIPE;
247
248	/*
249	 * Let individual file system decide if it supports preallocation
250	 * for directories or not.
251	 */
252	if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
253		return -ENODEV;
254
255	/* Check for wrap through zero too */
256	if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
257		return -EFBIG;
258
259	if (!inode->i_op->fallocate)
260		return -EOPNOTSUPP;
261
262	return inode->i_op->fallocate(inode, mode, offset, len);
263}
264
265SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len)
266{
267	struct file *file;
268	int error = -EBADF;
269
270	file = fget(fd);
271	if (file) {
272		error = do_fallocate(file, mode, offset, len);
273		fput(file);
274	}
275
276	return error;
277}
278
279#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
280asmlinkage long SyS_fallocate(long fd, long mode, loff_t offset, loff_t len)
281{
282	return SYSC_fallocate((int)fd, (int)mode, offset, len);
283}
284SYSCALL_ALIAS(sys_fallocate, SyS_fallocate);
285#endif
286
287/*
288 * access() needs to use the real uid/gid, not the effective uid/gid.
289 * We do this by temporarily clearing all FS-related capabilities and
290 * switching the fsuid/fsgid around to the real ones.
291 */
292SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode)
293{
294	const struct cred *old_cred;
295	struct cred *override_cred;
296	struct path path;
297	struct inode *inode;
298	int res;
299
300	if (mode & ~S_IRWXO)	/* where's F_OK, X_OK, W_OK, R_OK? */
301		return -EINVAL;
302
303	override_cred = prepare_creds();
304	if (!override_cred)
305		return -ENOMEM;
306
307	override_cred->fsuid = override_cred->uid;
308	override_cred->fsgid = override_cred->gid;
309
310	if (!issecure(SECURE_NO_SETUID_FIXUP)) {
311		/* Clear the capabilities if we switch to a non-root user */
312		if (override_cred->uid)
313			cap_clear(override_cred->cap_effective);
314		else
315			override_cred->cap_effective =
316				override_cred->cap_permitted;
317	}
318
319	old_cred = override_creds(override_cred);
320
321	res = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
322	if (res)
323		goto out;
324
325	inode = path.dentry->d_inode;
326
327	if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) {
328		/*
329		 * MAY_EXEC on regular files is denied if the fs is mounted
330		 * with the "noexec" flag.
331		 */
332		res = -EACCES;
333		if (path.mnt->mnt_flags & MNT_NOEXEC)
334			goto out_path_release;
335	}
336
337	res = inode_permission(inode, mode | MAY_ACCESS);
338	/* SuS v2 requires we report a read only fs too */
339	if (res || !(mode & S_IWOTH) || special_file(inode->i_mode))
340		goto out_path_release;
341	/*
342	 * This is a rare case where using __mnt_is_readonly()
343	 * is OK without a mnt_want/drop_write() pair.  Since
344	 * no actual write to the fs is performed here, we do
345	 * not need to telegraph to that to anyone.
346	 *
347	 * By doing this, we accept that this access is
348	 * inherently racy and know that the fs may change
349	 * state before we even see this result.
350	 */
351	if (__mnt_is_readonly(path.mnt))
352		res = -EROFS;
353
354out_path_release:
355	path_put(&path);
356out:
357	revert_creds(old_cred);
358	put_cred(override_cred);
359	return res;
360}
361
362SYSCALL_DEFINE2(access, const char __user *, filename, int, mode)
363{
364	return sys_faccessat(AT_FDCWD, filename, mode);
365}
366
367SYSCALL_DEFINE1(chdir, const char __user *, filename)
368{
369	struct path path;
370	int error;
371
372	error = user_path_dir(filename, &path);
373	if (error)
374		goto out;
375
376	error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
377	if (error)
378		goto dput_and_out;
379
380	set_fs_pwd(current->fs, &path);
381
382dput_and_out:
383	path_put(&path);
384out:
385	return error;
386}
387
388SYSCALL_DEFINE1(fchdir, unsigned int, fd)
389{
390	struct file *file;
391	struct inode *inode;
392	int error;
393
394	error = -EBADF;
395	file = fget(fd);
396	if (!file)
397		goto out;
398
399	inode = file->f_path.dentry->d_inode;
400
401	error = -ENOTDIR;
402	if (!S_ISDIR(inode->i_mode))
403		goto out_putf;
404
405	error = inode_permission(inode, MAY_EXEC | MAY_CHDIR);
406	if (!error)
407		set_fs_pwd(current->fs, &file->f_path);
408out_putf:
409	fput(file);
410out:
411	return error;
412}
413
414SYSCALL_DEFINE1(chroot, const char __user *, filename)
415{
416	struct path path;
417	int error;
418
419	error = user_path_dir(filename, &path);
420	if (error)
421		goto out;
422
423	error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
424	if (error)
425		goto dput_and_out;
426
427	error = -EPERM;
428	if (!capable(CAP_SYS_CHROOT))
429		goto dput_and_out;
430	error = security_path_chroot(&path);
431	if (error)
432		goto dput_and_out;
433
434	set_fs_root(current->fs, &path);
435	error = 0;
436dput_and_out:
437	path_put(&path);
438out:
439	return error;
440}
441
442SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode)
443{
444	struct inode * inode;
445	struct dentry * dentry;
446	struct file * file;
447	int err = -EBADF;
448	struct iattr newattrs;
449
450	file = fget(fd);
451	if (!file)
452		goto out;
453
454	dentry = file->f_path.dentry;
455	inode = dentry->d_inode;
456
457	audit_inode(NULL, dentry);
458
459	err = mnt_want_write_file(file);
460	if (err)
461		goto out_putf;
462	mutex_lock(&inode->i_mutex);
463	err = security_path_chmod(dentry, file->f_vfsmnt, mode);
464	if (err)
465		goto out_unlock;
466	if (mode == (mode_t) -1)
467		mode = inode->i_mode;
468	newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
469	newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
470	err = notify_change(dentry, &newattrs);
471out_unlock:
472	mutex_unlock(&inode->i_mutex);
473	mnt_drop_write(file->f_path.mnt);
474out_putf:
475	fput(file);
476out:
477	return err;
478}
479
480SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, mode_t, mode)
481{
482	struct path path;
483	struct inode *inode;
484	int error;
485	struct iattr newattrs;
486
487	error = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
488	if (error)
489		goto out;
490	inode = path.dentry->d_inode;
491
492	error = mnt_want_write(path.mnt);
493	if (error)
494		goto dput_and_out;
495	mutex_lock(&inode->i_mutex);
496	error = security_path_chmod(path.dentry, path.mnt, mode);
497	if (error)
498		goto out_unlock;
499	if (mode == (mode_t) -1)
500		mode = inode->i_mode;
501	newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
502	newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
503	error = notify_change(path.dentry, &newattrs);
504out_unlock:
505	mutex_unlock(&inode->i_mutex);
506	mnt_drop_write(path.mnt);
507dput_and_out:
508	path_put(&path);
509out:
510	return error;
511}
512
513SYSCALL_DEFINE2(chmod, const char __user *, filename, mode_t, mode)
514{
515	return sys_fchmodat(AT_FDCWD, filename, mode);
516}
517
518static int chown_common(struct path *path, uid_t user, gid_t group)
519{
520	struct inode *inode = path->dentry->d_inode;
521	int error;
522	struct iattr newattrs;
523
524	newattrs.ia_valid =  ATTR_CTIME;
525	if (user != (uid_t) -1) {
526		newattrs.ia_valid |= ATTR_UID;
527		newattrs.ia_uid = user;
528	}
529	if (group != (gid_t) -1) {
530		newattrs.ia_valid |= ATTR_GID;
531		newattrs.ia_gid = group;
532	}
533	if (!S_ISDIR(inode->i_mode))
534		newattrs.ia_valid |=
535			ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
536	mutex_lock(&inode->i_mutex);
537	error = security_path_chown(path, user, group);
538	if (!error)
539		error = notify_change(path->dentry, &newattrs);
540	mutex_unlock(&inode->i_mutex);
541
542	return error;
543}
544
545SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group)
546{
547	struct path path;
548	int error;
549
550	error = user_path(filename, &path);
551	if (error)
552		goto out;
553	error = mnt_want_write(path.mnt);
554	if (error)
555		goto out_release;
556	error = chown_common(&path, user, group);
557	mnt_drop_write(path.mnt);
558out_release:
559	path_put(&path);
560out:
561	return error;
562}
563
564SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
565		gid_t, group, int, flag)
566{
567	struct path path;
568	int error = -EINVAL;
569	int follow;
570
571	if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
572		goto out;
573
574	follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
575	error = user_path_at(dfd, filename, follow, &path);
576	if (error)
577		goto out;
578	error = mnt_want_write(path.mnt);
579	if (error)
580		goto out_release;
581	error = chown_common(&path, user, group);
582	mnt_drop_write(path.mnt);
583out_release:
584	path_put(&path);
585out:
586	return error;
587}
588
589SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group)
590{
591	struct path path;
592	int error;
593
594	error = user_lpath(filename, &path);
595	if (error)
596		goto out;
597	error = mnt_want_write(path.mnt);
598	if (error)
599		goto out_release;
600	error = chown_common(&path, user, group);
601	mnt_drop_write(path.mnt);
602out_release:
603	path_put(&path);
604out:
605	return error;
606}
607
608SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
609{
610	struct file * file;
611	int error = -EBADF;
612	struct dentry * dentry;
613
614	file = fget(fd);
615	if (!file)
616		goto out;
617
618	error = mnt_want_write_file(file);
619	if (error)
620		goto out_fput;
621	dentry = file->f_path.dentry;
622	audit_inode(NULL, dentry);
623	error = chown_common(&file->f_path, user, group);
624	mnt_drop_write(file->f_path.mnt);
625out_fput:
626	fput(file);
627out:
628	return error;
629}
630
631/*
632 * You have to be very careful that these write
633 * counts get cleaned up in error cases and
634 * upon __fput().  This should probably never
635 * be called outside of __dentry_open().
636 */
637static inline int __get_file_write_access(struct inode *inode,
638					  struct vfsmount *mnt)
639{
640	int error;
641	error = get_write_access(inode);
642	if (error)
643		return error;
644	/*
645	 * Do not take mount writer counts on
646	 * special files since no writes to
647	 * the mount itself will occur.
648	 */
649	if (!special_file(inode->i_mode)) {
650		/*
651		 * Balanced in __fput()
652		 */
653		error = mnt_want_write(mnt);
654		if (error)
655			put_write_access(inode);
656	}
657	return error;
658}
659
660static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
661					struct file *f,
662					int (*open)(struct inode *, struct file *),
663					const struct cred *cred)
664{
665	struct inode *inode;
666	int error;
667
668	f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
669				FMODE_PREAD | FMODE_PWRITE;
670	inode = dentry->d_inode;
671	if (f->f_mode & FMODE_WRITE) {
672		error = __get_file_write_access(inode, mnt);
673		if (error)
674			goto cleanup_file;
675		if (!special_file(inode->i_mode))
676			file_take_write(f);
677	}
678
679	f->f_mapping = inode->i_mapping;
680	f->f_path.dentry = dentry;
681	f->f_path.mnt = mnt;
682	f->f_pos = 0;
683	f->f_op = fops_get(inode->i_fop);
684	file_sb_list_add(f, inode->i_sb);
685
686	error = security_dentry_open(f, cred);
687	if (error)
688		goto cleanup_all;
689
690	if (!open && f->f_op)
691		open = f->f_op->open;
692	if (open) {
693		error = open(inode, f);
694		if (error)
695			goto cleanup_all;
696	}
697	ima_counts_get(f);
698
699	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
700
701	file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
702
703	/* NB: we're sure to have correct a_ops only after f_op->open */
704	if (f->f_flags & O_DIRECT) {
705		if (!f->f_mapping->a_ops ||
706		    ((!f->f_mapping->a_ops->direct_IO) &&
707		    (!f->f_mapping->a_ops->get_xip_mem))) {
708			fput(f);
709			f = ERR_PTR(-EINVAL);
710		}
711	}
712
713	return f;
714
715cleanup_all:
716	fops_put(f->f_op);
717	if (f->f_mode & FMODE_WRITE) {
718		put_write_access(inode);
719		if (!special_file(inode->i_mode)) {
720			/*
721			 * We don't consider this a real
722			 * mnt_want/drop_write() pair
723			 * because it all happenend right
724			 * here, so just reset the state.
725			 */
726			file_reset_write(f);
727			mnt_drop_write(mnt);
728		}
729	}
730	file_sb_list_del(f);
731	f->f_path.dentry = NULL;
732	f->f_path.mnt = NULL;
733cleanup_file:
734	put_filp(f);
735	dput(dentry);
736	mntput(mnt);
737	return ERR_PTR(error);
738}
739
740/**
741 * lookup_instantiate_filp - instantiates the open intent filp
742 * @nd: pointer to nameidata
743 * @dentry: pointer to dentry
744 * @open: open callback
745 *
746 * Helper for filesystems that want to use lookup open intents and pass back
747 * a fully instantiated struct file to the caller.
748 * This function is meant to be called from within a filesystem's
749 * lookup method.
750 * Beware of calling it for non-regular files! Those ->open methods might block
751 * (e.g. in fifo_open), leaving you with parent locked (and in case of fifo,
752 * leading to a deadlock, as nobody can open that fifo anymore, because
753 * another process to open fifo will block on locked parent when doing lookup).
754 * Note that in case of error, nd->intent.open.file is destroyed, but the
755 * path information remains valid.
756 * If the open callback is set to NULL, then the standard f_op->open()
757 * filesystem callback is substituted.
758 */
759struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry,
760		int (*open)(struct inode *, struct file *))
761{
762	const struct cred *cred = current_cred();
763
764	if (IS_ERR(nd->intent.open.file))
765		goto out;
766	if (IS_ERR(dentry))
767		goto out_err;
768	nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->path.mnt),
769					     nd->intent.open.file,
770					     open, cred);
771out:
772	return nd->intent.open.file;
773out_err:
774	release_open_intent(nd);
775	nd->intent.open.file = (struct file *)dentry;
776	goto out;
777}
778EXPORT_SYMBOL_GPL(lookup_instantiate_filp);
779
780/**
781 * nameidata_to_filp - convert a nameidata to an open filp.
782 * @nd: pointer to nameidata
783 * @flags: open flags
784 *
785 * Note that this function destroys the original nameidata
786 */
787struct file *nameidata_to_filp(struct nameidata *nd)
788{
789	const struct cred *cred = current_cred();
790	struct file *filp;
791
792	/* Pick up the filp from the open intent */
793	filp = nd->intent.open.file;
794	/* Has the filesystem initialised the file for us? */
795	if (filp->f_path.dentry == NULL)
796		filp = __dentry_open(nd->path.dentry, nd->path.mnt, filp,
797				     NULL, cred);
798	else
799		path_put(&nd->path);
800	return filp;
801}
802
803/*
804 * dentry_open() will have done dput(dentry) and mntput(mnt) if it returns an
805 * error.
806 */
807struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags,
808			 const struct cred *cred)
809{
810	int error;
811	struct file *f;
812
813	validate_creds(cred);
814
815	/*
816	 * We must always pass in a valid mount pointer.   Historically
817	 * callers got away with not passing it, but we must enforce this at
818	 * the earliest possible point now to avoid strange problems deep in the
819	 * filesystem stack.
820	 */
821	if (!mnt) {
822		printk(KERN_WARNING "%s called with NULL vfsmount\n", __func__);
823		dump_stack();
824		return ERR_PTR(-EINVAL);
825	}
826
827	error = -ENFILE;
828	f = get_empty_filp();
829	if (f == NULL) {
830		dput(dentry);
831		mntput(mnt);
832		return ERR_PTR(error);
833	}
834
835	f->f_flags = flags;
836	return __dentry_open(dentry, mnt, f, NULL, cred);
837}
838EXPORT_SYMBOL(dentry_open);
839
840static void __put_unused_fd(struct files_struct *files, unsigned int fd)
841{
842	struct fdtable *fdt = files_fdtable(files);
843	__FD_CLR(fd, fdt->open_fds);
844	if (fd < files->next_fd)
845		files->next_fd = fd;
846}
847
848void put_unused_fd(unsigned int fd)
849{
850	struct files_struct *files = current->files;
851	spin_lock(&files->file_lock);
852	__put_unused_fd(files, fd);
853	spin_unlock(&files->file_lock);
854}
855
856EXPORT_SYMBOL(put_unused_fd);
857
858/*
859 * Install a file pointer in the fd array.
860 *
861 * The VFS is full of places where we drop the files lock between
862 * setting the open_fds bitmap and installing the file in the file
863 * array.  At any such point, we are vulnerable to a dup2() race
864 * installing a file in the array before us.  We need to detect this and
865 * fput() the struct file we are about to overwrite in this case.
866 *
867 * It should never happen - if we allow dup2() do it, _really_ bad things
868 * will follow.
869 */
870
871void fd_install(unsigned int fd, struct file *file)
872{
873	struct files_struct *files = current->files;
874	struct fdtable *fdt;
875	spin_lock(&files->file_lock);
876	fdt = files_fdtable(files);
877	BUG_ON(fdt->fd[fd] != NULL);
878	rcu_assign_pointer(fdt->fd[fd], file);
879	spin_unlock(&files->file_lock);
880}
881
882EXPORT_SYMBOL(fd_install);
883
884long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
885{
886	char *tmp = getname(filename);
887	int fd = PTR_ERR(tmp);
888
889	if (!IS_ERR(tmp)) {
890		fd = get_unused_fd_flags(flags);
891		if (fd >= 0) {
892			struct file *f = do_filp_open(dfd, tmp, flags, mode, 0);
893			if (IS_ERR(f)) {
894				put_unused_fd(fd);
895				fd = PTR_ERR(f);
896			} else {
897				fsnotify_open(f);
898				fd_install(fd, f);
899			}
900		}
901		putname(tmp);
902	}
903	return fd;
904}
905
906SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode)
907{
908	long ret;
909
910	if (force_o_largefile())
911		flags |= O_LARGEFILE;
912
913	ret = do_sys_open(AT_FDCWD, filename, flags, mode);
914	/* avoid REGPARM breakage on x86: */
915	asmlinkage_protect(3, ret, filename, flags, mode);
916	return ret;
917}
918
919SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags,
920		int, mode)
921{
922	long ret;
923
924	if (force_o_largefile())
925		flags |= O_LARGEFILE;
926
927	ret = do_sys_open(dfd, filename, flags, mode);
928	/* avoid REGPARM breakage on x86: */
929	asmlinkage_protect(4, ret, dfd, filename, flags, mode);
930	return ret;
931}
932
933#ifndef __alpha__
934
935/*
936 * For backward compatibility?  Maybe this should be moved
937 * into arch/i386 instead?
938 */
939SYSCALL_DEFINE2(creat, const char __user *, pathname, int, mode)
940{
941	return sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode);
942}
943
944#endif
945
946/*
947 * "id" is the POSIX thread ID. We use the
948 * files pointer for this..
949 */
950int filp_close(struct file *filp, fl_owner_t id)
951{
952	int retval = 0;
953
954	if (!file_count(filp)) {
955		printk(KERN_ERR "VFS: Close: file count is 0\n");
956		return 0;
957	}
958
959	if (filp->f_op && filp->f_op->flush)
960		retval = filp->f_op->flush(filp, id);
961
962	dnotify_flush(filp, id);
963	locks_remove_posix(filp, id);
964	fput(filp);
965	return retval;
966}
967
968EXPORT_SYMBOL(filp_close);
969
970/*
971 * Careful here! We test whether the file pointer is NULL before
972 * releasing the fd. This ensures that one clone task can't release
973 * an fd while another clone is opening it.
974 */
975SYSCALL_DEFINE1(close, unsigned int, fd)
976{
977	struct file * filp;
978	struct files_struct *files = current->files;
979	struct fdtable *fdt;
980	int retval;
981
982	spin_lock(&files->file_lock);
983	fdt = files_fdtable(files);
984	if (fd >= fdt->max_fds)
985		goto out_unlock;
986	filp = fdt->fd[fd];
987	if (!filp)
988		goto out_unlock;
989	rcu_assign_pointer(fdt->fd[fd], NULL);
990	FD_CLR(fd, fdt->close_on_exec);
991	__put_unused_fd(files, fd);
992	spin_unlock(&files->file_lock);
993	retval = filp_close(filp, files);
994
995	/* can't restart close syscall because file table entry was cleared */
996	if (unlikely(retval == -ERESTARTSYS ||
997		     retval == -ERESTARTNOINTR ||
998		     retval == -ERESTARTNOHAND ||
999		     retval == -ERESTART_RESTARTBLOCK))
1000		retval = -EINTR;
1001
1002	return retval;
1003
1004out_unlock:
1005	spin_unlock(&files->file_lock);
1006	return -EBADF;
1007}
1008EXPORT_SYMBOL(sys_close);
1009
1010/*
1011 * This routine simulates a hangup on the tty, to arrange that users
1012 * are given clean terminals at login time.
1013 */
1014SYSCALL_DEFINE0(vhangup)
1015{
1016	if (capable(CAP_SYS_TTY_CONFIG)) {
1017		tty_vhangup_self();
1018		return 0;
1019	}
1020	return -EPERM;
1021}
1022
1023/*
1024 * Called when an inode is about to be open.
1025 * We use this to disallow opening large files on 32bit systems if
1026 * the caller didn't specify O_LARGEFILE.  On 64bit systems we force
1027 * on this flag in sys_open.
1028 */
1029int generic_file_open(struct inode * inode, struct file * filp)
1030{
1031    /* Foxconn modifed start pling 06/12/2009 */
1032    /* remove "big file" limitation */
1033#if (!defined SAMBA_ENABLE)
1034	if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
1035		return -EOVERFLOW;
1036#endif
1037    /* Foxconn modifed end pling 06/12/2009 */
1038	return 0;
1039}
1040
1041EXPORT_SYMBOL(generic_file_open);
1042
1043/*
1044 * This is used by subsystems that don't want seekable
1045 * file descriptors. The function is not supposed to ever fail, the only
1046 * reason it returns an 'int' and not 'void' is so that it can be plugged
1047 * directly into file_operations structure.
1048 */
1049int nonseekable_open(struct inode *inode, struct file *filp)
1050{
1051	filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
1052	return 0;
1053}
1054
1055EXPORT_SYMBOL(nonseekable_open);
1056