• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /asuswrt-rt-n18u-9.0.0.4.380.2695/release/src-rt-6.x.4708/linux/linux-2.6.36/fs/nfsd/
1#define MSNFS	/* HACK HACK */
/*
 * File operations used by nfsd. Some of these have been ripped from
 * other parts of the kernel because they weren't exported, others
 * are partial duplicates with added or changed functionality.
 *
 * Note that several functions dget() the dentry upon which they want
 * to act, most notably those that create directory entries. Response
 * dentries are dput()'d if necessary in the release callback.
 * So if you notice code paths that apparently fail to dput() the
 * dentry, don't worry--they have been taken care of.
 *
 * Copyright (C) 1995-1999 Olaf Kirch <okir@monad.swb.de>
 * Zero-copy NFS support (C) 2002 Hirokazu Takahashi <taka@valinux.co.jp>
 */
16
17#include <linux/fs.h>
18#include <linux/file.h>
19#include <linux/splice.h>
20#include <linux/fcntl.h>
21#include <linux/namei.h>
22#include <linux/delay.h>
23#include <linux/fsnotify.h>
24#include <linux/posix_acl_xattr.h>
25#include <linux/xattr.h>
26#include <linux/jhash.h>
27#include <linux/ima.h>
28#include <linux/slab.h>
29#include <asm/uaccess.h>
30#include <linux/exportfs.h>
31#include <linux/writeback.h>
32
33#ifdef CONFIG_NFSD_V3
34#include "xdr3.h"
35#endif /* CONFIG_NFSD_V3 */
36
37#ifdef CONFIG_NFSD_V4
38#include <linux/nfs4_acl.h>
39#include <linux/nfsd_idmap.h>
40#endif /* CONFIG_NFSD_V4 */
41
42#include "nfsd.h"
43#include "vfs.h"
44
45#define NFSDDBG_FACILITY		NFSDDBG_FILEOP
46
47
48/*
49 * This is a cache of readahead params that help us choose the proper
50 * readahead strategy. Initially, we set all readahead parameters to 0
51 * and let the VFS handle things.
52 * If you increase the number of cached files very much, you'll need to
53 * add a hash table here.
54 */
55struct raparms {
56	struct raparms		*p_next;
57	unsigned int		p_count;
58	ino_t			p_ino;
59	dev_t			p_dev;
60	int			p_set;
61	struct file_ra_state	p_ra;
62	unsigned int		p_hindex;
63};
64
65struct raparm_hbucket {
66	struct raparms		*pb_head;
67	spinlock_t		pb_lock;
68} ____cacheline_aligned_in_smp;
69
70#define RAPARM_HASH_BITS	4
71#define RAPARM_HASH_SIZE	(1<<RAPARM_HASH_BITS)
72#define RAPARM_HASH_MASK	(RAPARM_HASH_SIZE-1)
73static struct raparm_hbucket	raparm_hash[RAPARM_HASH_SIZE];
74
75/*
76 * Called from nfsd_lookup and encode_dirent. Check if we have crossed
77 * a mount point.
78 * Returns -EAGAIN or -ETIMEDOUT leaving *dpp and *expp unchanged,
79 *  or nfs_ok having possibly changed *dpp and *expp
80 */
81int
82nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
83		        struct svc_export **expp)
84{
85	struct svc_export *exp = *expp, *exp2 = NULL;
86	struct dentry *dentry = *dpp;
87	struct path path = {.mnt = mntget(exp->ex_path.mnt),
88			    .dentry = dget(dentry)};
89	int err = 0;
90
91	while (d_mountpoint(path.dentry) && follow_down(&path))
92		;
93
94	exp2 = rqst_exp_get_by_name(rqstp, &path);
95	if (IS_ERR(exp2)) {
96		err = PTR_ERR(exp2);
97		/*
98		 * We normally allow NFS clients to continue
99		 * "underneath" a mountpoint that is not exported.
100		 * The exception is V4ROOT, where no traversal is ever
101		 * allowed without an explicit export of the new
102		 * directory.
103		 */
104		if (err == -ENOENT && !(exp->ex_flags & NFSEXP_V4ROOT))
105			err = 0;
106		path_put(&path);
107		goto out;
108	}
109	if (nfsd_v4client(rqstp) ||
110		(exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) {
111		/* successfully crossed mount point */
112		/*
113		 * This is subtle: path.dentry is *not* on path.mnt
114		 * at this point.  The only reason we are safe is that
115		 * original mnt is pinned down by exp, so we should
116		 * put path *before* putting exp
117		 */
118		*dpp = path.dentry;
119		path.dentry = dentry;
120		*expp = exp2;
121		exp2 = exp;
122	}
123	path_put(&path);
124	exp_put(exp2);
125out:
126	return err;
127}
128
129static void follow_to_parent(struct path *path)
130{
131	struct dentry *dp;
132
133	while (path->dentry == path->mnt->mnt_root && follow_up(path))
134		;
135	dp = dget_parent(path->dentry);
136	dput(path->dentry);
137	path->dentry = dp;
138}
139
140static int nfsd_lookup_parent(struct svc_rqst *rqstp, struct dentry *dparent, struct svc_export **exp, struct dentry **dentryp)
141{
142	struct svc_export *exp2;
143	struct path path = {.mnt = mntget((*exp)->ex_path.mnt),
144			    .dentry = dget(dparent)};
145
146	follow_to_parent(&path);
147
148	exp2 = rqst_exp_parent(rqstp, &path);
149	if (PTR_ERR(exp2) == -ENOENT) {
150		*dentryp = dget(dparent);
151	} else if (IS_ERR(exp2)) {
152		path_put(&path);
153		return PTR_ERR(exp2);
154	} else {
155		*dentryp = dget(path.dentry);
156		exp_put(*exp);
157		*exp = exp2;
158	}
159	path_put(&path);
160	return 0;
161}
162
163/*
164 * For nfsd purposes, we treat V4ROOT exports as though there was an
165 * export at *every* directory.
166 */
167int nfsd_mountpoint(struct dentry *dentry, struct svc_export *exp)
168{
169	if (d_mountpoint(dentry))
170		return 1;
171	if (!(exp->ex_flags & NFSEXP_V4ROOT))
172		return 0;
173	return dentry->d_inode != NULL;
174}
175
176__be32
177nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
178		   const char *name, unsigned int len,
179		   struct svc_export **exp_ret, struct dentry **dentry_ret)
180{
181	struct svc_export	*exp;
182	struct dentry		*dparent;
183	struct dentry		*dentry;
184	__be32			err;
185	int			host_err;
186
187	dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name);
188
189	/* Obtain dentry and export. */
190	err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
191	if (err)
192		return err;
193
194	dparent = fhp->fh_dentry;
195	exp  = fhp->fh_export;
196	exp_get(exp);
197
198	/* Lookup the name, but don't follow links */
199	if (isdotent(name, len)) {
200		if (len==1)
201			dentry = dget(dparent);
202		else if (dparent != exp->ex_path.dentry)
203			dentry = dget_parent(dparent);
204		else if (!EX_NOHIDE(exp) && !nfsd_v4client(rqstp))
205			dentry = dget(dparent); /* .. == . just like at / */
206		else {
207			/* checking mountpoint crossing is very different when stepping up */
208			host_err = nfsd_lookup_parent(rqstp, dparent, &exp, &dentry);
209			if (host_err)
210				goto out_nfserr;
211		}
212	} else {
213		fh_lock(fhp);
214		dentry = lookup_one_len(name, dparent, len);
215		host_err = PTR_ERR(dentry);
216		if (IS_ERR(dentry))
217			goto out_nfserr;
218		/*
219		 * check if we have crossed a mount point ...
220		 */
221		if (nfsd_mountpoint(dentry, exp)) {
222			if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) {
223				dput(dentry);
224				goto out_nfserr;
225			}
226		}
227	}
228	*dentry_ret = dentry;
229	*exp_ret = exp;
230	return 0;
231
232out_nfserr:
233	exp_put(exp);
234	return nfserrno(host_err);
235}
236
237/*
238 * Look up one component of a pathname.
239 * N.B. After this call _both_ fhp and resfh need an fh_put
240 *
241 * If the lookup would cross a mountpoint, and the mounted filesystem
242 * is exported to the client with NFSEXP_NOHIDE, then the lookup is
243 * accepted as it stands and the mounted directory is
244 * returned. Otherwise the covered directory is returned.
245 * NOTE: this mountpoint crossing is not supported properly by all
246 *   clients and is explicitly disallowed for NFSv3
247 *      NeilBrown <neilb@cse.unsw.edu.au>
248 */
249__be32
250nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name,
251				unsigned int len, struct svc_fh *resfh)
252{
253	struct svc_export	*exp;
254	struct dentry		*dentry;
255	__be32 err;
256
257	err = nfsd_lookup_dentry(rqstp, fhp, name, len, &exp, &dentry);
258	if (err)
259		return err;
260	err = check_nfsd_access(exp, rqstp);
261	if (err)
262		goto out;
263	/*
264	 * Note: we compose the file handle now, but as the
265	 * dentry may be negative, it may need to be updated.
266	 */
267	err = fh_compose(resfh, exp, dentry, fhp);
268	if (!err && !dentry->d_inode)
269		err = nfserr_noent;
270out:
271	dput(dentry);
272	exp_put(exp);
273	return err;
274}
275
276/*
277 * Commit metadata changes to stable storage.
278 */
279static int
280commit_metadata(struct svc_fh *fhp)
281{
282	struct inode *inode = fhp->fh_dentry->d_inode;
283	const struct export_operations *export_ops = inode->i_sb->s_export_op;
284	int error = 0;
285
286	if (!EX_ISSYNC(fhp->fh_export))
287		return 0;
288
289	if (export_ops->commit_metadata) {
290		error = export_ops->commit_metadata(inode);
291	} else {
292		struct writeback_control wbc = {
293			.sync_mode = WB_SYNC_ALL,
294			.nr_to_write = 0, /* metadata only */
295		};
296
297		error = sync_inode(inode, &wbc);
298	}
299
300	return error;
301}
302
303/*
304 * Set various file attributes.
305 * N.B. After this call fhp needs an fh_put
306 */
307__be32
308nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
309	     int check_guard, time_t guardtime)
310{
311	struct dentry	*dentry;
312	struct inode	*inode;
313	int		accmode = NFSD_MAY_SATTR;
314	int		ftype = 0;
315	__be32		err;
316	int		host_err;
317	int		size_change = 0;
318
319	if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
320		accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
321	if (iap->ia_valid & ATTR_SIZE)
322		ftype = S_IFREG;
323
324	/* Get inode */
325	err = fh_verify(rqstp, fhp, ftype, accmode);
326	if (err)
327		goto out;
328
329	dentry = fhp->fh_dentry;
330	inode = dentry->d_inode;
331
332	/* Ignore any mode updates on symlinks */
333	if (S_ISLNK(inode->i_mode))
334		iap->ia_valid &= ~ATTR_MODE;
335
336	if (!iap->ia_valid)
337		goto out;
338
339	/*
340	 * NFSv2 does not differentiate between "set-[ac]time-to-now"
341	 * which only requires access, and "set-[ac]time-to-X" which
342	 * requires ownership.
343	 * So if it looks like it might be "set both to the same time which
344	 * is close to now", and if inode_change_ok fails, then we
345	 * convert to "set to now" instead of "set to explicit time"
346	 *
347	 * We only call inode_change_ok as the last test as technically
348	 * it is not an interface that we should be using.  It is only
349	 * valid if the filesystem does not define it's own i_op->setattr.
350	 */
351#define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET)
352#define	MAX_TOUCH_TIME_ERROR (30*60)
353	if ((iap->ia_valid & BOTH_TIME_SET) == BOTH_TIME_SET &&
354	    iap->ia_mtime.tv_sec == iap->ia_atime.tv_sec) {
355		/*
356		 * Looks probable.
357		 *
358		 * Now just make sure time is in the right ballpark.
359		 * Solaris, at least, doesn't seem to care what the time
360		 * request is.  We require it be within 30 minutes of now.
361		 */
362		time_t delta = iap->ia_atime.tv_sec - get_seconds();
363		if (delta < 0)
364			delta = -delta;
365		if (delta < MAX_TOUCH_TIME_ERROR &&
366		    inode_change_ok(inode, iap) != 0) {
367			/*
368			 * Turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME.
369			 * This will cause notify_change to set these times
370			 * to "now"
371			 */
372			iap->ia_valid &= ~BOTH_TIME_SET;
373		}
374	}
375
376	/*
377	 * The size case is special.
378	 * It changes the file as well as the attributes.
379	 */
380	if (iap->ia_valid & ATTR_SIZE) {
381		if (iap->ia_size < inode->i_size) {
382			err = nfsd_permission(rqstp, fhp->fh_export, dentry,
383					NFSD_MAY_TRUNC|NFSD_MAY_OWNER_OVERRIDE);
384			if (err)
385				goto out;
386		}
387
388		/*
389		 * If we are changing the size of the file, then
390		 * we need to break all leases.
391		 */
392		host_err = break_lease(inode, O_WRONLY | O_NONBLOCK);
393		if (host_err == -EWOULDBLOCK)
394			host_err = -ETIMEDOUT;
395		if (host_err) /* ENOMEM or EWOULDBLOCK */
396			goto out_nfserr;
397
398		host_err = get_write_access(inode);
399		if (host_err)
400			goto out_nfserr;
401
402		size_change = 1;
403		host_err = locks_verify_truncate(inode, NULL, iap->ia_size);
404		if (host_err) {
405			put_write_access(inode);
406			goto out_nfserr;
407		}
408	}
409
410	/* sanitize the mode change */
411	if (iap->ia_valid & ATTR_MODE) {
412		iap->ia_mode &= S_IALLUGO;
413		iap->ia_mode |= (inode->i_mode & ~S_IALLUGO);
414	}
415
416	/* Revoke setuid/setgid on chown */
417	if (!S_ISDIR(inode->i_mode) &&
418	    (((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) ||
419	     ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid))) {
420		iap->ia_valid |= ATTR_KILL_PRIV;
421		if (iap->ia_valid & ATTR_MODE) {
422			/* we're setting mode too, just clear the s*id bits */
423			iap->ia_mode &= ~S_ISUID;
424			if (iap->ia_mode & S_IXGRP)
425				iap->ia_mode &= ~S_ISGID;
426		} else {
427			/* set ATTR_KILL_* bits and let VFS handle it */
428			iap->ia_valid |= (ATTR_KILL_SUID | ATTR_KILL_SGID);
429		}
430	}
431
432	/* Change the attributes. */
433
434	iap->ia_valid |= ATTR_CTIME;
435
436	err = nfserr_notsync;
437	if (!check_guard || guardtime == inode->i_ctime.tv_sec) {
438		fh_lock(fhp);
439		host_err = notify_change(dentry, iap);
440		err = nfserrno(host_err);
441		fh_unlock(fhp);
442	}
443	if (size_change)
444		put_write_access(inode);
445	if (!err)
446		commit_metadata(fhp);
447out:
448	return err;
449
450out_nfserr:
451	err = nfserrno(host_err);
452	goto out;
453}
454
455#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) || \
456	defined(CONFIG_NFSD_V4)
457static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf)
458{
459	ssize_t buflen;
460	ssize_t ret;
461
462	buflen = vfs_getxattr(dentry, key, NULL, 0);
463	if (buflen <= 0)
464		return buflen;
465
466	*buf = kmalloc(buflen, GFP_KERNEL);
467	if (!*buf)
468		return -ENOMEM;
469
470	ret = vfs_getxattr(dentry, key, *buf, buflen);
471	if (ret < 0)
472		kfree(*buf);
473	return ret;
474}
475#endif
476
477#if defined(CONFIG_NFSD_V4)
478static int
479set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key)
480{
481	int len;
482	size_t buflen;
483	char *buf = NULL;
484	int error = 0;
485
486	buflen = posix_acl_xattr_size(pacl->a_count);
487	buf = kmalloc(buflen, GFP_KERNEL);
488	error = -ENOMEM;
489	if (buf == NULL)
490		goto out;
491
492	len = posix_acl_to_xattr(pacl, buf, buflen);
493	if (len < 0) {
494		error = len;
495		goto out;
496	}
497
498	error = vfs_setxattr(dentry, key, buf, len, 0);
499out:
500	kfree(buf);
501	return error;
502}
503
504__be32
505nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
506    struct nfs4_acl *acl)
507{
508	__be32 error;
509	int host_error;
510	struct dentry *dentry;
511	struct inode *inode;
512	struct posix_acl *pacl = NULL, *dpacl = NULL;
513	unsigned int flags = 0;
514
515	/* Get inode */
516	error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, NFSD_MAY_SATTR);
517	if (error)
518		return error;
519
520	dentry = fhp->fh_dentry;
521	inode = dentry->d_inode;
522	if (S_ISDIR(inode->i_mode))
523		flags = NFS4_ACL_DIR;
524
525	host_error = nfs4_acl_nfsv4_to_posix(acl, &pacl, &dpacl, flags);
526	if (host_error == -EINVAL) {
527		return nfserr_attrnotsupp;
528	} else if (host_error < 0)
529		goto out_nfserr;
530
531	host_error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS);
532	if (host_error < 0)
533		goto out_release;
534
535	if (S_ISDIR(inode->i_mode))
536		host_error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT);
537
538out_release:
539	posix_acl_release(pacl);
540	posix_acl_release(dpacl);
541out_nfserr:
542	if (host_error == -EOPNOTSUPP)
543		return nfserr_attrnotsupp;
544	else
545		return nfserrno(host_error);
546}
547
548static struct posix_acl *
549_get_posix_acl(struct dentry *dentry, char *key)
550{
551	void *buf = NULL;
552	struct posix_acl *pacl = NULL;
553	int buflen;
554
555	buflen = nfsd_getxattr(dentry, key, &buf);
556	if (!buflen)
557		buflen = -ENODATA;
558	if (buflen <= 0)
559		return ERR_PTR(buflen);
560
561	pacl = posix_acl_from_xattr(buf, buflen);
562	kfree(buf);
563	return pacl;
564}
565
566int
567nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_acl **acl)
568{
569	struct inode *inode = dentry->d_inode;
570	int error = 0;
571	struct posix_acl *pacl = NULL, *dpacl = NULL;
572	unsigned int flags = 0;
573
574	pacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_ACCESS);
575	if (IS_ERR(pacl) && PTR_ERR(pacl) == -ENODATA)
576		pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
577	if (IS_ERR(pacl)) {
578		error = PTR_ERR(pacl);
579		pacl = NULL;
580		goto out;
581	}
582
583	if (S_ISDIR(inode->i_mode)) {
584		dpacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_DEFAULT);
585		if (IS_ERR(dpacl) && PTR_ERR(dpacl) == -ENODATA)
586			dpacl = NULL;
587		else if (IS_ERR(dpacl)) {
588			error = PTR_ERR(dpacl);
589			dpacl = NULL;
590			goto out;
591		}
592		flags = NFS4_ACL_DIR;
593	}
594
595	*acl = nfs4_acl_posix_to_nfsv4(pacl, dpacl, flags);
596	if (IS_ERR(*acl)) {
597		error = PTR_ERR(*acl);
598		*acl = NULL;
599	}
600 out:
601	posix_acl_release(pacl);
602	posix_acl_release(dpacl);
603	return error;
604}
605
606#endif /* defined(CONFIG_NFSD_V4) */
607
608#ifdef CONFIG_NFSD_V3
609/*
610 * Check server access rights to a file system object
611 */
612struct accessmap {
613	u32		access;
614	int		how;
615};
616static struct accessmap	nfs3_regaccess[] = {
617    {	NFS3_ACCESS_READ,	NFSD_MAY_READ			},
618    {	NFS3_ACCESS_EXECUTE,	NFSD_MAY_EXEC			},
619    {	NFS3_ACCESS_MODIFY,	NFSD_MAY_WRITE|NFSD_MAY_TRUNC	},
620    {	NFS3_ACCESS_EXTEND,	NFSD_MAY_WRITE			},
621
622    {	0,			0				}
623};
624
625static struct accessmap	nfs3_diraccess[] = {
626    {	NFS3_ACCESS_READ,	NFSD_MAY_READ			},
627    {	NFS3_ACCESS_LOOKUP,	NFSD_MAY_EXEC			},
628    {	NFS3_ACCESS_MODIFY,	NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC},
629    {	NFS3_ACCESS_EXTEND,	NFSD_MAY_EXEC|NFSD_MAY_WRITE	},
630    {	NFS3_ACCESS_DELETE,	NFSD_MAY_REMOVE			},
631
632    {	0,			0				}
633};
634
635static struct accessmap	nfs3_anyaccess[] = {
636	/* Some clients - Solaris 2.6 at least, make an access call
637	 * to the server to check for access for things like /dev/null
638	 * (which really, the server doesn't care about).  So
639	 * We provide simple access checking for them, looking
640	 * mainly at mode bits, and we make sure to ignore read-only
641	 * filesystem checks
642	 */
643    {	NFS3_ACCESS_READ,	NFSD_MAY_READ			},
644    {	NFS3_ACCESS_EXECUTE,	NFSD_MAY_EXEC			},
645    {	NFS3_ACCESS_MODIFY,	NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS	},
646    {	NFS3_ACCESS_EXTEND,	NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS	},
647
648    {	0,			0				}
649};
650
651__be32
652nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *supported)
653{
654	struct accessmap	*map;
655	struct svc_export	*export;
656	struct dentry		*dentry;
657	u32			query, result = 0, sresult = 0;
658	__be32			error;
659
660	error = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP);
661	if (error)
662		goto out;
663
664	export = fhp->fh_export;
665	dentry = fhp->fh_dentry;
666
667	if (S_ISREG(dentry->d_inode->i_mode))
668		map = nfs3_regaccess;
669	else if (S_ISDIR(dentry->d_inode->i_mode))
670		map = nfs3_diraccess;
671	else
672		map = nfs3_anyaccess;
673
674
675	query = *access;
676	for  (; map->access; map++) {
677		if (map->access & query) {
678			__be32 err2;
679
680			sresult |= map->access;
681
682			err2 = nfsd_permission(rqstp, export, dentry, map->how);
683			switch (err2) {
684			case nfs_ok:
685				result |= map->access;
686				break;
687
688			/* the following error codes just mean the access was not allowed,
689			 * rather than an error occurred */
690			case nfserr_rofs:
691			case nfserr_acces:
692			case nfserr_perm:
693				/* simply don't "or" in the access bit. */
694				break;
695			default:
696				error = err2;
697				goto out;
698			}
699		}
700	}
701	*access = result;
702	if (supported)
703		*supported = sresult;
704
705 out:
706	return error;
707}
708#endif /* CONFIG_NFSD_V3 */
709
710
711
712/*
713 * Open an existing file or directory.
714 * The access argument indicates the type of open (read/write/lock)
715 * N.B. After this call fhp needs an fh_put
716 */
717__be32
718nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
719			int access, struct file **filp)
720{
721	struct dentry	*dentry;
722	struct inode	*inode;
723	int		flags = O_RDONLY|O_LARGEFILE;
724	__be32		err;
725	int		host_err = 0;
726
727	validate_process_creds();
728
729	/*
730	 * If we get here, then the client has already done an "open",
731	 * and (hopefully) checked permission - so allow OWNER_OVERRIDE
732	 * in case a chmod has now revoked permission.
733	 */
734	err = fh_verify(rqstp, fhp, type, access | NFSD_MAY_OWNER_OVERRIDE);
735	if (err)
736		goto out;
737
738	dentry = fhp->fh_dentry;
739	inode = dentry->d_inode;
740
741	/* Disallow write access to files with the append-only bit set
742	 * or any access when mandatory locking enabled
743	 */
744	err = nfserr_perm;
745	if (IS_APPEND(inode) && (access & NFSD_MAY_WRITE))
746		goto out;
747	/*
748	 * We must ignore files (but only files) which might have mandatory
749	 * locks on them because there is no way to know if the accesser has
750	 * the lock.
751	 */
752	if (S_ISREG((inode)->i_mode) && mandatory_lock(inode))
753		goto out;
754
755	if (!inode->i_fop)
756		goto out;
757
758	/*
759	 * Check to see if there are any leases on this file.
760	 * This may block while leases are broken.
761	 */
762	if (!(access & NFSD_MAY_NOT_BREAK_LEASE))
763		host_err = break_lease(inode, O_NONBLOCK | ((access & NFSD_MAY_WRITE) ? O_WRONLY : 0));
764	if (host_err == -EWOULDBLOCK)
765		host_err = -ETIMEDOUT;
766	if (host_err) /* NOMEM or WOULDBLOCK */
767		goto out_nfserr;
768
769	if (access & NFSD_MAY_WRITE) {
770		if (access & NFSD_MAY_READ)
771			flags = O_RDWR|O_LARGEFILE;
772		else
773			flags = O_WRONLY|O_LARGEFILE;
774	}
775	*filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt),
776			    flags, current_cred());
777	if (IS_ERR(*filp))
778		host_err = PTR_ERR(*filp);
779	else
780		host_err = ima_file_check(*filp, access);
781out_nfserr:
782	err = nfserrno(host_err);
783out:
784	validate_process_creds();
785	return err;
786}
787
/*
 * Close a file opened by nfsd by dropping the reference to @filp.
 */
void nfsd_close(struct file *filp)
{
	fput(filp);
}
796
797/*
798 * Obtain the readahead parameters for the file
799 * specified by (dev, ino).
800 */
801
802static inline struct raparms *
803nfsd_get_raparms(dev_t dev, ino_t ino)
804{
805	struct raparms	*ra, **rap, **frap = NULL;
806	int depth = 0;
807	unsigned int hash;
808	struct raparm_hbucket *rab;
809
810	hash = jhash_2words(dev, ino, 0xfeedbeef) & RAPARM_HASH_MASK;
811	rab = &raparm_hash[hash];
812
813	spin_lock(&rab->pb_lock);
814	for (rap = &rab->pb_head; (ra = *rap); rap = &ra->p_next) {
815		if (ra->p_ino == ino && ra->p_dev == dev)
816			goto found;
817		depth++;
818		if (ra->p_count == 0)
819			frap = rap;
820	}
821	depth = nfsdstats.ra_size*11/10;
822	if (!frap) {
823		spin_unlock(&rab->pb_lock);
824		return NULL;
825	}
826	rap = frap;
827	ra = *frap;
828	ra->p_dev = dev;
829	ra->p_ino = ino;
830	ra->p_set = 0;
831	ra->p_hindex = hash;
832found:
833	if (rap != &rab->pb_head) {
834		*rap = ra->p_next;
835		ra->p_next   = rab->pb_head;
836		rab->pb_head = ra;
837	}
838	ra->p_count++;
839	nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++;
840	spin_unlock(&rab->pb_lock);
841	return ra;
842}
843
844/*
845 * Grab and keep cached pages associated with a file in the svc_rqst
846 * so that they can be passed to the network sendmsg/sendpage routines
847 * directly. They will be released after the sending has completed.
848 */
849static int
850nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
851		  struct splice_desc *sd)
852{
853	struct svc_rqst *rqstp = sd->u.data;
854	struct page **pp = rqstp->rq_respages + rqstp->rq_resused;
855	struct page *page = buf->page;
856	size_t size;
857	int ret;
858
859	ret = buf->ops->confirm(pipe, buf);
860	if (unlikely(ret))
861		return ret;
862
863	size = sd->len;
864
865	if (rqstp->rq_res.page_len == 0) {
866		get_page(page);
867		put_page(*pp);
868		*pp = page;
869		rqstp->rq_resused++;
870		rqstp->rq_res.page_base = buf->offset;
871		rqstp->rq_res.page_len = size;
872	} else if (page != pp[-1]) {
873		get_page(page);
874		if (*pp)
875			put_page(*pp);
876		*pp = page;
877		rqstp->rq_resused++;
878		rqstp->rq_res.page_len += size;
879	} else
880		rqstp->rq_res.page_len += size;
881
882	return size;
883}
884
885static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
886				    struct splice_desc *sd)
887{
888	return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
889}
890
/*
 * Return non-zero if the export behind @ffhp has the NFSEXP_MSNFS flag
 * set.  Always 0 when the file is built without MSNFS.
 */
static inline int svc_msnfs(struct svc_fh *ffhp)
{
#ifndef MSNFS
	return 0;
#else
	return (ffhp->fh_export->ex_flags & NFSEXP_MSNFS);
#endif
}
899
900static __be32
901nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
902              loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
903{
904	struct inode *inode;
905	mm_segment_t	oldfs;
906	__be32		err;
907	int		host_err;
908
909	err = nfserr_perm;
910	inode = file->f_path.dentry->d_inode;
911
912	if (svc_msnfs(fhp) && !lock_may_read(inode, offset, *count))
913		goto out;
914
915	if (file->f_op->splice_read && rqstp->rq_splice_ok) {
916		struct splice_desc sd = {
917			.len		= 0,
918			.total_len	= *count,
919			.pos		= offset,
920			.u.data		= rqstp,
921		};
922
923		rqstp->rq_resused = 1;
924		host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
925	} else {
926		oldfs = get_fs();
927		set_fs(KERNEL_DS);
928		host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset);
929		set_fs(oldfs);
930	}
931
932	if (host_err >= 0) {
933		nfsdstats.io_read += host_err;
934		*count = host_err;
935		err = 0;
936		fsnotify_access(file);
937	} else
938		err = nfserrno(host_err);
939out:
940	return err;
941}
942
943static void kill_suid(struct dentry *dentry)
944{
945	struct iattr	ia;
946	ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
947
948	mutex_lock(&dentry->d_inode->i_mutex);
949	notify_change(dentry, &ia);
950	mutex_unlock(&dentry->d_inode->i_mutex);
951}
952
953/*
954 * Gathered writes: If another process is currently writing to the file,
955 * there's a high chance this is another nfsd (triggered by a bulk write
956 * from a client's biod). Rather than syncing the file with each write
957 * request, we sleep for 10 msec.
958 *
959 * I don't know if this roughly approximates C. Juszak's idea of
960 * gathered writes, but it's a nice and simple solution (IMHO), and it
961 * seems to work:-)
962 *
963 * Note: we do this only in the NFSv2 case, since v3 and higher have a
964 * better tool (separate unstable writes and commits) for solving this
965 * problem.
966 */
967static int wait_for_concurrent_writes(struct file *file)
968{
969	struct inode *inode = file->f_path.dentry->d_inode;
970	static ino_t last_ino;
971	static dev_t last_dev;
972	int err = 0;
973
974	if (atomic_read(&inode->i_writecount) > 1
975	    || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) {
976		dprintk("nfsd: write defer %d\n", task_pid_nr(current));
977		msleep(10);
978		dprintk("nfsd: write resume %d\n", task_pid_nr(current));
979	}
980
981	if (inode->i_state & I_DIRTY) {
982		dprintk("nfsd: write sync %d\n", task_pid_nr(current));
983		err = vfs_fsync(file, 0);
984	}
985	last_ino = inode->i_ino;
986	last_dev = inode->i_sb->s_dev;
987	return err;
988}
989
990static __be32
991nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
992				loff_t offset, struct kvec *vec, int vlen,
993				unsigned long *cnt, int *stablep)
994{
995	struct svc_export	*exp;
996	struct dentry		*dentry;
997	struct inode		*inode;
998	mm_segment_t		oldfs;
999	__be32			err = 0;
1000	int			host_err;
1001	int			stable = *stablep;
1002	int			use_wgather;
1003
1004#ifdef MSNFS
1005	err = nfserr_perm;
1006
1007	if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
1008		(!lock_may_write(file->f_path.dentry->d_inode, offset, *cnt)))
1009		goto out;
1010#endif
1011
1012	dentry = file->f_path.dentry;
1013	inode = dentry->d_inode;
1014	exp   = fhp->fh_export;
1015
1016	/*
1017	 * Request sync writes if
1018	 *  -	the sync export option has been set, or
1019	 *  -	the client requested O_SYNC behavior (NFSv3 feature).
1020	 *  -   The file system doesn't support fsync().
1021	 * When NFSv2 gathered writes have been configured for this volume,
1022	 * flushing the data to disk is handled separately below.
1023	 */
1024	use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp);
1025
1026	if (!file->f_op->fsync) {/* COMMIT3 cannot work */
1027	       stable = 2;
1028	       *stablep = 2; /* FILE_SYNC */
1029	}
1030
1031	if (!EX_ISSYNC(exp))
1032		stable = 0;
1033	if (stable && !use_wgather) {
1034		spin_lock(&file->f_lock);
1035		file->f_flags |= O_SYNC;
1036		spin_unlock(&file->f_lock);
1037	}
1038
1039	/* Write the data. */
1040	oldfs = get_fs(); set_fs(KERNEL_DS);
1041	host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset);
1042	set_fs(oldfs);
1043	if (host_err < 0)
1044		goto out_nfserr;
1045	*cnt = host_err;
1046	nfsdstats.io_write += host_err;
1047	fsnotify_modify(file);
1048
1049	/* clear setuid/setgid flag after write */
1050	if (inode->i_mode & (S_ISUID | S_ISGID))
1051		kill_suid(dentry);
1052
1053	if (stable && use_wgather)
1054		host_err = wait_for_concurrent_writes(file);
1055
1056out_nfserr:
1057	dprintk("nfsd: write complete host_err=%d\n", host_err);
1058	if (host_err >= 0)
1059		err = 0;
1060	else
1061		err = nfserrno(host_err);
1062out:
1063	return err;
1064}
1065
1066/*
1067 * Read data from a file. count must contain the requested read count
1068 * on entry. On return, *count contains the number of bytes actually read.
1069 * N.B. After this call fhp needs an fh_put
1070 */
1071__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
1072	loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
1073{
1074	struct file *file;
1075	struct inode *inode;
1076	struct raparms	*ra;
1077	__be32 err;
1078
1079	err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
1080	if (err)
1081		return err;
1082
1083	inode = file->f_path.dentry->d_inode;
1084
1085	/* Get readahead parameters */
1086	ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino);
1087
1088	if (ra && ra->p_set)
1089		file->f_ra = ra->p_ra;
1090
1091	err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
1092
1093	/* Write back readahead params */
1094	if (ra) {
1095		struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
1096		spin_lock(&rab->pb_lock);
1097		ra->p_ra = file->f_ra;
1098		ra->p_set = 1;
1099		ra->p_count--;
1100		spin_unlock(&rab->pb_lock);
1101	}
1102
1103	nfsd_close(file);
1104	return err;
1105}
1106
1107/* As above, but use the provided file descriptor. */
1108__be32
1109nfsd_read_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
1110		loff_t offset, struct kvec *vec, int vlen,
1111		unsigned long *count)
1112{
1113	__be32		err;
1114
1115	if (file) {
1116		err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
1117				NFSD_MAY_READ|NFSD_MAY_OWNER_OVERRIDE);
1118		if (err)
1119			goto out;
1120		err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
1121	} else /* Note file may still be NULL in NFSv4 special stateid case: */
1122		err = nfsd_read(rqstp, fhp, offset, vec, vlen, count);
1123out:
1124	return err;
1125}
1126
1127/*
1128 * Write data to a file.
1129 * The stable flag requests synchronous writes.
1130 * N.B. After this call fhp needs an fh_put
1131 */
1132__be32
1133nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
1134		loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt,
1135		int *stablep)
1136{
1137	__be32			err = 0;
1138
1139	if (file) {
1140		err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
1141				NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE);
1142		if (err)
1143			goto out;
1144		err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt,
1145				stablep);
1146	} else {
1147		err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
1148		if (err)
1149			goto out;
1150
1151		if (cnt)
1152			err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen,
1153					     cnt, stablep);
1154		nfsd_close(file);
1155	}
1156out:
1157	return err;
1158}
1159
1160#ifdef CONFIG_NFSD_V3
1161/*
1162 * Commit all pending writes to stable storage.
1163 *
1164 * Note: we only guarantee that data that lies within the range specified
1165 * by the 'offset' and 'count' parameters will be synced.
1166 *
1167 * Unfortunately we cannot lock the file to make sure we return full WCC
1168 * data to the client, as locking happens lower down in the filesystem.
1169 */
1170__be32
1171nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
1172               loff_t offset, unsigned long count)
1173{
1174	struct file	*file;
1175	loff_t		end = LLONG_MAX;
1176	__be32		err = nfserr_inval;
1177
1178	if (offset < 0)
1179		goto out;
1180	if (count != 0) {
1181		end = offset + (loff_t)count - 1;
1182		if (end < offset)
1183			goto out;
1184	}
1185
1186	err = nfsd_open(rqstp, fhp, S_IFREG,
1187			NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &file);
1188	if (err)
1189		goto out;
1190	if (EX_ISSYNC(fhp->fh_export)) {
1191		int err2 = vfs_fsync_range(file, offset, end, 0);
1192
1193		if (err2 != -EINVAL)
1194			err = nfserrno(err2);
1195		else
1196			err = nfserr_notsupp;
1197	}
1198
1199	nfsd_close(file);
1200out:
1201	return err;
1202}
1203#endif /* CONFIG_NFSD_V3 */
1204
1205static __be32
1206nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp,
1207			struct iattr *iap)
1208{
1209	/*
1210	 * Mode has already been set earlier in create:
1211	 */
1212	iap->ia_valid &= ~ATTR_MODE;
1213	/*
1214	 * Setting uid/gid works only for root.  Irix appears to
1215	 * send along the gid on create when it tries to implement
1216	 * setgid directories via NFS:
1217	 */
1218	if (current_fsuid() != 0)
1219		iap->ia_valid &= ~(ATTR_UID|ATTR_GID);
1220	if (iap->ia_valid)
1221		return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
1222	return 0;
1223}
1224
1225/* HPUX client sometimes creates a file in mode 000, and sets size to 0.
1226 * setting size to 0 may fail for some specific file systems by the permission
1227 * checking which requires WRITE permission but the mode is 000.
1228 * we ignore the resizing(to 0) on the just new created file, since the size is
1229 * 0 after file created.
1230 *
1231 * call this only after vfs_create() is called.
1232 * */
1233static void
1234nfsd_check_ignore_resizing(struct iattr *iap)
1235{
1236	if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0))
1237		iap->ia_valid &= ~ATTR_SIZE;
1238}
1239
/*
 * Create a file (regular, directory, device, fifo); UNIX sockets
 * not yet implemented.
 * NOTE(review): S_IFSOCK is nevertheless routed to vfs_mknod() below, so
 * the "not yet implemented" remark may be stale -- confirm.
 *
 * If the response fh has been verified, the parent directory should
 * already be locked. Note that the parent directory is left locked.
 *
 * @fhp:    file handle of the parent directory
 * @fname:  name of the new entry, @flen bytes long
 * @iap:    requested attributes; ia_mode is rewritten here to carry @type
 * @type:   S_IF* file type of the object to create
 * @rdev:   device number, used only for the vfs_mknod() cases
 * @resfhp: response file handle; composed here when it has no dentry yet
 *
 * Returns an NFS status code (0 on success).
 *
 * N.B. Every call to nfsd_create needs an fh_put for _both_ fhp and resfhp
 */
__be32
nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
		char *fname, int flen, struct iattr *iap,
		int type, dev_t rdev, struct svc_fh *resfhp)
{
	struct dentry	*dentry, *dchild = NULL;
	struct inode	*dirp;
	__be32		err;
	__be32		err2;
	int		host_err;

	/* Reject an empty name and the "." / ".." entries up front. */
	err = nfserr_perm;
	if (!flen)
		goto out;
	err = nfserr_exist;
	if (isdotent(fname, flen))
		goto out;

	err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
	if (err)
		goto out;

	dentry = fhp->fh_dentry;
	dirp = dentry->d_inode;

	/* A parent without a ->lookup method is reported as "not a directory". */
	err = nfserr_notdir;
	if (!dirp->i_op->lookup)
		goto out;
	/*
	 * Check whether the response file handle has been verified yet.
	 * If it has, the parent directory should already be locked.
	 */
	if (!resfhp->fh_dentry) {
		/* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */
		fh_lock_nested(fhp, I_MUTEX_PARENT);
		dchild = lookup_one_len(fname, dentry, flen);
		host_err = PTR_ERR(dchild);
		if (IS_ERR(dchild))
			goto out_nfserr;
		err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
		if (err)
			goto out;
	} else {
		/* called from nfsd_proc_create */
		/* Take our own reference so the dput() at "out" is balanced. */
		dchild = dget(resfhp->fh_dentry);
		if (!fhp->fh_locked) {
			/* not actually possible */
			printk(KERN_ERR
				"nfsd_create: parent %s/%s not locked!\n",
				dentry->d_parent->d_name.name,
				dentry->d_name.name);
			err = nfserr_io;
			goto out;
		}
	}
	/*
	 * Make sure the child dentry is still negative ...
	 */
	err = nfserr_exist;
	if (dchild->d_inode) {
		dprintk("nfsd_create: dentry %s/%s not negative!\n",
			dentry->d_name.name, dchild->d_name.name);
		goto out;
	}

	/* Without ATTR_MODE start from mode 0; then fold the file type in. */
	if (!(iap->ia_valid & ATTR_MODE))
		iap->ia_mode = 0;
	iap->ia_mode = (iap->ia_mode & S_IALLUGO) | type;

	err = nfserr_inval;
	if (!S_ISREG(type) && !S_ISDIR(type) && !special_file(type)) {
		printk(KERN_WARNING "nfsd: bad file type %o in nfsd_create\n",
		       type);
		goto out;
	}

	/* Write access on the mount is held from here until mnt_drop_write(). */
	host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
	if (host_err)
		goto out_nfserr;

	/*
	 * Get the dir op function pointer.
	 */
	err = 0;
	switch (type) {
	case S_IFREG:
		host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
		if (!host_err)
			nfsd_check_ignore_resizing(iap);
		break;
	case S_IFDIR:
		host_err = vfs_mkdir(dirp, dchild, iap->ia_mode);
		break;
	case S_IFCHR:
	case S_IFBLK:
	case S_IFIFO:
	case S_IFSOCK:
		host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
		break;
	}
	if (host_err < 0) {
		mnt_drop_write(fhp->fh_export->ex_path.mnt);
		goto out_nfserr;
	}

	err = nfsd_create_setattr(rqstp, resfhp, iap);

	/*
	 * nfsd_setattr already committed the child.  Transactional filesystems
	 * had a chance to commit changes for both parent and child
	 * simultaneously making the following commit_metadata a noop.
	 */
	/* A commit failure takes precedence over the setattr result. */
	err2 = nfserrno(commit_metadata(fhp));
	if (err2)
		err = err2;
	mnt_drop_write(fhp->fh_export->ex_path.mnt);
	/*
	 * Update the file handle to get the new inode info.
	 */
	if (!err)
		err = fh_update(resfhp);
out:
	/* dchild is NULL on the early exits and an ERR_PTR after a failed lookup. */
	if (dchild && !IS_ERR(dchild))
		dput(dchild);
	return err;

out_nfserr:
	/* Translate the host errno left in host_err into an NFS status. */
	err = nfserrno(host_err);
	goto out;
}
1378
1379#ifdef CONFIG_NFSD_V3
/*
 * NFSv3 version of nfsd_create
 *
 * Creates a regular file in the directory @fhp under the name @fname,
 * honouring the NFSv3 create mode in @createmode:
 *  - NFS3_CREATE_UNCHECKED: an existing regular file is accepted;
 *  - NFS3_CREATE_GUARDED:   an existing entry yields nfserr_exist;
 *  - NFS3_CREATE_EXCLUSIVE: @verifier is stored in the new file's
 *    mtime/atime, and an existing file is accepted only if its
 *    mtime/atime match the verifier and its size is 0.
 *
 * @truncp:  if non-NULL and an existing regular file is found in UNCHECKED
 *           mode, receives whether a truncate to size 0 was requested; the
 *           file itself is then left untouched here.
 * @created: if non-NULL, set to 1 when a new file was actually created.
 *
 * Returns an NFS status code (0 on success).
 */
__be32
nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
		char *fname, int flen, struct iattr *iap,
		struct svc_fh *resfhp, int createmode, u32 *verifier,
	        int *truncp, int *created)
{
	struct dentry	*dentry, *dchild = NULL;
	struct inode	*dirp;
	__be32		err;
	int		host_err;
	__u32		v_mtime=0, v_atime=0;

	/* Reject an empty name and the "." / ".." entries up front. */
	err = nfserr_perm;
	if (!flen)
		goto out;
	err = nfserr_exist;
	if (isdotent(fname, flen))
		goto out;
	if (!(iap->ia_valid & ATTR_MODE))
		iap->ia_mode = 0;
	err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
	if (err)
		goto out;

	dentry = fhp->fh_dentry;
	dirp = dentry->d_inode;

	/* Get all the sanity checks out of the way before
	 * we lock the parent. */
	err = nfserr_notdir;
	if (!dirp->i_op->lookup)
		goto out;
	fh_lock_nested(fhp, I_MUTEX_PARENT);

	/*
	 * Compose the response file handle.
	 */
	dchild = lookup_one_len(fname, dentry, flen);
	host_err = PTR_ERR(dchild);
	if (IS_ERR(dchild))
		goto out_nfserr;

	err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
	if (err)
		goto out;

	if (createmode == NFS3_CREATE_EXCLUSIVE) {
		/* solaris7 gets confused (bugid 4218508) if these have
		 * the high bit set, so just clear the high bits. If this is
		 * ever changed to use different attrs for storing the
		 * verifier, then do_open_lookup() will also need to be fixed
		 * accordingly.
		 */
		v_mtime = verifier[0]&0x7fffffff;
		v_atime = verifier[1]&0x7fffffff;
	}

	/* Write access on the mount is held from here until mnt_drop_write(). */
	host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
	if (host_err)
		goto out_nfserr;
	if (dchild->d_inode) {
		/* The name already exists: decide per create mode. */
		err = 0;

		switch (createmode) {
		case NFS3_CREATE_UNCHECKED:
			if (! S_ISREG(dchild->d_inode->i_mode))
				err = nfserr_exist;
			else if (truncp) {
				/* in nfsv4, we need to treat this case a little
				 * differently.  we don't want to truncate the
				 * file now; this would be wrong if the OPEN
				 * fails for some other reason.  furthermore,
				 * if the size is nonzero, we should ignore it
				 * according to spec!
				 */
				*truncp = (iap->ia_valid & ATTR_SIZE) && !iap->ia_size;
			}
			else {
				/* Keep only a size change and apply it below. */
				iap->ia_valid &= ATTR_SIZE;
				goto set_attr;
			}
			break;
		case NFS3_CREATE_EXCLUSIVE:
			/* Matching verifier and empty file: accept the existing file. */
			if (   dchild->d_inode->i_mtime.tv_sec == v_mtime
			    && dchild->d_inode->i_atime.tv_sec == v_atime
			    && dchild->d_inode->i_size  == 0 )
				break;
			 /* fallthru */
		case NFS3_CREATE_GUARDED:
			err = nfserr_exist;
		}
		mnt_drop_write(fhp->fh_export->ex_path.mnt);
		goto out;
	}

	host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
	if (host_err < 0) {
		mnt_drop_write(fhp->fh_export->ex_path.mnt);
		goto out_nfserr;
	}
	if (created)
		*created = 1;

	nfsd_check_ignore_resizing(iap);

	if (createmode == NFS3_CREATE_EXCLUSIVE) {
		/* Cram the verifier into atime/mtime */
		/* Note: this replaces every other attribute the client asked for. */
		iap->ia_valid = ATTR_MTIME|ATTR_ATIME
			| ATTR_MTIME_SET|ATTR_ATIME_SET;
		iap->ia_mtime.tv_sec = v_mtime;
		iap->ia_atime.tv_sec = v_atime;
		iap->ia_mtime.tv_nsec = 0;
		iap->ia_atime.tv_nsec = 0;
	}

 set_attr:
	err = nfsd_create_setattr(rqstp, resfhp, iap);

	/*
	 * nfsd_setattr already committed the child (and possibly also the parent).
	 */
	if (!err)
		err = nfserrno(commit_metadata(fhp));

	mnt_drop_write(fhp->fh_export->ex_path.mnt);
	/*
	 * Update the filehandle to get the new inode info.
	 */
	if (!err)
		err = fh_update(resfhp);

 out:
	/*
	 * NOTE(review): this is also reached on exits taken before
	 * fh_lock_nested(); presumably fh_unlock() is a no-op for a handle
	 * that is not locked -- confirm.
	 */
	fh_unlock(fhp);
	/* dchild is NULL on the early exits and an ERR_PTR after a failed lookup. */
	if (dchild && !IS_ERR(dchild))
		dput(dchild);
 	return err;

 out_nfserr:
	/* Translate the host errno left in host_err into an NFS status. */
	err = nfserrno(host_err);
	goto out;
}
1524#endif /* CONFIG_NFSD_V3 */
1525
1526/*
1527 * Read a symlink. On entry, *lenp must contain the maximum path length that
1528 * fits into the buffer. On return, it contains the true length.
1529 * N.B. After this call fhp needs an fh_put
1530 */
1531__be32
1532nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp)
1533{
1534	struct dentry	*dentry;
1535	struct inode	*inode;
1536	mm_segment_t	oldfs;
1537	__be32		err;
1538	int		host_err;
1539
1540	err = fh_verify(rqstp, fhp, S_IFLNK, NFSD_MAY_NOP);
1541	if (err)
1542		goto out;
1543
1544	dentry = fhp->fh_dentry;
1545	inode = dentry->d_inode;
1546
1547	err = nfserr_inval;
1548	if (!inode->i_op->readlink)
1549		goto out;
1550
1551	touch_atime(fhp->fh_export->ex_path.mnt, dentry);
1552	/* N.B. Why does this call need a get_fs()??
1553	 * Remove the set_fs and watch the fireworks:-) --okir
1554	 */
1555
1556	oldfs = get_fs(); set_fs(KERNEL_DS);
1557	host_err = inode->i_op->readlink(dentry, buf, *lenp);
1558	set_fs(oldfs);
1559
1560	if (host_err < 0)
1561		goto out_nfserr;
1562	*lenp = host_err;
1563	err = 0;
1564out:
1565	return err;
1566
1567out_nfserr:
1568	err = nfserrno(host_err);
1569	goto out;
1570}
1571
1572/*
1573 * Create a symlink and look up its inode
1574 * N.B. After this call _both_ fhp and resfhp need an fh_put
1575 */
1576__be32
1577nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
1578				char *fname, int flen,
1579				char *path,  int plen,
1580				struct svc_fh *resfhp,
1581				struct iattr *iap)
1582{
1583	struct dentry	*dentry, *dnew;
1584	__be32		err, cerr;
1585	int		host_err;
1586
1587	err = nfserr_noent;
1588	if (!flen || !plen)
1589		goto out;
1590	err = nfserr_exist;
1591	if (isdotent(fname, flen))
1592		goto out;
1593
1594	err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
1595	if (err)
1596		goto out;
1597	fh_lock(fhp);
1598	dentry = fhp->fh_dentry;
1599	dnew = lookup_one_len(fname, dentry, flen);
1600	host_err = PTR_ERR(dnew);
1601	if (IS_ERR(dnew))
1602		goto out_nfserr;
1603
1604	host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
1605	if (host_err)
1606		goto out_nfserr;
1607
1608	if (unlikely(path[plen] != 0)) {
1609		char *path_alloced = kmalloc(plen+1, GFP_KERNEL);
1610		if (path_alloced == NULL)
1611			host_err = -ENOMEM;
1612		else {
1613			strncpy(path_alloced, path, plen);
1614			path_alloced[plen] = 0;
1615			host_err = vfs_symlink(dentry->d_inode, dnew, path_alloced);
1616			kfree(path_alloced);
1617		}
1618	} else
1619		host_err = vfs_symlink(dentry->d_inode, dnew, path);
1620	err = nfserrno(host_err);
1621	if (!err)
1622		err = nfserrno(commit_metadata(fhp));
1623	fh_unlock(fhp);
1624
1625	mnt_drop_write(fhp->fh_export->ex_path.mnt);
1626
1627	cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp);
1628	dput(dnew);
1629	if (err==0) err = cerr;
1630out:
1631	return err;
1632
1633out_nfserr:
1634	err = nfserrno(host_err);
1635	goto out;
1636}
1637
/*
 * Create a hardlink
 *
 * @ffhp: file handle of the directory that receives the new name
 * @name: new entry name, @len bytes long
 * @tfhp: file handle of the existing object to link to
 *
 * Returns an NFS status code (0 on success).  For NFSv2 requests an
 * -EXDEV failure from vfs_link() is reported as nfserr_acces.
 *
 * N.B. After this call _both_ ffhp and tfhp need an fh_put
 */
__be32
nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
				char *name, int len, struct svc_fh *tfhp)
{
	struct dentry	*ddir, *dnew, *dold;
	struct inode	*dirp;
	__be32		err;
	int		host_err;

	err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_CREATE);
	if (err)
		goto out;
	/* NOTE(review): the negated type presumably means "anything but a
	 * directory" to fh_verify() -- confirm. */
	err = fh_verify(rqstp, tfhp, -S_IFDIR, NFSD_MAY_NOP);
	if (err)
		goto out;

	/* Reject an empty name and the "." / ".." entries. */
	err = nfserr_perm;
	if (!len)
		goto out;
	err = nfserr_exist;
	if (isdotent(name, len))
		goto out;

	fh_lock_nested(ffhp, I_MUTEX_PARENT);
	ddir = ffhp->fh_dentry;
	dirp = ddir->d_inode;

	dnew = lookup_one_len(name, ddir, len);
	host_err = PTR_ERR(dnew);
	if (IS_ERR(dnew))
		goto out_nfserr;

	dold = tfhp->fh_dentry;

	/* Write access is taken on the mount of the link target's export. */
	host_err = mnt_want_write(tfhp->fh_export->ex_path.mnt);
	if (host_err) {
		err = nfserrno(host_err);
		goto out_dput;
	}
	host_err = vfs_link(dold, dirp, dnew);
	if (!host_err) {
		/* Commit the directory first, then the linked object. */
		err = nfserrno(commit_metadata(ffhp));
		if (!err)
			err = nfserrno(commit_metadata(tfhp));
	} else {
		if (host_err == -EXDEV && rqstp->rq_vers == 2)
			err = nfserr_acces;
		else
			err = nfserrno(host_err);
	}
	mnt_drop_write(tfhp->fh_export->ex_path.mnt);
out_dput:
	dput(dnew);
out_unlock:
	fh_unlock(ffhp);
out:
	return err;

out_nfserr:
	/* Lookup failed: no dentry reference to drop, only the lock. */
	err = nfserrno(host_err);
	goto out_unlock;
}
1704
/*
 * Rename a file
 *
 * @ffhp/@fname/@flen: source directory handle and source entry name
 * @tfhp/@tname/@tlen: target directory handle and target entry name
 *
 * Both directories must belong to the same export (otherwise nfserr_xdev,
 * or nfserr_acces for NFSv2) and to the same mount (-EXDEV).  Empty names
 * and "." / ".." are rejected with nfserr_perm.
 *
 * Returns an NFS status code (0 on success).
 *
 * N.B. After this call _both_ ffhp and tfhp need an fh_put
 */
__be32
nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
			    struct svc_fh *tfhp, char *tname, int tlen)
{
	struct dentry	*fdentry, *tdentry, *odentry, *ndentry, *trap;
	struct inode	*fdir, *tdir;
	__be32		err;
	int		host_err;

	err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE);
	if (err)
		goto out;
	err = fh_verify(rqstp, tfhp, S_IFDIR, NFSD_MAY_CREATE);
	if (err)
		goto out;

	fdentry = ffhp->fh_dentry;
	fdir = fdentry->d_inode;

	tdentry = tfhp->fh_dentry;
	tdir = tdentry->d_inode;

	err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev;
	if (ffhp->fh_export != tfhp->fh_export)
		goto out;

	err = nfserr_perm;
	if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen))
		goto out;

	/* cannot use fh_lock as we need deadlock protective ordering
	 * so do it by hand */
	trap = lock_rename(tdentry, fdentry);
	ffhp->fh_locked = tfhp->fh_locked = 1;
	fill_pre_wcc(ffhp);
	fill_pre_wcc(tfhp);

	odentry = lookup_one_len(fname, fdentry, flen);
	host_err = PTR_ERR(odentry);
	if (IS_ERR(odentry))
		goto out_nfserr;

	/* The source must exist ... */
	host_err = -ENOENT;
	if (!odentry->d_inode)
		goto out_dput_old;
	/* ... and must not be the dentry returned by lock_rename(). */
	host_err = -EINVAL;
	if (odentry == trap)
		goto out_dput_old;

	ndentry = lookup_one_len(tname, tdentry, tlen);
	host_err = PTR_ERR(ndentry);
	if (IS_ERR(ndentry))
		goto out_dput_old;
	/* Likewise the target must not be the dentry returned by lock_rename(). */
	host_err = -ENOTEMPTY;
	if (ndentry == trap)
		goto out_dput_new;

	/* On msnfs exports, refuse when either dentry has other users. */
	if (svc_msnfs(ffhp) &&
		((atomic_read(&odentry->d_count) > 1)
		 || (atomic_read(&ndentry->d_count) > 1))) {
			host_err = -EPERM;
			goto out_dput_new;
	}

	host_err = -EXDEV;
	if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt)
		goto out_dput_new;
	host_err = mnt_want_write(ffhp->fh_export->ex_path.mnt);
	if (host_err)
		goto out_dput_new;

	host_err = vfs_rename(fdir, odentry, tdir, ndentry);
	if (!host_err) {
		/* Commit the target directory first, then the source. */
		host_err = commit_metadata(tfhp);
		if (!host_err)
			host_err = commit_metadata(ffhp);
	}

	mnt_drop_write(ffhp->fh_export->ex_path.mnt);

 out_dput_new:
	dput(ndentry);
 out_dput_old:
	dput(odentry);
 out_nfserr:
	/* Success also passes through here; host_err is 0 in that case. */
	err = nfserrno(host_err);

	/* we cannot rely on fh_unlock on the two filehandles,
	 * as that would do the wrong thing if the two directories
	 * were the same, so again we do it by hand
	 */
	fill_post_wcc(ffhp);
	fill_post_wcc(tfhp);
	unlock_rename(tdentry, fdentry);
	ffhp->fh_locked = tfhp->fh_locked = 0;

out:
	return err;
}
1808
1809/*
1810 * Unlink a file or directory
1811 * N.B. After this call fhp needs an fh_put
1812 */
1813__be32
1814nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
1815				char *fname, int flen)
1816{
1817	struct dentry	*dentry, *rdentry;
1818	struct inode	*dirp;
1819	__be32		err;
1820	int		host_err;
1821
1822	err = nfserr_acces;
1823	if (!flen || isdotent(fname, flen))
1824		goto out;
1825	err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_REMOVE);
1826	if (err)
1827		goto out;
1828
1829	fh_lock_nested(fhp, I_MUTEX_PARENT);
1830	dentry = fhp->fh_dentry;
1831	dirp = dentry->d_inode;
1832
1833	rdentry = lookup_one_len(fname, dentry, flen);
1834	host_err = PTR_ERR(rdentry);
1835	if (IS_ERR(rdentry))
1836		goto out_nfserr;
1837
1838	if (!rdentry->d_inode) {
1839		dput(rdentry);
1840		err = nfserr_noent;
1841		goto out;
1842	}
1843
1844	if (!type)
1845		type = rdentry->d_inode->i_mode & S_IFMT;
1846
1847	host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
1848	if (host_err)
1849		goto out_nfserr;
1850
1851	if (type != S_IFDIR) { /* It's UNLINK */
1852#ifdef MSNFS
1853		if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
1854			(atomic_read(&rdentry->d_count) > 1)) {
1855			host_err = -EPERM;
1856		} else
1857#endif
1858		host_err = vfs_unlink(dirp, rdentry);
1859	} else { /* It's RMDIR */
1860		host_err = vfs_rmdir(dirp, rdentry);
1861	}
1862
1863	dput(rdentry);
1864
1865	if (!host_err)
1866		host_err = commit_metadata(fhp);
1867
1868	mnt_drop_write(fhp->fh_export->ex_path.mnt);
1869out_nfserr:
1870	err = nfserrno(host_err);
1871out:
1872	return err;
1873}
1874
/*
 * We do this buffering because we must not call back into the file
 * system's ->lookup() method from the filldir callback. That may well
 * deadlock a number of file systems.
 *
 * This is based heavily on the implementation of same in XFS.
 *
 * One buffered directory entry.  Records are packed back to back in a
 * page, each padded to a multiple of sizeof(u64).
 */
struct buffered_dirent {
	u64		ino;		/* inode number passed to the filldir callback */
	loff_t		offset;		/* directory offset passed to the filldir callback */
	int		namlen;		/* number of bytes stored in name[] */
	unsigned int	d_type;		/* entry type passed to the filldir callback */
	char		name[];		/* entry name, namlen bytes, no terminating NUL stored */
};
1889
/* State handed to nfsd_buffered_filldir() while one buffer is being filled. */
struct readdir_data {
	char		*dirent;	/* start of the buffer holding buffered_dirent records */
	size_t		used;		/* bytes of the buffer filled so far */
	int		full;		/* set to 1 when an entry did not fit */
};
1895
1896static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen,
1897				 loff_t offset, u64 ino, unsigned int d_type)
1898{
1899	struct readdir_data *buf = __buf;
1900	struct buffered_dirent *de = (void *)(buf->dirent + buf->used);
1901	unsigned int reclen;
1902
1903	reclen = ALIGN(sizeof(struct buffered_dirent) + namlen, sizeof(u64));
1904	if (buf->used + reclen > PAGE_SIZE) {
1905		buf->full = 1;
1906		return -EINVAL;
1907	}
1908
1909	de->namlen = namlen;
1910	de->offset = offset;
1911	de->ino = ino;
1912	de->d_type = d_type;
1913	memcpy(de->name, name, namlen);
1914	buf->used += reclen;
1915
1916	return 0;
1917}
1918
/*
 * Read a directory in page-sized batches and feed the entries to @func.
 *
 * Each pass first collects entries into a private page through
 * nfsd_buffered_filldir(), then replays them to @func while holding the
 * directory's i_mutex.  The loop ends when the directory yields no more
 * entries, when vfs_readdir() or the mutex acquisition fails, or when
 * @func returns non-zero or leaves cdp->err different from nfs_ok.
 *
 * @offsetp: in: starting offset; out (only when no host error occurred):
 *           offset of the entry at which replay stopped, or the file
 *           position after the last completed batch.
 *
 * Returns the translated host error if there was one, otherwise cdp->err.
 */
static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func,
				    struct readdir_cd *cdp, loff_t *offsetp)
{
	struct readdir_data buf;
	struct buffered_dirent *de;
	int host_err;
	int size;
	loff_t offset;

	buf.dirent = (void *)__get_free_page(GFP_KERNEL);
	if (!buf.dirent)
		return nfserrno(-ENOMEM);

	offset = *offsetp;

	while (1) {
		struct inode *dir_inode = file->f_path.dentry->d_inode;
		unsigned int reclen;

		cdp->err = nfserr_eof; /* will be cleared on successful read */
		buf.used = 0;
		buf.full = 0;

		host_err = vfs_readdir(file, nfsd_buffered_filldir, &buf);
		/* A full buffer makes the callback return -EINVAL; not an error. */
		if (buf.full)
			host_err = 0;

		if (host_err < 0)
			break;

		size = buf.used;

		/* Nothing buffered: end of directory. */
		if (!size)
			break;

		/*
		 * Various filldir functions may end up calling back into
		 * lookup_one_len() and the file system's ->lookup() method.
		 * These expect i_mutex to be held, as it would within readdir.
		 */
		host_err = mutex_lock_killable(&dir_inode->i_mutex);
		if (host_err)
			break;

		/* Replay the buffered records in the order they were stored. */
		de = (struct buffered_dirent *)buf.dirent;
		while (size > 0) {
			offset = de->offset;

			if (func(cdp, de->name, de->namlen, de->offset,
				 de->ino, de->d_type))
				break;

			if (cdp->err != nfs_ok)
				break;

			/* Same record-size formula as nfsd_buffered_filldir(). */
			reclen = ALIGN(sizeof(*de) + de->namlen,
				       sizeof(u64));
			size -= reclen;
			de = (struct buffered_dirent *)((char *)de + reclen);
		}
		mutex_unlock(&dir_inode->i_mutex);
		if (size > 0) /* We bailed out early */
			break;

		/* Whole batch consumed: continue from the current file position. */
		offset = vfs_llseek(file, 0, SEEK_CUR);
	}

	free_page((unsigned long)(buf.dirent));

	if (host_err)
		return nfserrno(host_err);

	*offsetp = offset;
	return cdp->err;
}
1994
1995/*
1996 * Read entries from a directory.
1997 * The  NFSv3/4 verifier we ignore for now.
1998 */
1999__be32
2000nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
2001	     struct readdir_cd *cdp, filldir_t func)
2002{
2003	__be32		err;
2004	struct file	*file;
2005	loff_t		offset = *offsetp;
2006
2007	err = nfsd_open(rqstp, fhp, S_IFDIR, NFSD_MAY_READ, &file);
2008	if (err)
2009		goto out;
2010
2011	offset = vfs_llseek(file, offset, 0);
2012	if (offset < 0) {
2013		err = nfserrno((int)offset);
2014		goto out_close;
2015	}
2016
2017	err = nfsd_buffered_readdir(file, func, cdp, offsetp);
2018
2019	if (err == nfserr_eof || err == nfserr_toosmall)
2020		err = nfs_ok; /* can still be found in ->err */
2021out_close:
2022	nfsd_close(file);
2023out:
2024	return err;
2025}
2026
2027/*
2028 * Get file system stats
2029 * N.B. After this call fhp needs an fh_put
2030 */
2031__be32
2032nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, int access)
2033{
2034	__be32 err;
2035
2036	err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP | access);
2037	if (!err) {
2038		struct path path = {
2039			.mnt	= fhp->fh_export->ex_path.mnt,
2040			.dentry	= fhp->fh_dentry,
2041		};
2042		if (vfs_statfs(&path, stat))
2043			err = nfserr_io;
2044	}
2045	return err;
2046}
2047
2048static int exp_rdonly(struct svc_rqst *rqstp, struct svc_export *exp)
2049{
2050	return nfsexp_flags(rqstp, exp) & NFSEXP_READONLY;
2051}
2052
/*
 * Check for a user's access permissions to this inode.
 *
 * @exp:    export the object belongs to
 * @dentry: object being accessed
 * @acc:    bitmask of NFSD_MAY_* flags describing the requested access
 *
 * Returns 0 if access is granted, otherwise an NFS status code
 * (nfserr_rofs, nfserr_perm, or the translation of the
 * inode_permission() error).
 */
__be32
nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
					struct dentry *dentry, int acc)
{
	struct inode	*inode = dentry->d_inode;
	int		err;

	/* Nothing requested, nothing to check. */
	if (acc == NFSD_MAY_NOP)
		return 0;

	/* Normally we reject any write/sattr etc access on a read-only file
	 * system.  But if it is IRIX doing check on write-access for a
	 * device special file, we ignore rofs.
	 */
	if (!(acc & NFSD_MAY_LOCAL_ACCESS))
		if (acc & (NFSD_MAY_WRITE | NFSD_MAY_SATTR | NFSD_MAY_TRUNC)) {
			if (exp_rdonly(rqstp, exp) ||
			    __mnt_is_readonly(exp->ex_path.mnt))
				return nfserr_rofs;
			if (/* (acc & NFSD_MAY_WRITE) && */ IS_IMMUTABLE(inode))
				return nfserr_perm;
		}
	/* Append-only inodes may not be truncated. */
	if ((acc & NFSD_MAY_TRUNC) && IS_APPEND(inode))
		return nfserr_perm;

	if (acc & NFSD_MAY_LOCK) {
		/* If we cannot rely on authentication in NLM requests,
		 * just allow locks, otherwise require read permission, or
		 * ownership
		 */
		if (exp->ex_flags & NFSEXP_NOAUTHNLM)
			return 0;
		else
			acc = NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE;
	}
	/*
	 * The file owner always gets access permission for accesses that
	 * would normally be checked at open time. This is to make
	 * file access work even when the client has done a fchmod(fd, 0).
	 *
	 * However, `cp foo bar' should fail nevertheless when bar is
	 * readonly. A sensible way to do this might be to reject all
	 * attempts to truncate a read-only file, because a creat() call
	 * always implies file truncation.
	 * ... but this isn't really fair.  A process may reasonably call
	 * ftruncate on an open file descriptor on a file with perm 000.
	 * We must trust the client to do permission checking - using "ACCESS"
	 * with NFSv3.
	 */
	if ((acc & NFSD_MAY_OWNER_OVERRIDE) &&
	    inode->i_uid == current_fsuid())
		return 0;

	/* This assumes  NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */
	err = inode_permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC));

	/* Allow read access to binaries even when mode 111 */
	/* Only for a plain read-with-owner-override request on a regular
	 * file: retry the check asking for execute permission instead. */
	if (err == -EACCES && S_ISREG(inode->i_mode) &&
	    acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE))
		err = inode_permission(inode, MAY_EXEC);

	return err? nfserrno(err) : 0;
}
2119
2120void
2121nfsd_racache_shutdown(void)
2122{
2123	struct raparms *raparm, *last_raparm;
2124	unsigned int i;
2125
2126	dprintk("nfsd: freeing readahead buffers.\n");
2127
2128	for (i = 0; i < RAPARM_HASH_SIZE; i++) {
2129		raparm = raparm_hash[i].pb_head;
2130		while(raparm) {
2131			last_raparm = raparm;
2132			raparm = raparm->p_next;
2133			kfree(last_raparm);
2134		}
2135		raparm_hash[i].pb_head = NULL;
2136	}
2137}
2138/*
2139 * Initialize readahead param cache
2140 */
2141int
2142nfsd_racache_init(int cache_size)
2143{
2144	int	i;
2145	int	j = 0;
2146	int	nperbucket;
2147	struct raparms **raparm = NULL;
2148
2149
2150	if (raparm_hash[0].pb_head)
2151		return 0;
2152	nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE);
2153	if (nperbucket < 2)
2154		nperbucket = 2;
2155	cache_size = nperbucket * RAPARM_HASH_SIZE;
2156
2157	dprintk("nfsd: allocating %d readahead buffers.\n", cache_size);
2158
2159	for (i = 0; i < RAPARM_HASH_SIZE; i++) {
2160		spin_lock_init(&raparm_hash[i].pb_lock);
2161
2162		raparm = &raparm_hash[i].pb_head;
2163		for (j = 0; j < nperbucket; j++) {
2164			*raparm = kzalloc(sizeof(struct raparms), GFP_KERNEL);
2165			if (!*raparm)
2166				goto out_nomem;
2167			raparm = &(*raparm)->p_next;
2168		}
2169		*raparm = NULL;
2170	}
2171
2172	nfsdstats.ra_size = cache_size;
2173	return 0;
2174
2175out_nomem:
2176	dprintk("nfsd: kmalloc failed, freeing readahead buffers\n");
2177	nfsd_racache_shutdown();
2178	return -ENOMEM;
2179}
2180
2181#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
2182struct posix_acl *
2183nfsd_get_posix_acl(struct svc_fh *fhp, int type)
2184{
2185	struct inode *inode = fhp->fh_dentry->d_inode;
2186	char *name;
2187	void *value = NULL;
2188	ssize_t size;
2189	struct posix_acl *acl;
2190
2191	if (!IS_POSIXACL(inode))
2192		return ERR_PTR(-EOPNOTSUPP);
2193
2194	switch (type) {
2195	case ACL_TYPE_ACCESS:
2196		name = POSIX_ACL_XATTR_ACCESS;
2197		break;
2198	case ACL_TYPE_DEFAULT:
2199		name = POSIX_ACL_XATTR_DEFAULT;
2200		break;
2201	default:
2202		return ERR_PTR(-EOPNOTSUPP);
2203	}
2204
2205	size = nfsd_getxattr(fhp->fh_dentry, name, &value);
2206	if (size < 0)
2207		return ERR_PTR(size);
2208
2209	acl = posix_acl_from_xattr(value, size);
2210	kfree(value);
2211	return acl;
2212}
2213
2214int
2215nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl)
2216{
2217	struct inode *inode = fhp->fh_dentry->d_inode;
2218	char *name;
2219	void *value = NULL;
2220	size_t size;
2221	int error;
2222
2223	if (!IS_POSIXACL(inode) ||
2224	    !inode->i_op->setxattr || !inode->i_op->removexattr)
2225		return -EOPNOTSUPP;
2226	switch(type) {
2227		case ACL_TYPE_ACCESS:
2228			name = POSIX_ACL_XATTR_ACCESS;
2229			break;
2230		case ACL_TYPE_DEFAULT:
2231			name = POSIX_ACL_XATTR_DEFAULT;
2232			break;
2233		default:
2234			return -EOPNOTSUPP;
2235	}
2236
2237	if (acl && acl->a_count) {
2238		size = posix_acl_xattr_size(acl->a_count);
2239		value = kmalloc(size, GFP_KERNEL);
2240		if (!value)
2241			return -ENOMEM;
2242		error = posix_acl_to_xattr(acl, value, size);
2243		if (error < 0)
2244			goto getout;
2245		size = error;
2246	} else
2247		size = 0;
2248
2249	error = mnt_want_write(fhp->fh_export->ex_path.mnt);
2250	if (error)
2251		goto getout;
2252	if (size)
2253		error = vfs_setxattr(fhp->fh_dentry, name, value, size, 0);
2254	else {
2255		if (!S_ISDIR(inode->i_mode) && type == ACL_TYPE_DEFAULT)
2256			error = 0;
2257		else {
2258			error = vfs_removexattr(fhp->fh_dentry, name);
2259			if (error == -ENODATA)
2260				error = 0;
2261		}
2262	}
2263	mnt_drop_write(fhp->fh_export->ex_path.mnt);
2264
2265getout:
2266	kfree(value);
2267	return error;
2268}
2269#endif  /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */
2270