1// SPDX-License-Identifier: GPL-2.0
2#include <linux/ceph/ceph_debug.h>
3
4#include <linux/exportfs.h>
5#include <linux/slab.h>
6#include <asm/unaligned.h>
7
8#include "super.h"
9#include "mds_client.h"
10#include "crypto.h"
11
/*
 * Basic fh: identifies an inode by its ceph inode number only.
 *
 * Written by ceph_encode_fh() when no parent inode is supplied; the handle
 * length is reported in 32-bit words (sizeof >> 2).
 */
struct ceph_nfs_fh {
	u64 ino;	/* ceph_ino() of the encoded inode */
} __attribute__ ((packed));
18
/*
 * Larger ("connected") fh that includes parent ino.
 *
 * Written by ceph_encode_fh() when a parent inode is supplied. 'ino' is
 * ceph_ino() of the encoded inode and 'parent_ino' is ceph_ino() of the
 * parent. The leading 'ino' field sits at the same offset as in
 * struct ceph_nfs_fh.
 */
struct ceph_nfs_confh {
	u64 ino, parent_ino;
} __attribute__ ((packed));
25
/*
 * fh for snapped inode
 *
 * Written by ceph_encode_snapfh() and consumed by __snapfh_to_dentry().
 */
struct ceph_nfs_snapfh {
	u64 ino;	/* ceph_ino() of the encoded inode */
	u64 snapid;	/* ceph_snap() of the encoded inode */
	u64 parent_ino;	/* ceph_ino() of the parent directory */
	u32 hash;	/* ceph_dentry_hash() of the dentry in its parent, or 0 */
} __attribute__ ((packed));
35
36static int ceph_encode_snapfh(struct inode *inode, u32 *rawfh, int *max_len,
37			      struct inode *parent_inode)
38{
39	struct ceph_client *cl = ceph_inode_to_client(inode);
40	static const int snap_handle_length =
41		sizeof(struct ceph_nfs_snapfh) >> 2;
42	struct ceph_nfs_snapfh *sfh = (void *)rawfh;
43	u64 snapid = ceph_snap(inode);
44	int ret;
45	bool no_parent = true;
46
47	if (*max_len < snap_handle_length) {
48		*max_len = snap_handle_length;
49		ret = FILEID_INVALID;
50		goto out;
51	}
52
53	ret =  -EINVAL;
54	if (snapid != CEPH_SNAPDIR) {
55		struct inode *dir;
56		struct dentry *dentry = d_find_alias(inode);
57		if (!dentry)
58			goto out;
59
60		rcu_read_lock();
61		dir = d_inode_rcu(dentry->d_parent);
62		if (ceph_snap(dir) != CEPH_SNAPDIR) {
63			sfh->parent_ino = ceph_ino(dir);
64			sfh->hash = ceph_dentry_hash(dir, dentry);
65			no_parent = false;
66		}
67		rcu_read_unlock();
68		dput(dentry);
69	}
70
71	if (no_parent) {
72		if (!S_ISDIR(inode->i_mode))
73			goto out;
74		sfh->parent_ino = sfh->ino;
75		sfh->hash = 0;
76	}
77	sfh->ino = ceph_ino(inode);
78	sfh->snapid = snapid;
79
80	*max_len = snap_handle_length;
81	ret = FILEID_BTRFS_WITH_PARENT;
82out:
83	doutc(cl, "%p %llx.%llx ret=%d\n", inode, ceph_vinop(inode), ret);
84	return ret;
85}
86
87static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
88			  struct inode *parent_inode)
89{
90	struct ceph_client *cl = ceph_inode_to_client(inode);
91	static const int handle_length =
92		sizeof(struct ceph_nfs_fh) >> 2;
93	static const int connected_handle_length =
94		sizeof(struct ceph_nfs_confh) >> 2;
95	int type;
96
97	if (ceph_snap(inode) != CEPH_NOSNAP)
98		return ceph_encode_snapfh(inode, rawfh, max_len, parent_inode);
99
100	if (parent_inode && (*max_len < connected_handle_length)) {
101		*max_len = connected_handle_length;
102		return FILEID_INVALID;
103	} else if (*max_len < handle_length) {
104		*max_len = handle_length;
105		return FILEID_INVALID;
106	}
107
108	if (parent_inode) {
109		struct ceph_nfs_confh *cfh = (void *)rawfh;
110		doutc(cl, "%p %llx.%llx with parent %p %llx.%llx\n", inode,
111		      ceph_vinop(inode), parent_inode, ceph_vinop(parent_inode));
112		cfh->ino = ceph_ino(inode);
113		cfh->parent_ino = ceph_ino(parent_inode);
114		*max_len = connected_handle_length;
115		type = FILEID_INO32_GEN_PARENT;
116	} else {
117		struct ceph_nfs_fh *fh = (void *)rawfh;
118		doutc(cl, "%p %llx.%llx\n", inode, ceph_vinop(inode));
119		fh->ino = ceph_ino(inode);
120		*max_len = handle_length;
121		type = FILEID_INO32_GEN;
122	}
123	return type;
124}
125
126static struct inode *__lookup_inode(struct super_block *sb, u64 ino)
127{
128	struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(sb)->mdsc;
129	struct inode *inode;
130	struct ceph_vino vino;
131	int err;
132
133	vino.ino = ino;
134	vino.snap = CEPH_NOSNAP;
135
136	if (ceph_vino_is_reserved(vino))
137		return ERR_PTR(-ESTALE);
138
139	inode = ceph_find_inode(sb, vino);
140	if (!inode) {
141		struct ceph_mds_request *req;
142		int mask;
143
144		req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO,
145					       USE_ANY_MDS);
146		if (IS_ERR(req))
147			return ERR_CAST(req);
148
149		mask = CEPH_STAT_CAP_INODE;
150		if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
151			mask |= CEPH_CAP_XATTR_SHARED;
152		req->r_args.lookupino.mask = cpu_to_le32(mask);
153
154		req->r_ino1 = vino;
155		req->r_num_caps = 1;
156		err = ceph_mdsc_do_request(mdsc, NULL, req);
157		inode = req->r_target_inode;
158		if (inode)
159			ihold(inode);
160		ceph_mdsc_put_request(req);
161		if (!inode)
162			return err < 0 ? ERR_PTR(err) : ERR_PTR(-ESTALE);
163	} else {
164		if (ceph_inode_is_shutdown(inode)) {
165			iput(inode);
166			return ERR_PTR(-ESTALE);
167		}
168	}
169	return inode;
170}
171
172struct inode *ceph_lookup_inode(struct super_block *sb, u64 ino)
173{
174	struct inode *inode = __lookup_inode(sb, ino);
175	if (IS_ERR(inode))
176		return inode;
177	if (inode->i_nlink == 0) {
178		iput(inode);
179		return ERR_PTR(-ESTALE);
180	}
181	return inode;
182}
183
184static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
185{
186	struct inode *inode = __lookup_inode(sb, ino);
187	struct ceph_inode_info *ci = ceph_inode(inode);
188	int err;
189
190	if (IS_ERR(inode))
191		return ERR_CAST(inode);
192	/* We need LINK caps to reliably check i_nlink */
193	err = ceph_do_getattr(inode, CEPH_CAP_LINK_SHARED, false);
194	if (err) {
195		iput(inode);
196		return ERR_PTR(err);
197	}
198	/* -ESTALE if inode as been unlinked and no file is open */
199	if ((inode->i_nlink == 0) && !__ceph_is_file_opened(ci)) {
200		iput(inode);
201		return ERR_PTR(-ESTALE);
202	}
203	return d_obtain_alias(inode);
204}
205
206static struct dentry *__snapfh_to_dentry(struct super_block *sb,
207					  struct ceph_nfs_snapfh *sfh,
208					  bool want_parent)
209{
210	struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(sb)->mdsc;
211	struct ceph_client *cl = mdsc->fsc->client;
212	struct ceph_mds_request *req;
213	struct inode *inode;
214	struct ceph_vino vino;
215	int mask;
216	int err;
217	bool unlinked = false;
218
219	if (want_parent) {
220		vino.ino = sfh->parent_ino;
221		if (sfh->snapid == CEPH_SNAPDIR)
222			vino.snap = CEPH_NOSNAP;
223		else if (sfh->ino == sfh->parent_ino)
224			vino.snap = CEPH_SNAPDIR;
225		else
226			vino.snap = sfh->snapid;
227	} else {
228		vino.ino = sfh->ino;
229		vino.snap = sfh->snapid;
230	}
231
232	if (ceph_vino_is_reserved(vino))
233		return ERR_PTR(-ESTALE);
234
235	inode = ceph_find_inode(sb, vino);
236	if (inode) {
237		if (ceph_inode_is_shutdown(inode)) {
238			iput(inode);
239			return ERR_PTR(-ESTALE);
240		}
241		return d_obtain_alias(inode);
242	}
243
244	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO,
245				       USE_ANY_MDS);
246	if (IS_ERR(req))
247		return ERR_CAST(req);
248
249	mask = CEPH_STAT_CAP_INODE;
250	if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
251		mask |= CEPH_CAP_XATTR_SHARED;
252	req->r_args.lookupino.mask = cpu_to_le32(mask);
253	if (vino.snap < CEPH_NOSNAP) {
254		req->r_args.lookupino.snapid = cpu_to_le64(vino.snap);
255		if (!want_parent && sfh->ino != sfh->parent_ino) {
256			req->r_args.lookupino.parent =
257					cpu_to_le64(sfh->parent_ino);
258			req->r_args.lookupino.hash =
259					cpu_to_le32(sfh->hash);
260		}
261	}
262
263	req->r_ino1 = vino;
264	req->r_num_caps = 1;
265	err = ceph_mdsc_do_request(mdsc, NULL, req);
266	inode = req->r_target_inode;
267	if (inode) {
268		if (vino.snap == CEPH_SNAPDIR) {
269			if (inode->i_nlink == 0)
270				unlinked = true;
271			inode = ceph_get_snapdir(inode);
272		} else if (ceph_snap(inode) == vino.snap) {
273			ihold(inode);
274		} else {
275			/* mds does not support lookup snapped inode */
276			inode = ERR_PTR(-EOPNOTSUPP);
277		}
278	} else {
279		inode = ERR_PTR(-ESTALE);
280	}
281	ceph_mdsc_put_request(req);
282
283	if (want_parent) {
284		doutc(cl, "%llx.%llx\n err=%d\n", vino.ino, vino.snap, err);
285	} else {
286		doutc(cl, "%llx.%llx parent %llx hash %x err=%d", vino.ino,
287		      vino.snap, sfh->parent_ino, sfh->hash, err);
288	}
289	/* see comments in ceph_get_parent() */
290	return unlinked ? d_obtain_root(inode) : d_obtain_alias(inode);
291}
292
293/*
294 * convert regular fh to dentry
295 */
296static struct dentry *ceph_fh_to_dentry(struct super_block *sb,
297					struct fid *fid,
298					int fh_len, int fh_type)
299{
300	struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb);
301	struct ceph_nfs_fh *fh = (void *)fid->raw;
302
303	if (fh_type == FILEID_BTRFS_WITH_PARENT) {
304		struct ceph_nfs_snapfh *sfh = (void *)fid->raw;
305		return __snapfh_to_dentry(sb, sfh, false);
306	}
307
308	if (fh_type != FILEID_INO32_GEN  &&
309	    fh_type != FILEID_INO32_GEN_PARENT)
310		return NULL;
311	if (fh_len < sizeof(*fh) / 4)
312		return NULL;
313
314	doutc(fsc->client, "%llx\n", fh->ino);
315	return __fh_to_dentry(sb, fh->ino);
316}
317
318static struct dentry *__get_parent(struct super_block *sb,
319				   struct dentry *child, u64 ino)
320{
321	struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(sb)->mdsc;
322	struct ceph_mds_request *req;
323	struct inode *inode;
324	int mask;
325	int err;
326
327	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPPARENT,
328				       USE_ANY_MDS);
329	if (IS_ERR(req))
330		return ERR_CAST(req);
331
332	if (child) {
333		req->r_inode = d_inode(child);
334		ihold(d_inode(child));
335	} else {
336		req->r_ino1 = (struct ceph_vino) {
337			.ino = ino,
338			.snap = CEPH_NOSNAP,
339		};
340	}
341
342	mask = CEPH_STAT_CAP_INODE;
343	if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
344		mask |= CEPH_CAP_XATTR_SHARED;
345	req->r_args.getattr.mask = cpu_to_le32(mask);
346
347	req->r_num_caps = 1;
348	err = ceph_mdsc_do_request(mdsc, NULL, req);
349	if (err) {
350		ceph_mdsc_put_request(req);
351		return ERR_PTR(err);
352	}
353
354	inode = req->r_target_inode;
355	if (inode)
356		ihold(inode);
357	ceph_mdsc_put_request(req);
358	if (!inode)
359		return ERR_PTR(-ENOENT);
360
361	return d_obtain_alias(inode);
362}
363
364static struct dentry *ceph_get_parent(struct dentry *child)
365{
366	struct inode *inode = d_inode(child);
367	struct ceph_client *cl = ceph_inode_to_client(inode);
368	struct dentry *dn;
369
370	if (ceph_snap(inode) != CEPH_NOSNAP) {
371		struct inode* dir;
372		bool unlinked = false;
373		/* do not support non-directory */
374		if (!d_is_dir(child)) {
375			dn = ERR_PTR(-EINVAL);
376			goto out;
377		}
378		dir = __lookup_inode(inode->i_sb, ceph_ino(inode));
379		if (IS_ERR(dir)) {
380			dn = ERR_CAST(dir);
381			goto out;
382		}
383		/* There can be multiple paths to access snapped inode.
384		 * For simplicity, treat snapdir of head inode as parent */
385		if (ceph_snap(inode) != CEPH_SNAPDIR) {
386			struct inode *snapdir = ceph_get_snapdir(dir);
387			if (dir->i_nlink == 0)
388				unlinked = true;
389			iput(dir);
390			if (IS_ERR(snapdir)) {
391				dn = ERR_CAST(snapdir);
392				goto out;
393			}
394			dir = snapdir;
395		}
396		/* If directory has already been deleted, futher get_parent
397		 * will fail. Do not mark snapdir dentry as disconnected,
398		 * this prevent exportfs from doing futher get_parent. */
399		if (unlinked)
400			dn = d_obtain_root(dir);
401		else
402			dn = d_obtain_alias(dir);
403	} else {
404		dn = __get_parent(child->d_sb, child, 0);
405	}
406out:
407	doutc(cl, "child %p %p %llx.%llx err=%ld\n", child, inode,
408	      ceph_vinop(inode), (long)PTR_ERR_OR_ZERO(dn));
409	return dn;
410}
411
412/*
413 * convert regular fh to parent
414 */
415static struct dentry *ceph_fh_to_parent(struct super_block *sb,
416					struct fid *fid,
417					int fh_len, int fh_type)
418{
419	struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb);
420	struct ceph_nfs_confh *cfh = (void *)fid->raw;
421	struct dentry *dentry;
422
423	if (fh_type == FILEID_BTRFS_WITH_PARENT) {
424		struct ceph_nfs_snapfh *sfh = (void *)fid->raw;
425		return __snapfh_to_dentry(sb, sfh, true);
426	}
427
428	if (fh_type != FILEID_INO32_GEN_PARENT)
429		return NULL;
430	if (fh_len < sizeof(*cfh) / 4)
431		return NULL;
432
433	doutc(fsc->client, "%llx\n", cfh->parent_ino);
434	dentry = __get_parent(sb, NULL, cfh->ino);
435	if (unlikely(dentry == ERR_PTR(-ENOENT)))
436		dentry = __fh_to_dentry(sb, cfh->parent_ino);
437	return dentry;
438}
439
/*
 * Find the name of snapped inode @child within @parent and copy it,
 * NUL-terminated, into @name.
 *
 * Child and parent must share the same inode number. Two cases succeed:
 *  - child is the snapdir and parent is the head inode: the name is the
 *    snapdir_name mount option;
 *  - parent is the snapdir: its entries are listed with LSSNAP requests
 *    until one whose snapid matches the child's is found.
 *
 * Returns 0 on success, -EINVAL for any other child/parent combination,
 * -ENOENT if the listing ends without a match, -ENOMEM on allocation
 * failure, or an error from creating or performing the MDS request.
 */
static int __get_snap_name(struct dentry *parent, char *name,
			   struct dentry *child)
{
	struct inode *inode = d_inode(child);
	struct inode *dir = d_inode(parent);
	struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode);
	struct ceph_mds_request *req = NULL;
	char *last_name = NULL;
	/* NOTE(review): presumably offset 2 skips "." and ".." — confirm */
	unsigned next_offset = 2;
	int err = -EINVAL;

	if (ceph_ino(inode) != ceph_ino(dir))
		goto out;
	if (ceph_snap(inode) == CEPH_SNAPDIR) {
		if (ceph_snap(dir) == CEPH_NOSNAP) {
			strcpy(name, fsc->mount_options->snapdir_name);
			err = 0;
		}
		goto out;
	}
	if (ceph_snap(dir) != CEPH_SNAPDIR)
		goto out;

	/* one LSSNAP request per iteration, resuming after the last entry */
	while (1) {
		struct ceph_mds_reply_info_parsed *rinfo;
		struct ceph_mds_reply_dir_entry *rde;
		int i;

		req = ceph_mdsc_create_request(fsc->mdsc, CEPH_MDS_OP_LSSNAP,
					       USE_AUTH_MDS);
		if (IS_ERR(req)) {
			err = PTR_ERR(req);
			/* keep the out path from putting an error pointer */
			req = NULL;
			goto out;
		}
		err = ceph_alloc_readdir_reply_buffer(req, inode);
		if (err)
			goto out;

		req->r_direct_mode = USE_AUTH_MDS;
		req->r_readdir_offset = next_offset;
		req->r_args.readdir.flags =
				cpu_to_le16(CEPH_READDIR_REPLY_BITFLAGS);
		if (last_name) {
			/*
			 * Hand the string to the request and clear the local
			 * pointer so the kfree() at out: does not free it.
			 * NOTE(review): presumably the request frees r_path2
			 * when it is released — confirm.
			 */
			req->r_path2 = last_name;
			last_name = NULL;
		}

		req->r_inode = dir;
		ihold(dir);
		req->r_dentry = dget(parent);

		inode_lock(dir);
		err = ceph_mdsc_do_request(fsc->mdsc, NULL, req);
		inode_unlock(dir);

		if (err < 0)
			goto out;

		/* scan this batch for the entry with the child's snapid */
		rinfo = &req->r_reply_info;
		for (i = 0; i < rinfo->dir_nr; i++) {
			rde = rinfo->dir_entries + i;
			BUG_ON(!rde->inode.in);
			if (ceph_snap(inode) ==
			    le64_to_cpu(rde->inode.in->snapid)) {
				memcpy(name, rde->name, rde->name_len);
				name[rde->name_len] = '\0';
				err = 0;
				goto out;
			}
		}

		if (rinfo->dir_end)
			break;

		/* remember the last entry's name to continue from there */
		BUG_ON(rinfo->dir_nr <= 0);
		rde = rinfo->dir_entries + (rinfo->dir_nr - 1);
		next_offset += rinfo->dir_nr;
		last_name = kstrndup(rde->name, rde->name_len, GFP_KERNEL);
		if (!last_name) {
			err = -ENOMEM;
			goto out;
		}

		ceph_mdsc_put_request(req);
		req = NULL;
	}
	/* listing ended without a matching snapid */
	err = -ENOENT;
out:
	if (req)
		ceph_mdsc_put_request(req);
	kfree(last_name);
	doutc(fsc->client, "child dentry %p %p %llx.%llx err=%d\n", child,
	      inode, ceph_vinop(inode), err);
	return err;
}
536
537static int ceph_get_name(struct dentry *parent, char *name,
538			 struct dentry *child)
539{
540	struct ceph_mds_client *mdsc;
541	struct ceph_mds_request *req;
542	struct inode *dir = d_inode(parent);
543	struct inode *inode = d_inode(child);
544	struct ceph_mds_reply_info_parsed *rinfo;
545	int err;
546
547	if (ceph_snap(inode) != CEPH_NOSNAP)
548		return __get_snap_name(parent, name, child);
549
550	mdsc = ceph_inode_to_fs_client(inode)->mdsc;
551	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPNAME,
552				       USE_ANY_MDS);
553	if (IS_ERR(req))
554		return PTR_ERR(req);
555
556	inode_lock(dir);
557	req->r_inode = inode;
558	ihold(inode);
559	req->r_ino2 = ceph_vino(d_inode(parent));
560	req->r_parent = dir;
561	ihold(dir);
562	set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
563	req->r_num_caps = 2;
564	err = ceph_mdsc_do_request(mdsc, NULL, req);
565	inode_unlock(dir);
566
567	if (err)
568		goto out;
569
570	rinfo = &req->r_reply_info;
571	if (!IS_ENCRYPTED(dir)) {
572		memcpy(name, rinfo->dname, rinfo->dname_len);
573		name[rinfo->dname_len] = 0;
574	} else {
575		struct fscrypt_str oname = FSTR_INIT(NULL, 0);
576		struct ceph_fname fname = { .dir	= dir,
577					    .name	= rinfo->dname,
578					    .ctext	= rinfo->altname,
579					    .name_len	= rinfo->dname_len,
580					    .ctext_len	= rinfo->altname_len };
581
582		err = ceph_fname_alloc_buffer(dir, &oname);
583		if (err < 0)
584			goto out;
585
586		err = ceph_fname_to_usr(&fname, NULL, &oname, NULL);
587		if (!err) {
588			memcpy(name, oname.name, oname.len);
589			name[oname.len] = 0;
590		}
591		ceph_fname_free_buffer(dir, &oname);
592	}
593out:
594	doutc(mdsc->fsc->client, "child dentry %p %p %llx.%llx err %d %s%s\n",
595	      child, inode, ceph_vinop(inode), err, err ? "" : "name ",
596	      err ? "" : name);
597	ceph_mdsc_put_request(req);
598	return err;
599}
600
/*
 * Export operations table: binds the file handle encode/decode and
 * parent/name lookup helpers defined in this file to the export_operations
 * hooks.
 */
const struct export_operations ceph_export_ops = {
	.encode_fh = ceph_encode_fh,
	.fh_to_dentry = ceph_fh_to_dentry,
	.fh_to_parent = ceph_fh_to_parent,
	.get_parent = ceph_get_parent,
	.get_name = ceph_get_name,
};
608