1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (C) 2011 Novell Inc.
4 * Copyright (C) 2016 Red Hat, Inc.
5 */
6
7#include <linux/fs.h>
8#include <linux/cred.h>
9#include <linux/ctype.h>
10#include <linux/namei.h>
11#include <linux/xattr.h>
12#include <linux/ratelimit.h>
13#include <linux/mount.h>
14#include <linux/exportfs.h>
15#include "overlayfs.h"
16
17#include "../internal.h"	/* for vfs_path_lookup */
18
/*
 * State carried through a lookup of one name across the overlay layers.
 * It is filled in by the per-layer lookup helpers in this file and read
 * back by the caller after each layer.
 */
struct ovl_lookup_data {
	/* Overlay super block; used to reach the ovl_fs and for trap inode checks */
	struct super_block *sb;
	/* Layer being looked up; its mnt is used for the real lookups */
	const struct ovl_layer *layer;
	/* Name to look up; replaced by the redirect path once one is found */
	struct qstr name;
	/* Set when the last path element found is a directory that can be looked up */
	bool is_dir;
	/* Set when a whiteout, or an opaque dir at the last element, was found */
	bool opaque;
	/* Set when a non-upper last element has overlay.opaque=x */
	bool xwhiteouts;
	/* Set when the result must not be looked up in further layers */
	bool stop;
	/* When set, opaque and redirect xattrs are not checked on the found dentry */
	bool last;
	/* Allocated redirect path; freed whenever a newer redirect replaces it */
	char *redirect;
	/* Result of the metacopy xattr check on the last non-dir found (0 if none) */
	int metacopy;
	/* Referring to last redirect xattr */
	bool absolute_redirect;
};
33
34static int ovl_check_redirect(const struct path *path, struct ovl_lookup_data *d,
35			      size_t prelen, const char *post)
36{
37	int res;
38	char *buf;
39	struct ovl_fs *ofs = OVL_FS(d->sb);
40
41	d->absolute_redirect = false;
42	buf = ovl_get_redirect_xattr(ofs, path, prelen + strlen(post));
43	if (IS_ERR_OR_NULL(buf))
44		return PTR_ERR(buf);
45
46	if (buf[0] == '/') {
47		d->absolute_redirect = true;
48		/*
49		 * One of the ancestor path elements in an absolute path
50		 * lookup in ovl_lookup_layer() could have been opaque and
51		 * that will stop further lookup in lower layers (d->stop=true)
52		 * But we have found an absolute redirect in descendant path
53		 * element and that should force continue lookup in lower
54		 * layers (reset d->stop).
55		 */
56		d->stop = false;
57	} else {
58		res = strlen(buf) + 1;
59		memmove(buf + prelen, buf, res);
60		memcpy(buf, d->name.name, prelen);
61	}
62
63	strcat(buf, post);
64	kfree(d->redirect);
65	d->redirect = buf;
66	d->name.name = d->redirect;
67	d->name.len = strlen(d->redirect);
68
69	return 0;
70}
71
72static int ovl_acceptable(void *ctx, struct dentry *dentry)
73{
74	/*
75	 * A non-dir origin may be disconnected, which is fine, because
76	 * we only need it for its unique inode number.
77	 */
78	if (!d_is_dir(dentry))
79		return 1;
80
81	/* Don't decode a deleted empty directory */
82	if (d_unhashed(dentry))
83		return 0;
84
85	/* Check if directory belongs to the layer we are decoding from */
86	return is_subdir(dentry, ((struct vfsmount *)ctx)->mnt_root);
87}
88
89/*
90 * Check validity of an overlay file handle buffer.
91 *
92 * Return 0 for a valid file handle.
93 * Return -ENODATA for "origin unknown".
94 * Return <0 for an invalid file handle.
95 */
96int ovl_check_fb_len(struct ovl_fb *fb, int fb_len)
97{
98	if (fb_len < sizeof(struct ovl_fb) || fb_len < fb->len)
99		return -EINVAL;
100
101	if (fb->magic != OVL_FH_MAGIC)
102		return -EINVAL;
103
104	/* Treat larger version and unknown flags as "origin unknown" */
105	if (fb->version > OVL_FH_VERSION || fb->flags & ~OVL_FH_FLAG_ALL)
106		return -ENODATA;
107
108	/* Treat endianness mismatch as "origin unknown" */
109	if (!(fb->flags & OVL_FH_FLAG_ANY_ENDIAN) &&
110	    (fb->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
111		return -ENODATA;
112
113	return 0;
114}
115
116static struct ovl_fh *ovl_get_fh(struct ovl_fs *ofs, struct dentry *upperdentry,
117				 enum ovl_xattr ox)
118{
119	int res, err;
120	struct ovl_fh *fh = NULL;
121
122	res = ovl_getxattr_upper(ofs, upperdentry, ox, NULL, 0);
123	if (res < 0) {
124		if (res == -ENODATA || res == -EOPNOTSUPP)
125			return NULL;
126		goto fail;
127	}
128	/* Zero size value means "copied up but origin unknown" */
129	if (res == 0)
130		return NULL;
131
132	fh = kzalloc(res + OVL_FH_WIRE_OFFSET, GFP_KERNEL);
133	if (!fh)
134		return ERR_PTR(-ENOMEM);
135
136	res = ovl_getxattr_upper(ofs, upperdentry, ox, fh->buf, res);
137	if (res < 0)
138		goto fail;
139
140	err = ovl_check_fb_len(&fh->fb, res);
141	if (err < 0) {
142		if (err == -ENODATA)
143			goto out;
144		goto invalid;
145	}
146
147	return fh;
148
149out:
150	kfree(fh);
151	return NULL;
152
153fail:
154	pr_warn_ratelimited("failed to get origin (%i)\n", res);
155	goto out;
156invalid:
157	pr_warn_ratelimited("invalid origin (%*phN)\n", res, fh);
158	goto out;
159}
160
161struct dentry *ovl_decode_real_fh(struct ovl_fs *ofs, struct ovl_fh *fh,
162				  struct vfsmount *mnt, bool connected)
163{
164	struct dentry *real;
165	int bytes;
166
167	if (!capable(CAP_DAC_READ_SEARCH))
168		return NULL;
169
170	/*
171	 * Make sure that the stored uuid matches the uuid of the lower
172	 * layer where file handle will be decoded.
173	 * In case of uuid=off option just make sure that stored uuid is null.
174	 */
175	if (ovl_origin_uuid(ofs) ?
176	    !uuid_equal(&fh->fb.uuid, &mnt->mnt_sb->s_uuid) :
177	    !uuid_is_null(&fh->fb.uuid))
178		return NULL;
179
180	bytes = (fh->fb.len - offsetof(struct ovl_fb, fid));
181	real = exportfs_decode_fh(mnt, (struct fid *)fh->fb.fid,
182				  bytes >> 2, (int)fh->fb.type,
183				  connected ? ovl_acceptable : NULL, mnt);
184	if (IS_ERR(real)) {
185		/*
186		 * Treat stale file handle to lower file as "origin unknown".
187		 * upper file handle could become stale when upper file is
188		 * unlinked and this information is needed to handle stale
189		 * index entries correctly.
190		 */
191		if (real == ERR_PTR(-ESTALE) &&
192		    !(fh->fb.flags & OVL_FH_FLAG_PATH_UPPER))
193			real = NULL;
194		return real;
195	}
196
197	if (ovl_dentry_weird(real)) {
198		dput(real);
199		return NULL;
200	}
201
202	return real;
203}
204
205static struct dentry *ovl_lookup_positive_unlocked(struct ovl_lookup_data *d,
206						   const char *name,
207						   struct dentry *base, int len,
208						   bool drop_negative)
209{
210	struct dentry *ret = lookup_one_unlocked(mnt_idmap(d->layer->mnt), name,
211						 base, len);
212
213	if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) {
214		if (drop_negative && ret->d_lockref.count == 1) {
215			spin_lock(&ret->d_lock);
216			/* Recheck condition under lock */
217			if (d_is_negative(ret) && ret->d_lockref.count == 1)
218				__d_drop(ret);
219			spin_unlock(&ret->d_lock);
220		}
221		dput(ret);
222		ret = ERR_PTR(-ENOENT);
223	}
224	return ret;
225}
226
227static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
228			     const char *name, unsigned int namelen,
229			     size_t prelen, const char *post,
230			     struct dentry **ret, bool drop_negative)
231{
232	struct ovl_fs *ofs = OVL_FS(d->sb);
233	struct dentry *this;
234	struct path path;
235	int err;
236	bool last_element = !post[0];
237	bool is_upper = d->layer->idx == 0;
238	char val;
239
240	this = ovl_lookup_positive_unlocked(d, name, base, namelen, drop_negative);
241	if (IS_ERR(this)) {
242		err = PTR_ERR(this);
243		this = NULL;
244		if (err == -ENOENT || err == -ENAMETOOLONG)
245			goto out;
246		goto out_err;
247	}
248
249	if (ovl_dentry_weird(this)) {
250		/* Don't support traversing automounts and other weirdness */
251		err = -EREMOTE;
252		goto out_err;
253	}
254
255	path.dentry = this;
256	path.mnt = d->layer->mnt;
257	if (ovl_path_is_whiteout(ofs, &path)) {
258		d->stop = d->opaque = true;
259		goto put_and_out;
260	}
261	/*
262	 * This dentry should be a regular file if previous layer lookup
263	 * found a metacopy dentry.
264	 */
265	if (last_element && d->metacopy && !d_is_reg(this)) {
266		d->stop = true;
267		goto put_and_out;
268	}
269
270	if (!d_can_lookup(this)) {
271		if (d->is_dir || !last_element) {
272			d->stop = true;
273			goto put_and_out;
274		}
275		err = ovl_check_metacopy_xattr(ofs, &path, NULL);
276		if (err < 0)
277			goto out_err;
278
279		d->metacopy = err;
280		d->stop = !d->metacopy;
281		if (!d->metacopy || d->last)
282			goto out;
283	} else {
284		if (ovl_lookup_trap_inode(d->sb, this)) {
285			/* Caught in a trap of overlapping layers */
286			err = -ELOOP;
287			goto out_err;
288		}
289
290		if (last_element)
291			d->is_dir = true;
292		if (d->last)
293			goto out;
294
295		/* overlay.opaque=x means xwhiteouts directory */
296		val = ovl_get_opaquedir_val(ofs, &path);
297		if (last_element && !is_upper && val == 'x') {
298			d->xwhiteouts = true;
299			ovl_layer_set_xwhiteouts(ofs, d->layer);
300		} else if (val == 'y') {
301			d->stop = true;
302			if (last_element)
303				d->opaque = true;
304			goto out;
305		}
306	}
307	err = ovl_check_redirect(&path, d, prelen, post);
308	if (err)
309		goto out_err;
310out:
311	*ret = this;
312	return 0;
313
314put_and_out:
315	dput(this);
316	this = NULL;
317	goto out;
318
319out_err:
320	dput(this);
321	return err;
322}
323
324static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
325			    struct dentry **ret, bool drop_negative)
326{
327	/* Counting down from the end, since the prefix can change */
328	size_t rem = d->name.len - 1;
329	struct dentry *dentry = NULL;
330	int err;
331
332	if (d->name.name[0] != '/')
333		return ovl_lookup_single(base, d, d->name.name, d->name.len,
334					 0, "", ret, drop_negative);
335
336	while (!IS_ERR_OR_NULL(base) && d_can_lookup(base)) {
337		const char *s = d->name.name + d->name.len - rem;
338		const char *next = strchrnul(s, '/');
339		size_t thislen = next - s;
340		bool end = !next[0];
341
342		/* Verify we did not go off the rails */
343		if (WARN_ON(s[-1] != '/'))
344			return -EIO;
345
346		err = ovl_lookup_single(base, d, s, thislen,
347					d->name.len - rem, next, &base,
348					drop_negative);
349		dput(dentry);
350		if (err)
351			return err;
352		dentry = base;
353		if (end)
354			break;
355
356		rem -= thislen + 1;
357
358		if (WARN_ON(rem >= d->name.len))
359			return -EIO;
360	}
361	*ret = dentry;
362	return 0;
363}
364
365static int ovl_lookup_data_layer(struct dentry *dentry, const char *redirect,
366				 const struct ovl_layer *layer,
367				 struct path *datapath)
368{
369	int err;
370
371	err = vfs_path_lookup(layer->mnt->mnt_root, layer->mnt, redirect,
372			LOOKUP_BENEATH | LOOKUP_NO_SYMLINKS | LOOKUP_NO_XDEV,
373			datapath);
374	pr_debug("lookup lowerdata (%pd2, redirect=\"%s\", layer=%d, err=%i)\n",
375		 dentry, redirect, layer->idx, err);
376
377	if (err)
378		return err;
379
380	err = -EREMOTE;
381	if (ovl_dentry_weird(datapath->dentry))
382		goto out_path_put;
383
384	err = -ENOENT;
385	/* Only regular file is acceptable as lower data */
386	if (!d_is_reg(datapath->dentry))
387		goto out_path_put;
388
389	return 0;
390
391out_path_put:
392	path_put(datapath);
393
394	return err;
395}
396
397/* Lookup in data-only layers by absolute redirect to layer root */
398static int ovl_lookup_data_layers(struct dentry *dentry, const char *redirect,
399				  struct ovl_path *lowerdata)
400{
401	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
402	const struct ovl_layer *layer;
403	struct path datapath;
404	int err = -ENOENT;
405	int i;
406
407	layer = &ofs->layers[ofs->numlayer - ofs->numdatalayer];
408	for (i = 0; i < ofs->numdatalayer; i++, layer++) {
409		err = ovl_lookup_data_layer(dentry, redirect, layer, &datapath);
410		if (!err) {
411			mntput(datapath.mnt);
412			lowerdata->dentry = datapath.dentry;
413			lowerdata->layer = layer;
414			return 0;
415		}
416	}
417
418	return err;
419}
420
421int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
422			struct dentry *upperdentry, struct ovl_path **stackp)
423{
424	struct dentry *origin = NULL;
425	int i;
426
427	for (i = 1; i <= ovl_numlowerlayer(ofs); i++) {
428		/*
429		 * If lower fs uuid is not unique among lower fs we cannot match
430		 * fh->uuid to layer.
431		 */
432		if (ofs->layers[i].fsid &&
433		    ofs->layers[i].fs->bad_uuid)
434			continue;
435
436		origin = ovl_decode_real_fh(ofs, fh, ofs->layers[i].mnt,
437					    connected);
438		if (origin)
439			break;
440	}
441
442	if (!origin)
443		return -ESTALE;
444	else if (IS_ERR(origin))
445		return PTR_ERR(origin);
446
447	if (upperdentry && !ovl_upper_is_whiteout(ofs, upperdentry) &&
448	    inode_wrong_type(d_inode(upperdentry), d_inode(origin)->i_mode))
449		goto invalid;
450
451	if (!*stackp)
452		*stackp = kmalloc(sizeof(struct ovl_path), GFP_KERNEL);
453	if (!*stackp) {
454		dput(origin);
455		return -ENOMEM;
456	}
457	**stackp = (struct ovl_path){
458		.dentry = origin,
459		.layer = &ofs->layers[i]
460	};
461
462	return 0;
463
464invalid:
465	pr_warn_ratelimited("invalid origin (%pd2, ftype=%x, origin ftype=%x).\n",
466			    upperdentry, d_inode(upperdentry)->i_mode & S_IFMT,
467			    d_inode(origin)->i_mode & S_IFMT);
468	dput(origin);
469	return -ESTALE;
470}
471
472static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry,
473			    struct ovl_path **stackp)
474{
475	struct ovl_fh *fh = ovl_get_fh(ofs, upperdentry, OVL_XATTR_ORIGIN);
476	int err;
477
478	if (IS_ERR_OR_NULL(fh))
479		return PTR_ERR(fh);
480
481	err = ovl_check_origin_fh(ofs, fh, false, upperdentry, stackp);
482	kfree(fh);
483
484	if (err) {
485		if (err == -ESTALE)
486			return 0;
487		return err;
488	}
489
490	return 0;
491}
492
493/*
494 * Verify that @fh matches the file handle stored in xattr @name.
495 * Return 0 on match, -ESTALE on mismatch, < 0 on error.
496 */
497static int ovl_verify_fh(struct ovl_fs *ofs, struct dentry *dentry,
498			 enum ovl_xattr ox, const struct ovl_fh *fh)
499{
500	struct ovl_fh *ofh = ovl_get_fh(ofs, dentry, ox);
501	int err = 0;
502
503	if (!ofh)
504		return -ENODATA;
505
506	if (IS_ERR(ofh))
507		return PTR_ERR(ofh);
508
509	if (fh->fb.len != ofh->fb.len || memcmp(&fh->fb, &ofh->fb, fh->fb.len))
510		err = -ESTALE;
511
512	kfree(ofh);
513	return err;
514}
515
516int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry,
517		      enum ovl_xattr ox, const struct ovl_fh *fh,
518		      bool is_upper, bool set)
519{
520	int err;
521
522	err = ovl_verify_fh(ofs, dentry, ox, fh);
523	if (set && err == -ENODATA)
524		err = ovl_setxattr(ofs, dentry, ox, fh->buf, fh->fb.len);
525
526	return err;
527}
528
529/*
530 * Verify that @real dentry matches the file handle stored in xattr @name.
531 *
532 * If @set is true and there is no stored file handle, encode @real and store
533 * file handle in xattr @name.
534 *
535 * Return 0 on match, -ESTALE on mismatch, -ENODATA on no xattr, < 0 on error.
536 */
537int ovl_verify_origin_xattr(struct ovl_fs *ofs, struct dentry *dentry,
538			    enum ovl_xattr ox, struct dentry *real,
539			    bool is_upper, bool set)
540{
541	struct inode *inode;
542	struct ovl_fh *fh;
543	int err;
544
545	fh = ovl_encode_real_fh(ofs, real, is_upper);
546	err = PTR_ERR(fh);
547	if (IS_ERR(fh)) {
548		fh = NULL;
549		goto fail;
550	}
551
552	err = ovl_verify_set_fh(ofs, dentry, ox, fh, is_upper, set);
553	if (err)
554		goto fail;
555
556out:
557	kfree(fh);
558	return err;
559
560fail:
561	inode = d_inode(real);
562	pr_warn_ratelimited("failed to verify %s (%pd2, ino=%lu, err=%i)\n",
563			    is_upper ? "upper" : "origin", real,
564			    inode ? inode->i_ino : 0, err);
565	goto out;
566}
567
568
569/* Get upper dentry from index */
570struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index,
571			       bool connected)
572{
573	struct ovl_fh *fh;
574	struct dentry *upper;
575
576	if (!d_is_dir(index))
577		return dget(index);
578
579	fh = ovl_get_fh(ofs, index, OVL_XATTR_UPPER);
580	if (IS_ERR_OR_NULL(fh))
581		return ERR_CAST(fh);
582
583	upper = ovl_decode_real_fh(ofs, fh, ovl_upper_mnt(ofs), connected);
584	kfree(fh);
585
586	if (IS_ERR_OR_NULL(upper))
587		return upper ?: ERR_PTR(-ESTALE);
588
589	if (!d_is_dir(upper)) {
590		pr_warn_ratelimited("invalid index upper (%pd2, upper=%pd2).\n",
591				    index, upper);
592		dput(upper);
593		return ERR_PTR(-EIO);
594	}
595
596	return upper;
597}
598
599/*
600 * Verify that an index entry name matches the origin file handle stored in
601 * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path.
602 * Return 0 on match, -ESTALE on mismatch or stale origin, < 0 on error.
603 */
604int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
605{
606	struct ovl_fh *fh = NULL;
607	size_t len;
608	struct ovl_path origin = { };
609	struct ovl_path *stack = &origin;
610	struct dentry *upper = NULL;
611	int err;
612
613	if (!d_inode(index))
614		return 0;
615
616	err = -EINVAL;
617	if (index->d_name.len < sizeof(struct ovl_fb)*2)
618		goto fail;
619
620	err = -ENOMEM;
621	len = index->d_name.len / 2;
622	fh = kzalloc(len + OVL_FH_WIRE_OFFSET, GFP_KERNEL);
623	if (!fh)
624		goto fail;
625
626	err = -EINVAL;
627	if (hex2bin(fh->buf, index->d_name.name, len))
628		goto fail;
629
630	err = ovl_check_fb_len(&fh->fb, len);
631	if (err)
632		goto fail;
633
634	/*
635	 * Whiteout index entries are used as an indication that an exported
636	 * overlay file handle should be treated as stale (i.e. after unlink
637	 * of the overlay inode). These entries contain no origin xattr.
638	 */
639	if (ovl_is_whiteout(index))
640		goto out;
641
642	/*
643	 * Verifying directory index entries are not stale is expensive, so
644	 * only verify stale dir index if NFS export is enabled.
645	 */
646	if (d_is_dir(index) && !ofs->config.nfs_export)
647		goto out;
648
649	/*
650	 * Directory index entries should have 'upper' xattr pointing to the
651	 * real upper dir. Non-dir index entries are hardlinks to the upper
652	 * real inode. For non-dir index, we can read the copy up origin xattr
653	 * directly from the index dentry, but for dir index we first need to
654	 * decode the upper directory.
655	 */
656	upper = ovl_index_upper(ofs, index, false);
657	if (IS_ERR_OR_NULL(upper)) {
658		err = PTR_ERR(upper);
659		/*
660		 * Directory index entries with no 'upper' xattr need to be
661		 * removed. When dir index entry has a stale 'upper' xattr,
662		 * we assume that upper dir was removed and we treat the dir
663		 * index as orphan entry that needs to be whited out.
664		 */
665		if (err == -ESTALE)
666			goto orphan;
667		else if (!err)
668			err = -ESTALE;
669		goto fail;
670	}
671
672	err = ovl_verify_fh(ofs, upper, OVL_XATTR_ORIGIN, fh);
673	dput(upper);
674	if (err)
675		goto fail;
676
677	/* Check if non-dir index is orphan and don't warn before cleaning it */
678	if (!d_is_dir(index) && d_inode(index)->i_nlink == 1) {
679		err = ovl_check_origin_fh(ofs, fh, false, index, &stack);
680		if (err)
681			goto fail;
682
683		if (ovl_get_nlink(ofs, origin.dentry, index, 0) == 0)
684			goto orphan;
685	}
686
687out:
688	dput(origin.dentry);
689	kfree(fh);
690	return err;
691
692fail:
693	pr_warn_ratelimited("failed to verify index (%pd2, ftype=%x, err=%i)\n",
694			    index, d_inode(index)->i_mode & S_IFMT, err);
695	goto out;
696
697orphan:
698	pr_warn_ratelimited("orphan index entry (%pd2, ftype=%x, nlink=%u)\n",
699			    index, d_inode(index)->i_mode & S_IFMT,
700			    d_inode(index)->i_nlink);
701	err = -ENOENT;
702	goto out;
703}
704
705int ovl_get_index_name_fh(const struct ovl_fh *fh, struct qstr *name)
706{
707	char *n, *s;
708
709	n = kcalloc(fh->fb.len, 2, GFP_KERNEL);
710	if (!n)
711		return -ENOMEM;
712
713	s  = bin2hex(n, fh->buf, fh->fb.len);
714	*name = (struct qstr) QSTR_INIT(n, s - n);
715
716	return 0;
717
718}
719
720/*
721 * Lookup in indexdir for the index entry of a lower real inode or a copy up
722 * origin inode. The index entry name is the hex representation of the lower
723 * inode file handle.
724 *
 * If the index dentry is negative, then either no lower aliases have been
726 * copied up yet, or aliases have been copied up in older kernels and are
727 * not indexed.
728 *
729 * If the index dentry for a copy up origin inode is positive, but points
730 * to an inode different than the upper inode, then either the upper inode
731 * has been copied up and not indexed or it was indexed, but since then
732 * index dir was cleared. Either way, that index cannot be used to identify
733 * the overlay inode.
734 */
735int ovl_get_index_name(struct ovl_fs *ofs, struct dentry *origin,
736		       struct qstr *name)
737{
738	struct ovl_fh *fh;
739	int err;
740
741	fh = ovl_encode_real_fh(ofs, origin, false);
742	if (IS_ERR(fh))
743		return PTR_ERR(fh);
744
745	err = ovl_get_index_name_fh(fh, name);
746
747	kfree(fh);
748	return err;
749}
750
751/* Lookup index by file handle for NFS export */
752struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh)
753{
754	struct dentry *index;
755	struct qstr name;
756	int err;
757
758	err = ovl_get_index_name_fh(fh, &name);
759	if (err)
760		return ERR_PTR(err);
761
762	index = lookup_positive_unlocked(name.name, ofs->workdir, name.len);
763	kfree(name.name);
764	if (IS_ERR(index)) {
765		if (PTR_ERR(index) == -ENOENT)
766			index = NULL;
767		return index;
768	}
769
770	if (ovl_is_whiteout(index))
771		err = -ESTALE;
772	else if (ovl_dentry_weird(index))
773		err = -EIO;
774	else
775		return index;
776
777	dput(index);
778	return ERR_PTR(err);
779}
780
781struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
782				struct dentry *origin, bool verify)
783{
784	struct dentry *index;
785	struct inode *inode;
786	struct qstr name;
787	bool is_dir = d_is_dir(origin);
788	int err;
789
790	err = ovl_get_index_name(ofs, origin, &name);
791	if (err)
792		return ERR_PTR(err);
793
794	index = lookup_one_positive_unlocked(ovl_upper_mnt_idmap(ofs), name.name,
795					     ofs->workdir, name.len);
796	if (IS_ERR(index)) {
797		err = PTR_ERR(index);
798		if (err == -ENOENT) {
799			index = NULL;
800			goto out;
801		}
802		pr_warn_ratelimited("failed inode index lookup (ino=%lu, key=%.*s, err=%i);\n"
803				    "overlayfs: mount with '-o index=off' to disable inodes index.\n",
804				    d_inode(origin)->i_ino, name.len, name.name,
805				    err);
806		goto out;
807	}
808
809	inode = d_inode(index);
810	if (ovl_is_whiteout(index) && !verify) {
811		/*
812		 * When index lookup is called with !verify for decoding an
813		 * overlay file handle, a whiteout index implies that decode
814		 * should treat file handle as stale and no need to print a
815		 * warning about it.
816		 */
817		dput(index);
818		index = ERR_PTR(-ESTALE);
819		goto out;
820	} else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) ||
821		   inode_wrong_type(inode, d_inode(origin)->i_mode)) {
822		/*
823		 * Index should always be of the same file type as origin
824		 * except for the case of a whiteout index. A whiteout
825		 * index should only exist if all lower aliases have been
826		 * unlinked, which means that finding a lower origin on lookup
827		 * whose index is a whiteout should be treated as an error.
828		 */
829		pr_warn_ratelimited("bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n",
830				    index, d_inode(index)->i_mode & S_IFMT,
831				    d_inode(origin)->i_mode & S_IFMT);
832		goto fail;
833	} else if (is_dir && verify) {
834		if (!upper) {
835			pr_warn_ratelimited("suspected uncovered redirected dir found (origin=%pd2, index=%pd2).\n",
836					    origin, index);
837			goto fail;
838		}
839
840		/* Verify that dir index 'upper' xattr points to upper dir */
841		err = ovl_verify_upper(ofs, index, upper, false);
842		if (err) {
843			if (err == -ESTALE) {
844				pr_warn_ratelimited("suspected multiply redirected dir found (upper=%pd2, origin=%pd2, index=%pd2).\n",
845						    upper, origin, index);
846			}
847			goto fail;
848		}
849	} else if (upper && d_inode(upper) != inode) {
850		goto out_dput;
851	}
852out:
853	kfree(name.name);
854	return index;
855
856out_dput:
857	dput(index);
858	index = NULL;
859	goto out;
860
861fail:
862	dput(index);
863	index = ERR_PTR(-EIO);
864	goto out;
865}
866
867/*
868 * Returns next layer in stack starting from top.
869 * Returns -1 if this is the last layer.
870 */
871int ovl_path_next(int idx, struct dentry *dentry, struct path *path,
872		  const struct ovl_layer **layer)
873{
874	struct ovl_entry *oe = OVL_E(dentry);
875	struct ovl_path *lowerstack = ovl_lowerstack(oe);
876
877	BUG_ON(idx < 0);
878	if (idx == 0) {
879		ovl_path_upper(dentry, path);
880		if (path->dentry) {
881			*layer = &OVL_FS(dentry->d_sb)->layers[0];
882			return ovl_numlower(oe) ? 1 : -1;
883		}
884		idx++;
885	}
886	BUG_ON(idx > ovl_numlower(oe));
887	path->dentry = lowerstack[idx - 1].dentry;
888	*layer = lowerstack[idx - 1].layer;
889	path->mnt = (*layer)->mnt;
890
891	return (idx < ovl_numlower(oe)) ? idx + 1 : -1;
892}
893
894/* Fix missing 'origin' xattr */
895static int ovl_fix_origin(struct ovl_fs *ofs, struct dentry *dentry,
896			  struct dentry *lower, struct dentry *upper)
897{
898	const struct ovl_fh *fh;
899	int err;
900
901	if (ovl_check_origin_xattr(ofs, upper))
902		return 0;
903
904	fh = ovl_get_origin_fh(ofs, lower);
905	if (IS_ERR(fh))
906		return PTR_ERR(fh);
907
908	err = ovl_want_write(dentry);
909	if (err)
910		goto out;
911
912	err = ovl_set_origin_fh(ofs, fh, upper);
913	if (!err)
914		err = ovl_set_impure(dentry->d_parent, upper->d_parent);
915
916	ovl_drop_write(dentry);
917out:
918	kfree(fh);
919	return err;
920}
921
922static int ovl_maybe_validate_verity(struct dentry *dentry)
923{
924	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
925	struct inode *inode = d_inode(dentry);
926	struct path datapath, metapath;
927	int err;
928
929	if (!ofs->config.verity_mode ||
930	    !ovl_is_metacopy_dentry(dentry) ||
931	    ovl_test_flag(OVL_VERIFIED_DIGEST, inode))
932		return 0;
933
934	if (!ovl_test_flag(OVL_HAS_DIGEST, inode)) {
935		if (ofs->config.verity_mode == OVL_VERITY_REQUIRE) {
936			pr_warn_ratelimited("metacopy file '%pd' has no digest specified\n",
937					    dentry);
938			return -EIO;
939		}
940		return 0;
941	}
942
943	ovl_path_lowerdata(dentry, &datapath);
944	if (!datapath.dentry)
945		return -EIO;
946
947	ovl_path_real(dentry, &metapath);
948	if (!metapath.dentry)
949		return -EIO;
950
951	err = ovl_inode_lock_interruptible(inode);
952	if (err)
953		return err;
954
955	if (!ovl_test_flag(OVL_VERIFIED_DIGEST, inode)) {
956		const struct cred *old_cred;
957
958		old_cred = ovl_override_creds(dentry->d_sb);
959
960		err = ovl_validate_verity(ofs, &metapath, &datapath);
961		if (err == 0)
962			ovl_set_flag(OVL_VERIFIED_DIGEST, inode);
963
964		revert_creds(old_cred);
965	}
966
967	ovl_inode_unlock(inode);
968
969	return err;
970}
971
972/* Lazy lookup of lowerdata */
973static int ovl_maybe_lookup_lowerdata(struct dentry *dentry)
974{
975	struct inode *inode = d_inode(dentry);
976	const char *redirect = ovl_lowerdata_redirect(inode);
977	struct ovl_path datapath = {};
978	const struct cred *old_cred;
979	int err;
980
981	if (!redirect || ovl_dentry_lowerdata(dentry))
982		return 0;
983
984	if (redirect[0] != '/')
985		return -EIO;
986
987	err = ovl_inode_lock_interruptible(inode);
988	if (err)
989		return err;
990
991	err = 0;
992	/* Someone got here before us? */
993	if (ovl_dentry_lowerdata(dentry))
994		goto out;
995
996	old_cred = ovl_override_creds(dentry->d_sb);
997	err = ovl_lookup_data_layers(dentry, redirect, &datapath);
998	revert_creds(old_cred);
999	if (err)
1000		goto out_err;
1001
1002	err = ovl_dentry_set_lowerdata(dentry, &datapath);
1003	if (err)
1004		goto out_err;
1005
1006out:
1007	ovl_inode_unlock(inode);
1008	dput(datapath.dentry);
1009
1010	return err;
1011
1012out_err:
1013	pr_warn_ratelimited("lazy lowerdata lookup failed (%pd2, err=%i)\n",
1014			    dentry, err);
1015	goto out;
1016}
1017
/*
 * Make sure the lower data of @dentry has been looked up and, where that
 * applies, validated against its digest.
 *
 * Return 0 on success, < 0 on the first step that fails.
 */
int ovl_verify_lowerdata(struct dentry *dentry)
{
	int err = ovl_maybe_lookup_lowerdata(dentry);

	if (!err)
		err = ovl_maybe_validate_verity(dentry);

	return err;
}
1028
1029struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
1030			  unsigned int flags)
1031{
1032	struct ovl_entry *oe = NULL;
1033	const struct cred *old_cred;
1034	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
1035	struct ovl_entry *poe = OVL_E(dentry->d_parent);
1036	struct ovl_entry *roe = OVL_E(dentry->d_sb->s_root);
1037	struct ovl_path *stack = NULL, *origin_path = NULL;
1038	struct dentry *upperdir, *upperdentry = NULL;
1039	struct dentry *origin = NULL;
1040	struct dentry *index = NULL;
1041	unsigned int ctr = 0;
1042	struct inode *inode = NULL;
1043	bool upperopaque = false;
1044	char *upperredirect = NULL;
1045	struct dentry *this;
1046	unsigned int i;
1047	int err;
1048	bool uppermetacopy = false;
1049	int metacopy_size = 0;
1050	struct ovl_lookup_data d = {
1051		.sb = dentry->d_sb,
1052		.name = dentry->d_name,
1053		.is_dir = false,
1054		.opaque = false,
1055		.stop = false,
1056		.last = ovl_redirect_follow(ofs) ? false : !ovl_numlower(poe),
1057		.redirect = NULL,
1058		.metacopy = 0,
1059	};
1060
1061	if (dentry->d_name.len > ofs->namelen)
1062		return ERR_PTR(-ENAMETOOLONG);
1063
1064	old_cred = ovl_override_creds(dentry->d_sb);
1065	upperdir = ovl_dentry_upper(dentry->d_parent);
1066	if (upperdir) {
1067		d.layer = &ofs->layers[0];
1068		err = ovl_lookup_layer(upperdir, &d, &upperdentry, true);
1069		if (err)
1070			goto out;
1071
1072		if (upperdentry && upperdentry->d_flags & DCACHE_OP_REAL) {
1073			dput(upperdentry);
1074			err = -EREMOTE;
1075			goto out;
1076		}
1077		if (upperdentry && !d.is_dir) {
1078			/*
1079			 * Lookup copy up origin by decoding origin file handle.
1080			 * We may get a disconnected dentry, which is fine,
1081			 * because we only need to hold the origin inode in
1082			 * cache and use its inode number.  We may even get a
1083			 * connected dentry, that is not under any of the lower
1084			 * layers root.  That is also fine for using it's inode
1085			 * number - it's the same as if we held a reference
1086			 * to a dentry in lower layer that was moved under us.
1087			 */
1088			err = ovl_check_origin(ofs, upperdentry, &origin_path);
1089			if (err)
1090				goto out_put_upper;
1091
1092			if (d.metacopy)
1093				uppermetacopy = true;
1094			metacopy_size = d.metacopy;
1095		}
1096
1097		if (d.redirect) {
1098			err = -ENOMEM;
1099			upperredirect = kstrdup(d.redirect, GFP_KERNEL);
1100			if (!upperredirect)
1101				goto out_put_upper;
1102			if (d.redirect[0] == '/')
1103				poe = roe;
1104		}
1105		upperopaque = d.opaque;
1106	}
1107
1108	if (!d.stop && ovl_numlower(poe)) {
1109		err = -ENOMEM;
1110		stack = ovl_stack_alloc(ofs->numlayer - 1);
1111		if (!stack)
1112			goto out_put_upper;
1113	}
1114
1115	for (i = 0; !d.stop && i < ovl_numlower(poe); i++) {
1116		struct ovl_path lower = ovl_lowerstack(poe)[i];
1117
1118		if (!ovl_redirect_follow(ofs))
1119			d.last = i == ovl_numlower(poe) - 1;
1120		else if (d.is_dir || !ofs->numdatalayer)
1121			d.last = lower.layer->idx == ovl_numlower(roe);
1122
1123		d.layer = lower.layer;
1124		err = ovl_lookup_layer(lower.dentry, &d, &this, false);
1125		if (err)
1126			goto out_put;
1127
1128		if (!this)
1129			continue;
1130
1131		if ((uppermetacopy || d.metacopy) && !ofs->config.metacopy) {
1132			dput(this);
1133			err = -EPERM;
1134			pr_warn_ratelimited("refusing to follow metacopy origin for (%pd2)\n", dentry);
1135			goto out_put;
1136		}
1137
1138		/*
1139		 * If no origin fh is stored in upper of a merge dir, store fh
1140		 * of lower dir and set upper parent "impure".
1141		 */
1142		if (upperdentry && !ctr && !ofs->noxattr && d.is_dir) {
1143			err = ovl_fix_origin(ofs, dentry, this, upperdentry);
1144			if (err) {
1145				dput(this);
1146				goto out_put;
1147			}
1148		}
1149
1150		/*
1151		 * When "verify_lower" feature is enabled, do not merge with a
1152		 * lower dir that does not match a stored origin xattr. In any
1153		 * case, only verified origin is used for index lookup.
1154		 *
1155		 * For non-dir dentry, if index=on, then ensure origin
1156		 * matches the dentry found using path based lookup,
1157		 * otherwise error out.
1158		 */
1159		if (upperdentry && !ctr &&
1160		    ((d.is_dir && ovl_verify_lower(dentry->d_sb)) ||
1161		     (!d.is_dir && ofs->config.index && origin_path))) {
1162			err = ovl_verify_origin(ofs, upperdentry, this, false);
1163			if (err) {
1164				dput(this);
1165				if (d.is_dir)
1166					break;
1167				goto out_put;
1168			}
1169			origin = this;
1170		}
1171
1172		if (!upperdentry && !d.is_dir && !ctr && d.metacopy)
1173			metacopy_size = d.metacopy;
1174
1175		if (d.metacopy && ctr) {
1176			/*
1177			 * Do not store intermediate metacopy dentries in
1178			 * lower chain, except top most lower metacopy dentry.
1179			 * Continue the loop so that if there is an absolute
1180			 * redirect on this dentry, poe can be reset to roe.
1181			 */
1182			dput(this);
1183			this = NULL;
1184		} else {
1185			stack[ctr].dentry = this;
1186			stack[ctr].layer = lower.layer;
1187			ctr++;
1188		}
1189
1190		/*
1191		 * Following redirects can have security consequences: it's like
1192		 * a symlink into the lower layer without the permission checks.
		 * This is only a problem if the upper layer is untrusted (e.g.
		 * comes from a USB drive).  This can allow a non-readable file
1195		 * or directory to become readable.
1196		 *
1197		 * Only following redirects when redirects are enabled disables
1198		 * this attack vector when not necessary.
1199		 */
1200		err = -EPERM;
1201		if (d.redirect && !ovl_redirect_follow(ofs)) {
1202			pr_warn_ratelimited("refusing to follow redirect for (%pd2)\n",
1203					    dentry);
1204			goto out_put;
1205		}
1206
1207		if (d.stop)
1208			break;
1209
1210		if (d.redirect && d.redirect[0] == '/' && poe != roe) {
1211			poe = roe;
1212			/* Find the current layer on the root dentry */
1213			i = lower.layer->idx - 1;
1214		}
1215	}
1216
1217	/* Defer lookup of lowerdata in data-only layers to first access */
1218	if (d.metacopy && ctr && ofs->numdatalayer && d.absolute_redirect) {
1219		d.metacopy = 0;
1220		ctr++;
1221	}
1222
1223	/*
1224	 * For regular non-metacopy upper dentries, there is no lower
1225	 * path based lookup, hence ctr will be zero. If a dentry is found
1226	 * using ORIGIN xattr on upper, install it in stack.
1227	 *
1228	 * For metacopy dentry, path based lookup will find lower dentries.
1229	 * Just make sure a corresponding data dentry has been found.
1230	 */
1231	if (d.metacopy || (uppermetacopy && !ctr)) {
1232		pr_warn_ratelimited("metacopy with no lower data found - abort lookup (%pd2)\n",
1233				    dentry);
1234		err = -EIO;
1235		goto out_put;
1236	} else if (!d.is_dir && upperdentry && !ctr && origin_path) {
1237		if (WARN_ON(stack != NULL)) {
1238			err = -EIO;
1239			goto out_put;
1240		}
1241		stack = origin_path;
1242		ctr = 1;
1243		origin = origin_path->dentry;
1244		origin_path = NULL;
1245	}
1246
1247	/*
1248	 * Always lookup index if there is no-upperdentry.
1249	 *
1250	 * For the case of upperdentry, we have set origin by now if it
1251	 * needed to be set. There are basically three cases.
1252	 *
1253	 * For directories, lookup index by lower inode and verify it matches
1254	 * upper inode. We only trust dir index if we verified that lower dir
1255	 * matches origin, otherwise dir index entries may be inconsistent
1256	 * and we ignore them.
1257	 *
1258	 * For regular upper, we already set origin if upper had ORIGIN
1259	 * xattr. There is no verification though as there is no path
1260	 * based dentry lookup in lower in this case.
1261	 *
1262	 * For metacopy upper, we set a verified origin already if index
1263	 * is enabled and if upper had an ORIGIN xattr.
1264	 *
1265	 */
1266	if (!upperdentry && ctr)
1267		origin = stack[0].dentry;
1268
1269	if (origin && ovl_indexdir(dentry->d_sb) &&
1270	    (!d.is_dir || ovl_index_all(dentry->d_sb))) {
1271		index = ovl_lookup_index(ofs, upperdentry, origin, true);
1272		if (IS_ERR(index)) {
1273			err = PTR_ERR(index);
1274			index = NULL;
1275			goto out_put;
1276		}
1277	}
1278
1279	if (ctr) {
1280		oe = ovl_alloc_entry(ctr);
1281		err = -ENOMEM;
1282		if (!oe)
1283			goto out_put;
1284
1285		ovl_stack_cpy(ovl_lowerstack(oe), stack, ctr);
1286	}
1287
1288	if (upperopaque)
1289		ovl_dentry_set_opaque(dentry);
1290	if (d.xwhiteouts)
1291		ovl_dentry_set_xwhiteouts(dentry);
1292
1293	if (upperdentry)
1294		ovl_dentry_set_upper_alias(dentry);
1295	else if (index) {
1296		struct path upperpath = {
1297			.dentry = upperdentry = dget(index),
1298			.mnt = ovl_upper_mnt(ofs),
1299		};
1300
1301		/*
		 * It's safe to assign upperredirect here: the previous
		 * assignment happens only if upperdentry is non-NULL, and
		 * this one only if upperdentry is NULL.
1305		 */
1306		upperredirect = ovl_get_redirect_xattr(ofs, &upperpath, 0);
1307		if (IS_ERR(upperredirect)) {
1308			err = PTR_ERR(upperredirect);
1309			upperredirect = NULL;
1310			goto out_free_oe;
1311		}
1312		err = ovl_check_metacopy_xattr(ofs, &upperpath, NULL);
1313		if (err < 0)
1314			goto out_free_oe;
1315		uppermetacopy = err;
1316		metacopy_size = err;
1317	}
1318
1319	if (upperdentry || ctr) {
1320		struct ovl_inode_params oip = {
1321			.upperdentry = upperdentry,
1322			.oe = oe,
1323			.index = index,
1324			.redirect = upperredirect,
1325		};
1326
1327		/* Store lowerdata redirect for lazy lookup */
1328		if (ctr > 1 && !d.is_dir && !stack[ctr - 1].dentry) {
1329			oip.lowerdata_redirect = d.redirect;
1330			d.redirect = NULL;
1331		}
1332		inode = ovl_get_inode(dentry->d_sb, &oip);
1333		err = PTR_ERR(inode);
1334		if (IS_ERR(inode))
1335			goto out_free_oe;
1336		if (upperdentry && !uppermetacopy)
1337			ovl_set_flag(OVL_UPPERDATA, inode);
1338
1339		if (metacopy_size > OVL_METACOPY_MIN_SIZE)
1340			ovl_set_flag(OVL_HAS_DIGEST, inode);
1341	}
1342
1343	ovl_dentry_init_reval(dentry, upperdentry, OVL_I_E(inode));
1344
1345	revert_creds(old_cred);
1346	if (origin_path) {
1347		dput(origin_path->dentry);
1348		kfree(origin_path);
1349	}
1350	dput(index);
1351	ovl_stack_free(stack, ctr);
1352	kfree(d.redirect);
1353	return d_splice_alias(inode, dentry);
1354
1355out_free_oe:
1356	ovl_free_entry(oe);
1357out_put:
1358	dput(index);
1359	ovl_stack_free(stack, ctr);
1360out_put_upper:
1361	if (origin_path) {
1362		dput(origin_path->dentry);
1363		kfree(origin_path);
1364	}
1365	dput(upperdentry);
1366	kfree(upperredirect);
1367out:
1368	kfree(d.redirect);
1369	revert_creds(old_cred);
1370	return ERR_PTR(err);
1371}
1372
1373bool ovl_lower_positive(struct dentry *dentry)
1374{
1375	struct ovl_entry *poe = OVL_E(dentry->d_parent);
1376	const struct qstr *name = &dentry->d_name;
1377	const struct cred *old_cred;
1378	unsigned int i;
1379	bool positive = false;
1380	bool done = false;
1381
1382	/*
1383	 * If dentry is negative, then lower is positive iff this is a
1384	 * whiteout.
1385	 */
1386	if (!dentry->d_inode)
1387		return ovl_dentry_is_opaque(dentry);
1388
1389	/* Negative upper -> positive lower */
1390	if (!ovl_dentry_upper(dentry))
1391		return true;
1392
1393	old_cred = ovl_override_creds(dentry->d_sb);
1394	/* Positive upper -> have to look up lower to see whether it exists */
1395	for (i = 0; !done && !positive && i < ovl_numlower(poe); i++) {
1396		struct dentry *this;
1397		struct ovl_path *parentpath = &ovl_lowerstack(poe)[i];
1398
1399		this = lookup_one_positive_unlocked(
1400				mnt_idmap(parentpath->layer->mnt),
1401				name->name, parentpath->dentry, name->len);
1402		if (IS_ERR(this)) {
1403			switch (PTR_ERR(this)) {
1404			case -ENOENT:
1405			case -ENAMETOOLONG:
1406				break;
1407
1408			default:
1409				/*
1410				 * Assume something is there, we just couldn't
1411				 * access it.
1412				 */
1413				positive = true;
1414				break;
1415			}
1416		} else {
1417			struct path path = {
1418				.dentry = this,
1419				.mnt = parentpath->layer->mnt,
1420			};
1421			positive = !ovl_path_is_whiteout(OVL_FS(dentry->d_sb), &path);
1422			done = true;
1423			dput(this);
1424		}
1425	}
1426	revert_creds(old_cred);
1427
1428	return positive;
1429}
1430