1// SPDX-License-Identifier: GPL-2.0
2/*
3 * (C) 2001 Clemson University and The University of Chicago
4 * Copyright 2018 Omnibond Systems, L.L.C.
5 *
6 * See COPYING in top-level directory.
7 */
8#include <linux/kernel.h>
9#include "protocol.h"
10#include "orangefs-kernel.h"
11#include "orangefs-dev-proto.h"
12#include "orangefs-bufmap.h"
13
14__s32 fsid_of_op(struct orangefs_kernel_op_s *op)
15{
16	__s32 fsid = ORANGEFS_FS_ID_NULL;
17
18	if (op) {
19		switch (op->upcall.type) {
20		case ORANGEFS_VFS_OP_FILE_IO:
21			fsid = op->upcall.req.io.refn.fs_id;
22			break;
23		case ORANGEFS_VFS_OP_LOOKUP:
24			fsid = op->upcall.req.lookup.parent_refn.fs_id;
25			break;
26		case ORANGEFS_VFS_OP_CREATE:
27			fsid = op->upcall.req.create.parent_refn.fs_id;
28			break;
29		case ORANGEFS_VFS_OP_GETATTR:
30			fsid = op->upcall.req.getattr.refn.fs_id;
31			break;
32		case ORANGEFS_VFS_OP_REMOVE:
33			fsid = op->upcall.req.remove.parent_refn.fs_id;
34			break;
35		case ORANGEFS_VFS_OP_MKDIR:
36			fsid = op->upcall.req.mkdir.parent_refn.fs_id;
37			break;
38		case ORANGEFS_VFS_OP_READDIR:
39			fsid = op->upcall.req.readdir.refn.fs_id;
40			break;
41		case ORANGEFS_VFS_OP_SETATTR:
42			fsid = op->upcall.req.setattr.refn.fs_id;
43			break;
44		case ORANGEFS_VFS_OP_SYMLINK:
45			fsid = op->upcall.req.sym.parent_refn.fs_id;
46			break;
47		case ORANGEFS_VFS_OP_RENAME:
48			fsid = op->upcall.req.rename.old_parent_refn.fs_id;
49			break;
50		case ORANGEFS_VFS_OP_STATFS:
51			fsid = op->upcall.req.statfs.fs_id;
52			break;
53		case ORANGEFS_VFS_OP_TRUNCATE:
54			fsid = op->upcall.req.truncate.refn.fs_id;
55			break;
56		case ORANGEFS_VFS_OP_RA_FLUSH:
57			fsid = op->upcall.req.ra_cache_flush.refn.fs_id;
58			break;
59		case ORANGEFS_VFS_OP_FS_UMOUNT:
60			fsid = op->upcall.req.fs_umount.fs_id;
61			break;
62		case ORANGEFS_VFS_OP_GETXATTR:
63			fsid = op->upcall.req.getxattr.refn.fs_id;
64			break;
65		case ORANGEFS_VFS_OP_SETXATTR:
66			fsid = op->upcall.req.setxattr.refn.fs_id;
67			break;
68		case ORANGEFS_VFS_OP_LISTXATTR:
69			fsid = op->upcall.req.listxattr.refn.fs_id;
70			break;
71		case ORANGEFS_VFS_OP_REMOVEXATTR:
72			fsid = op->upcall.req.removexattr.refn.fs_id;
73			break;
74		case ORANGEFS_VFS_OP_FSYNC:
75			fsid = op->upcall.req.fsync.refn.fs_id;
76			break;
77		default:
78			break;
79		}
80	}
81	return fsid;
82}
83
84static int orangefs_inode_flags(struct ORANGEFS_sys_attr_s *attrs)
85{
86	int flags = 0;
87	if (attrs->flags & ORANGEFS_IMMUTABLE_FL)
88		flags |= S_IMMUTABLE;
89	else
90		flags &= ~S_IMMUTABLE;
91	if (attrs->flags & ORANGEFS_APPEND_FL)
92		flags |= S_APPEND;
93	else
94		flags &= ~S_APPEND;
95	if (attrs->flags & ORANGEFS_NOATIME_FL)
96		flags |= S_NOATIME;
97	else
98		flags &= ~S_NOATIME;
99	return flags;
100}
101
102static int orangefs_inode_perms(struct ORANGEFS_sys_attr_s *attrs)
103{
104	int perm_mode = 0;
105
106	if (attrs->perms & ORANGEFS_O_EXECUTE)
107		perm_mode |= S_IXOTH;
108	if (attrs->perms & ORANGEFS_O_WRITE)
109		perm_mode |= S_IWOTH;
110	if (attrs->perms & ORANGEFS_O_READ)
111		perm_mode |= S_IROTH;
112
113	if (attrs->perms & ORANGEFS_G_EXECUTE)
114		perm_mode |= S_IXGRP;
115	if (attrs->perms & ORANGEFS_G_WRITE)
116		perm_mode |= S_IWGRP;
117	if (attrs->perms & ORANGEFS_G_READ)
118		perm_mode |= S_IRGRP;
119
120	if (attrs->perms & ORANGEFS_U_EXECUTE)
121		perm_mode |= S_IXUSR;
122	if (attrs->perms & ORANGEFS_U_WRITE)
123		perm_mode |= S_IWUSR;
124	if (attrs->perms & ORANGEFS_U_READ)
125		perm_mode |= S_IRUSR;
126
127	if (attrs->perms & ORANGEFS_G_SGID)
128		perm_mode |= S_ISGID;
129	if (attrs->perms & ORANGEFS_U_SUID)
130		perm_mode |= S_ISUID;
131
132	return perm_mode;
133}
134
135/*
136 * NOTE: in kernel land, we never use the sys_attr->link_target for
137 * anything, so don't bother copying it into the sys_attr object here.
138 */
139static inline void copy_attributes_from_inode(struct inode *inode,
140    struct ORANGEFS_sys_attr_s *attrs)
141{
142	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
143	attrs->mask = 0;
144	if (orangefs_inode->attr_valid & ATTR_UID) {
145		attrs->owner = from_kuid(&init_user_ns, inode->i_uid);
146		attrs->mask |= ORANGEFS_ATTR_SYS_UID;
147		gossip_debug(GOSSIP_UTILS_DEBUG, "(UID) %d\n", attrs->owner);
148	}
149	if (orangefs_inode->attr_valid & ATTR_GID) {
150		attrs->group = from_kgid(&init_user_ns, inode->i_gid);
151		attrs->mask |= ORANGEFS_ATTR_SYS_GID;
152		gossip_debug(GOSSIP_UTILS_DEBUG, "(GID) %d\n", attrs->group);
153	}
154
155	if (orangefs_inode->attr_valid & ATTR_ATIME) {
156		attrs->mask |= ORANGEFS_ATTR_SYS_ATIME;
157		if (orangefs_inode->attr_valid & ATTR_ATIME_SET) {
158			attrs->atime = (time64_t) inode_get_atime_sec(inode);
159			attrs->mask |= ORANGEFS_ATTR_SYS_ATIME_SET;
160		}
161	}
162	if (orangefs_inode->attr_valid & ATTR_MTIME) {
163		attrs->mask |= ORANGEFS_ATTR_SYS_MTIME;
164		if (orangefs_inode->attr_valid & ATTR_MTIME_SET) {
165			attrs->mtime = (time64_t) inode_get_mtime_sec(inode);
166			attrs->mask |= ORANGEFS_ATTR_SYS_MTIME_SET;
167		}
168	}
169	if (orangefs_inode->attr_valid & ATTR_CTIME)
170		attrs->mask |= ORANGEFS_ATTR_SYS_CTIME;
171
172	/*
173	 * ORANGEFS cannot set size with a setattr operation. Probably not
174	 * likely to be requested through the VFS, but just in case, don't
175	 * worry about ATTR_SIZE
176	 */
177
178	if (orangefs_inode->attr_valid & ATTR_MODE) {
179		attrs->perms = ORANGEFS_util_translate_mode(inode->i_mode);
180		attrs->mask |= ORANGEFS_ATTR_SYS_PERM;
181	}
182}
183
184static int orangefs_inode_type(enum orangefs_ds_type objtype)
185{
186	if (objtype == ORANGEFS_TYPE_METAFILE)
187		return S_IFREG;
188	else if (objtype == ORANGEFS_TYPE_DIRECTORY)
189		return S_IFDIR;
190	else if (objtype == ORANGEFS_TYPE_SYMLINK)
191		return S_IFLNK;
192	else
193		return -1;
194}
195
196static void orangefs_make_bad_inode(struct inode *inode)
197{
198	if (is_root_handle(inode)) {
199		/*
200		 * if this occurs, the pvfs2-client-core was killed but we
201		 * can't afford to lose the inode operations and such
202		 * associated with the root handle in any case.
203		 */
204		gossip_debug(GOSSIP_UTILS_DEBUG,
205			     "*** NOT making bad root inode %pU\n",
206			     get_khandle_from_ino(inode));
207	} else {
208		gossip_debug(GOSSIP_UTILS_DEBUG,
209			     "*** making bad inode %pU\n",
210			     get_khandle_from_ino(inode));
211		make_bad_inode(inode);
212	}
213}
214
215static int orangefs_inode_is_stale(struct inode *inode,
216    struct ORANGEFS_sys_attr_s *attrs, char *link_target)
217{
218	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
219	int type = orangefs_inode_type(attrs->objtype);
220	/*
221	 * If the inode type or symlink target have changed then this
222	 * inode is stale.
223	 */
224	if (type == -1 || inode_wrong_type(inode, type)) {
225		orangefs_make_bad_inode(inode);
226		return 1;
227	}
228	if (type == S_IFLNK && strncmp(orangefs_inode->link_target,
229	    link_target, ORANGEFS_NAME_MAX)) {
230		orangefs_make_bad_inode(inode);
231		return 1;
232	}
233	return 0;
234}
235
236int orangefs_inode_getattr(struct inode *inode, int flags)
237{
238	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
239	struct orangefs_kernel_op_s *new_op;
240	loff_t inode_size;
241	int ret, type;
242
243	gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU flags %d\n",
244	    __func__, get_khandle_from_ino(inode), flags);
245
246again:
247	spin_lock(&inode->i_lock);
248	/* Must have all the attributes in the mask and be within cache time. */
249	if ((!flags && time_before(jiffies, orangefs_inode->getattr_time)) ||
250	    orangefs_inode->attr_valid || inode->i_state & I_DIRTY_PAGES) {
251		if (orangefs_inode->attr_valid) {
252			spin_unlock(&inode->i_lock);
253			write_inode_now(inode, 1);
254			goto again;
255		}
256		spin_unlock(&inode->i_lock);
257		return 0;
258	}
259	spin_unlock(&inode->i_lock);
260
261	new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR);
262	if (!new_op)
263		return -ENOMEM;
264	new_op->upcall.req.getattr.refn = orangefs_inode->refn;
265	/*
266	 * Size is the hardest attribute to get.  The incremental cost of any
267	 * other attribute is essentially zero.
268	 */
269	if (flags)
270		new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_ALL_NOHINT;
271	else
272		new_op->upcall.req.getattr.mask =
273		    ORANGEFS_ATTR_SYS_ALL_NOHINT & ~ORANGEFS_ATTR_SYS_SIZE;
274
275	ret = service_operation(new_op, __func__,
276	    get_interruptible_flag(inode));
277	if (ret != 0)
278		goto out;
279
280again2:
281	spin_lock(&inode->i_lock);
282	/* Must have all the attributes in the mask and be within cache time. */
283	if ((!flags && time_before(jiffies, orangefs_inode->getattr_time)) ||
284	    orangefs_inode->attr_valid || inode->i_state & I_DIRTY_PAGES) {
285		if (orangefs_inode->attr_valid) {
286			spin_unlock(&inode->i_lock);
287			write_inode_now(inode, 1);
288			goto again2;
289		}
290		if (inode->i_state & I_DIRTY_PAGES) {
291			ret = 0;
292			goto out_unlock;
293		}
294		gossip_debug(GOSSIP_UTILS_DEBUG, "%s: in cache or dirty\n",
295		    __func__);
296		ret = 0;
297		goto out_unlock;
298	}
299
300	if (!(flags & ORANGEFS_GETATTR_NEW)) {
301		ret = orangefs_inode_is_stale(inode,
302		    &new_op->downcall.resp.getattr.attributes,
303		    new_op->downcall.resp.getattr.link_target);
304		if (ret) {
305			ret = -ESTALE;
306			goto out_unlock;
307		}
308	}
309
310	type = orangefs_inode_type(new_op->
311	    downcall.resp.getattr.attributes.objtype);
312	switch (type) {
313	case S_IFREG:
314		inode->i_flags = orangefs_inode_flags(&new_op->
315		    downcall.resp.getattr.attributes);
316		if (flags) {
317			inode_size = (loff_t)new_op->
318			    downcall.resp.getattr.attributes.size;
319			inode->i_size = inode_size;
320			inode->i_blkbits = ffs(new_op->downcall.resp.getattr.
321			    attributes.blksize);
322			inode->i_bytes = inode_size;
323			inode->i_blocks =
324			    (inode_size + 512 - inode_size % 512)/512;
325		}
326		break;
327	case S_IFDIR:
328		if (flags) {
329			inode->i_size = PAGE_SIZE;
330			inode_set_bytes(inode, inode->i_size);
331		}
332		set_nlink(inode, 1);
333		break;
334	case S_IFLNK:
335		if (flags & ORANGEFS_GETATTR_NEW) {
336			inode->i_size = (loff_t)strlen(new_op->
337			    downcall.resp.getattr.link_target);
338			ret = strscpy(orangefs_inode->link_target,
339			    new_op->downcall.resp.getattr.link_target,
340			    ORANGEFS_NAME_MAX);
341			if (ret == -E2BIG) {
342				ret = -EIO;
343				goto out_unlock;
344			}
345			inode->i_link = orangefs_inode->link_target;
346		}
347		break;
348	/* i.e. -1 */
349	default:
350		/* XXX: ESTALE?  This is what is done if it is not new. */
351		orangefs_make_bad_inode(inode);
352		ret = -ESTALE;
353		goto out_unlock;
354	}
355
356	inode->i_uid = make_kuid(&init_user_ns, new_op->
357	    downcall.resp.getattr.attributes.owner);
358	inode->i_gid = make_kgid(&init_user_ns, new_op->
359	    downcall.resp.getattr.attributes.group);
360	inode_set_atime(inode,
361			(time64_t)new_op->downcall.resp.getattr.attributes.atime,
362			0);
363	inode_set_mtime(inode,
364			(time64_t)new_op->downcall.resp.getattr.attributes.mtime,
365			0);
366	inode_set_ctime(inode,
367			(time64_t)new_op->downcall.resp.getattr.attributes.ctime,
368			0);
369
370	/* special case: mark the root inode as sticky */
371	inode->i_mode = type | (is_root_handle(inode) ? S_ISVTX : 0) |
372	    orangefs_inode_perms(&new_op->downcall.resp.getattr.attributes);
373
374	orangefs_inode->getattr_time = jiffies +
375	    orangefs_getattr_timeout_msecs*HZ/1000;
376	ret = 0;
377out_unlock:
378	spin_unlock(&inode->i_lock);
379out:
380	op_release(new_op);
381	return ret;
382}
383
384int orangefs_inode_check_changed(struct inode *inode)
385{
386	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
387	struct orangefs_kernel_op_s *new_op;
388	int ret;
389
390	gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU\n", __func__,
391	    get_khandle_from_ino(inode));
392
393	new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR);
394	if (!new_op)
395		return -ENOMEM;
396	new_op->upcall.req.getattr.refn = orangefs_inode->refn;
397	new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_TYPE |
398	    ORANGEFS_ATTR_SYS_LNK_TARGET;
399
400	ret = service_operation(new_op, __func__,
401	    get_interruptible_flag(inode));
402	if (ret != 0)
403		goto out;
404
405	ret = orangefs_inode_is_stale(inode,
406	    &new_op->downcall.resp.getattr.attributes,
407	    new_op->downcall.resp.getattr.link_target);
408out:
409	op_release(new_op);
410	return ret;
411}
412
413/*
414 * issues a orangefs setattr request to make sure the new attribute values
415 * take effect if successful.  returns 0 on success; -errno otherwise
416 */
417int orangefs_inode_setattr(struct inode *inode)
418{
419	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
420	struct orangefs_kernel_op_s *new_op;
421	int ret;
422
423	new_op = op_alloc(ORANGEFS_VFS_OP_SETATTR);
424	if (!new_op)
425		return -ENOMEM;
426
427	spin_lock(&inode->i_lock);
428	new_op->upcall.uid = from_kuid(&init_user_ns, orangefs_inode->attr_uid);
429	new_op->upcall.gid = from_kgid(&init_user_ns, orangefs_inode->attr_gid);
430	new_op->upcall.req.setattr.refn = orangefs_inode->refn;
431	copy_attributes_from_inode(inode,
432	    &new_op->upcall.req.setattr.attributes);
433	orangefs_inode->attr_valid = 0;
434	if (!new_op->upcall.req.setattr.attributes.mask) {
435		spin_unlock(&inode->i_lock);
436		op_release(new_op);
437		return 0;
438	}
439	spin_unlock(&inode->i_lock);
440
441	ret = service_operation(new_op, __func__,
442	    get_interruptible_flag(inode) | ORANGEFS_OP_WRITEBACK);
443	gossip_debug(GOSSIP_UTILS_DEBUG,
444	    "orangefs_inode_setattr: returning %d\n", ret);
445	if (ret)
446		orangefs_make_bad_inode(inode);
447
448	op_release(new_op);
449
450	if (ret == 0)
451		orangefs_inode->getattr_time = jiffies - 1;
452	return ret;
453}
454
455/*
456 * The following is a very dirty hack that is now a permanent part of the
457 * ORANGEFS protocol. See protocol.h for more error definitions.
458 */
459
/*
 * Table translating an ORANGEFS error number into a kernel errno value.
 *
 * orangefs_normalize_to_errno() indexes this table with the error number
 * left after masking ORANGEFS_ERROR_BIT and ORANGEFS_ERROR_CLASS_BITS out
 * of the protocol error code.  The order matches
 * include/orangefs-types.h in the OrangeFS source, so entries must not be
 * reordered.  The table is only ever read, hence const.
 */
static const int PINT_errno_mapping[] = {
	0, EPERM, ENOENT, EINTR, EIO, ENXIO, EBADF, EAGAIN, ENOMEM,
	EFAULT, EBUSY, EEXIST, ENODEV, ENOTDIR, EISDIR, EINVAL, EMFILE,
	EFBIG, ENOSPC, EROFS, EMLINK, EPIPE, EDEADLK, ENAMETOOLONG,
	ENOLCK, ENOSYS, ENOTEMPTY, ELOOP, EWOULDBLOCK, ENOMSG, EUNATCH,
	EBADR, EDEADLOCK, ENODATA, ETIME, ENONET, EREMOTE, ECOMM,
	EPROTO, EBADMSG, EOVERFLOW, ERESTART, EMSGSIZE, EPROTOTYPE,
	ENOPROTOOPT, EPROTONOSUPPORT, EOPNOTSUPP, EADDRINUSE,
	EADDRNOTAVAIL, ENETDOWN, ENETUNREACH, ENETRESET, ENOBUFS,
	ETIMEDOUT, ECONNREFUSED, EHOSTDOWN, EHOSTUNREACH, EALREADY,
	EACCES, ECONNRESET, ERANGE
};
473
474int orangefs_normalize_to_errno(__s32 error_code)
475{
476	__u32 i;
477
478	/* Success */
479	if (error_code == 0) {
480		return 0;
481	/*
482	 * This shouldn't ever happen. If it does it should be fixed on the
483	 * server.
484	 */
485	} else if (error_code > 0) {
486		gossip_err("orangefs: error status received.\n");
487		gossip_err("orangefs: assuming error code is inverted.\n");
488		error_code = -error_code;
489	}
490
491	/*
492	 * XXX: This is very bad since error codes from ORANGEFS may not be
493	 * suitable for return into userspace.
494	 */
495
496	/*
497	 * Convert ORANGEFS error values into errno values suitable for return
498	 * from the kernel.
499	 */
500	if ((-error_code) & ORANGEFS_NON_ERRNO_ERROR_BIT) {
501		if (((-error_code) &
502		    (ORANGEFS_ERROR_NUMBER_BITS|ORANGEFS_NON_ERRNO_ERROR_BIT|
503		    ORANGEFS_ERROR_BIT)) == ORANGEFS_ECANCEL) {
504			/*
505			 * cancellation error codes generally correspond to
506			 * a timeout from the client's perspective
507			 */
508			error_code = -ETIMEDOUT;
509		} else {
510			/* assume a default error code */
511			gossip_err("%s: bad error code :%d:.\n",
512				__func__,
513				error_code);
514			error_code = -EINVAL;
515		}
516
517	/* Convert ORANGEFS encoded errno values into regular errno values. */
518	} else if ((-error_code) & ORANGEFS_ERROR_BIT) {
519		i = (-error_code) & ~(ORANGEFS_ERROR_BIT|ORANGEFS_ERROR_CLASS_BITS);
520		if (i < ARRAY_SIZE(PINT_errno_mapping))
521			error_code = -PINT_errno_mapping[i];
522		else
523			error_code = -EINVAL;
524
525	/*
526	 * Only ORANGEFS protocol error codes should ever come here. Otherwise
527	 * there is a bug somewhere.
528	 */
529	} else {
530		gossip_err("%s: unknown error code.\n", __func__);
531		error_code = -EINVAL;
532	}
533	return error_code;
534}
535
536#define NUM_MODES 11
537__s32 ORANGEFS_util_translate_mode(int mode)
538{
539	int ret = 0;
540	int i = 0;
541	static int modes[NUM_MODES] = {
542		S_IXOTH, S_IWOTH, S_IROTH,
543		S_IXGRP, S_IWGRP, S_IRGRP,
544		S_IXUSR, S_IWUSR, S_IRUSR,
545		S_ISGID, S_ISUID
546	};
547	static int orangefs_modes[NUM_MODES] = {
548		ORANGEFS_O_EXECUTE, ORANGEFS_O_WRITE, ORANGEFS_O_READ,
549		ORANGEFS_G_EXECUTE, ORANGEFS_G_WRITE, ORANGEFS_G_READ,
550		ORANGEFS_U_EXECUTE, ORANGEFS_U_WRITE, ORANGEFS_U_READ,
551		ORANGEFS_G_SGID, ORANGEFS_U_SUID
552	};
553
554	for (i = 0; i < NUM_MODES; i++)
555		if (mode & modes[i])
556			ret |= orangefs_modes[i];
557
558	return ret;
559}
560#undef NUM_MODES
561