msg.c revision 3d65661a
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * linux/ipc/msg.c
4 * Copyright (C) 1992 Krishna Balasubramanian
5 *
6 * Removed all the remaining kerneld mess
7 * Catch the -EFAULT stuff properly
8 * Use GFP_KERNEL for messages as in 1.2
9 * Fixed up the unchecked user space derefs
10 * Copyright (C) 1998 Alan Cox & Andi Kleen
11 *
12 * /proc/sysvipc/msg support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
13 *
14 * mostly rewritten, threaded and wake-one semantics added
15 * MSGMAX limit removed, sysctl's added
16 * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
17 *
18 * support for audit of ipc object properties and permission changes
19 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
20 *
21 * namespaces support
22 * OpenVZ, SWsoft Inc.
23 * Pavel Emelianov <xemul@openvz.org>
24 */
25
26#include <linux/capability.h>
27#include <linux/msg.h>
28#include <linux/spinlock.h>
29#include <linux/init.h>
30#include <linux/mm.h>
31#include <linux/proc_fs.h>
32#include <linux/list.h>
33#include <linux/security.h>
34#include <linux/sched/wake_q.h>
35#include <linux/syscalls.h>
36#include <linux/audit.h>
37#include <linux/seq_file.h>
38#include <linux/rwsem.h>
39#include <linux/nsproxy.h>
40#include <linux/ipc_namespace.h>
41
42#include <asm/current.h>
43#include <linux/uaccess.h>
44#include "util.h"
45
/* one msg_receiver structure for each sleeping receiver */
struct msg_receiver {
	struct list_head	r_list;		/* link in msg_queue.q_receivers */
	struct task_struct	*r_tsk;		/* the sleeping receiver task */

	int			r_mode;		/* SEARCH_* matching mode */
	long			r_msgtype;	/* type argument evaluated together with r_mode */
	long			r_maxsize;	/* largest message accepted (INT_MAX with MSG_NOERROR) */

	/*
	 * Result slot: holds ERR_PTR(-EAGAIN) while the receiver waits and is
	 * overwritten by the waker with the delivered message or an ERR_PTR()
	 * error code.
	 */
	struct msg_msg		*r_msg;
};
57
/* one msg_sender for each sleeping sender */
struct msg_sender {
	/* link in msg_queue.q_senders; ss_wakeup(kill) sets list.next to NULL */
	struct list_head	list;
	/* the sleeping sender task */
	struct task_struct	*tsk;
	/* size of the message the sender is waiting to enqueue */
	size_t                  msgsz;
};
64
/*
 * Message matching modes, produced by convert_mode() and evaluated by
 * testmsg() / find_msg().
 */
#define SEARCH_ANY		1	/* every message matches */
#define SEARCH_EQUAL		2	/* m_type == requested type */
#define SEARCH_NOTEQUAL		3	/* m_type != requested type */
#define SEARCH_LESSEQUAL	4	/* m_type <= requested type */
#define SEARCH_NUMBER		5	/* pick by position in the queue (MSG_COPY) */

/* The ipc_ids instance that holds the message queues of namespace @ns. */
#define msg_ids(ns)	((ns)->ids[IPC_MSG_IDS])
72
73static inline struct msg_queue *msq_obtain_object(struct ipc_namespace *ns, int id)
74{
75	struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&msg_ids(ns), id);
76
77	if (IS_ERR(ipcp))
78		return ERR_CAST(ipcp);
79
80	return container_of(ipcp, struct msg_queue, q_perm);
81}
82
83static inline struct msg_queue *msq_obtain_object_check(struct ipc_namespace *ns,
84							int id)
85{
86	struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&msg_ids(ns), id);
87
88	if (IS_ERR(ipcp))
89		return ERR_CAST(ipcp);
90
91	return container_of(ipcp, struct msg_queue, q_perm);
92}
93
94static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s)
95{
96	ipc_rmid(&msg_ids(ns), &s->q_perm);
97}
98
99static void msg_rcu_free(struct rcu_head *head)
100{
101	struct kern_ipc_perm *p = container_of(head, struct kern_ipc_perm, rcu);
102	struct msg_queue *msq = container_of(p, struct msg_queue, q_perm);
103
104	security_msg_queue_free(msq);
105	kvfree(msq);
106}
107
108/**
109 * newque - Create a new msg queue
110 * @ns: namespace
111 * @params: ptr to the structure that contains the key and msgflg
112 *
113 * Called with msg_ids.rwsem held (writer)
114 */
115static int newque(struct ipc_namespace *ns, struct ipc_params *params)
116{
117	struct msg_queue *msq;
118	int retval;
119	key_t key = params->key;
120	int msgflg = params->flg;
121
122	msq = kvmalloc(sizeof(*msq), GFP_KERNEL);
123	if (unlikely(!msq))
124		return -ENOMEM;
125
126	msq->q_perm.mode = msgflg & S_IRWXUGO;
127	msq->q_perm.key = key;
128
129	msq->q_perm.security = NULL;
130	retval = security_msg_queue_alloc(msq);
131	if (retval) {
132		kvfree(msq);
133		return retval;
134	}
135
136	msq->q_stime = msq->q_rtime = 0;
137	msq->q_ctime = ktime_get_real_seconds();
138	msq->q_cbytes = msq->q_qnum = 0;
139	msq->q_qbytes = ns->msg_ctlmnb;
140	msq->q_lspid = msq->q_lrpid = 0;
141	INIT_LIST_HEAD(&msq->q_messages);
142	INIT_LIST_HEAD(&msq->q_receivers);
143	INIT_LIST_HEAD(&msq->q_senders);
144
145	/* ipc_addid() locks msq upon success. */
146	retval = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
147	if (retval < 0) {
148		call_rcu(&msq->q_perm.rcu, msg_rcu_free);
149		return retval;
150	}
151
152	ipc_unlock_object(&msq->q_perm);
153	rcu_read_unlock();
154
155	return msq->q_perm.id;
156}
157
158static inline bool msg_fits_inqueue(struct msg_queue *msq, size_t msgsz)
159{
160	return msgsz + msq->q_cbytes <= msq->q_qbytes &&
161		1 + msq->q_qnum <= msq->q_qbytes;
162}
163
164static inline void ss_add(struct msg_queue *msq,
165			  struct msg_sender *mss, size_t msgsz)
166{
167	mss->tsk = current;
168	mss->msgsz = msgsz;
169	__set_current_state(TASK_INTERRUPTIBLE);
170	list_add_tail(&mss->list, &msq->q_senders);
171}
172
173static inline void ss_del(struct msg_sender *mss)
174{
175	if (mss->list.next)
176		list_del(&mss->list);
177}
178
179static void ss_wakeup(struct msg_queue *msq,
180		      struct wake_q_head *wake_q, bool kill)
181{
182	struct msg_sender *mss, *t;
183	struct task_struct *stop_tsk = NULL;
184	struct list_head *h = &msq->q_senders;
185
186	list_for_each_entry_safe(mss, t, h, list) {
187		if (kill)
188			mss->list.next = NULL;
189
190		/*
191		 * Stop at the first task we don't wakeup,
192		 * we've already iterated the original
193		 * sender queue.
194		 */
195		else if (stop_tsk == mss->tsk)
196			break;
197		/*
198		 * We are not in an EIDRM scenario here, therefore
199		 * verify that we really need to wakeup the task.
200		 * To maintain current semantics and wakeup order,
201		 * move the sender to the tail on behalf of the
202		 * blocked task.
203		 */
204		else if (!msg_fits_inqueue(msq, mss->msgsz)) {
205			if (!stop_tsk)
206				stop_tsk = mss->tsk;
207
208			list_move_tail(&mss->list, &msq->q_senders);
209			continue;
210		}
211
212		wake_q_add(wake_q, mss->tsk);
213	}
214}
215
216static void expunge_all(struct msg_queue *msq, int res,
217			struct wake_q_head *wake_q)
218{
219	struct msg_receiver *msr, *t;
220
221	list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
222		wake_q_add(wake_q, msr->r_tsk);
223		WRITE_ONCE(msr->r_msg, ERR_PTR(res));
224	}
225}
226
227/*
228 * freeque() wakes up waiters on the sender and receiver waiting queue,
229 * removes the message queue from message queue ID IDR, and cleans up all the
230 * messages associated with this queue.
231 *
232 * msg_ids.rwsem (writer) and the spinlock for this message queue are held
233 * before freeque() is called. msg_ids.rwsem remains locked on exit.
234 */
235static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
236{
237	struct msg_msg *msg, *t;
238	struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
239	DEFINE_WAKE_Q(wake_q);
240
241	expunge_all(msq, -EIDRM, &wake_q);
242	ss_wakeup(msq, &wake_q, true);
243	msg_rmid(ns, msq);
244	ipc_unlock_object(&msq->q_perm);
245	wake_up_q(&wake_q);
246	rcu_read_unlock();
247
248	list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
249		atomic_dec(&ns->msg_hdrs);
250		free_msg(msg);
251	}
252	atomic_sub(msq->q_cbytes, &ns->msg_bytes);
253	ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
254}
255
256/*
257 * Called with msg_ids.rwsem and ipcp locked.
258 */
259static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg)
260{
261	struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
262
263	return security_msg_queue_associate(msq, msgflg);
264}
265
266long ksys_msgget(key_t key, int msgflg)
267{
268	struct ipc_namespace *ns;
269	static const struct ipc_ops msg_ops = {
270		.getnew = newque,
271		.associate = msg_security,
272	};
273	struct ipc_params msg_params;
274
275	ns = current->nsproxy->ipc_ns;
276
277	msg_params.key = key;
278	msg_params.flg = msgflg;
279
280	return ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params);
281}
282
283SYSCALL_DEFINE2(msgget, key_t, key, int, msgflg)
284{
285	return ksys_msgget(key, msgflg);
286}
287
288static inline unsigned long
289copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version)
290{
291	switch (version) {
292	case IPC_64:
293		return copy_to_user(buf, in, sizeof(*in));
294	case IPC_OLD:
295	{
296		struct msqid_ds out;
297
298		memset(&out, 0, sizeof(out));
299
300		ipc64_perm_to_ipc_perm(&in->msg_perm, &out.msg_perm);
301
302		out.msg_stime		= in->msg_stime;
303		out.msg_rtime		= in->msg_rtime;
304		out.msg_ctime		= in->msg_ctime;
305
306		if (in->msg_cbytes > USHRT_MAX)
307			out.msg_cbytes	= USHRT_MAX;
308		else
309			out.msg_cbytes	= in->msg_cbytes;
310		out.msg_lcbytes		= in->msg_cbytes;
311
312		if (in->msg_qnum > USHRT_MAX)
313			out.msg_qnum	= USHRT_MAX;
314		else
315			out.msg_qnum	= in->msg_qnum;
316
317		if (in->msg_qbytes > USHRT_MAX)
318			out.msg_qbytes	= USHRT_MAX;
319		else
320			out.msg_qbytes	= in->msg_qbytes;
321		out.msg_lqbytes		= in->msg_qbytes;
322
323		out.msg_lspid		= in->msg_lspid;
324		out.msg_lrpid		= in->msg_lrpid;
325
326		return copy_to_user(buf, &out, sizeof(out));
327	}
328	default:
329		return -EINVAL;
330	}
331}
332
333static inline unsigned long
334copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version)
335{
336	switch (version) {
337	case IPC_64:
338		if (copy_from_user(out, buf, sizeof(*out)))
339			return -EFAULT;
340		return 0;
341	case IPC_OLD:
342	{
343		struct msqid_ds tbuf_old;
344
345		if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
346			return -EFAULT;
347
348		out->msg_perm.uid	= tbuf_old.msg_perm.uid;
349		out->msg_perm.gid	= tbuf_old.msg_perm.gid;
350		out->msg_perm.mode	= tbuf_old.msg_perm.mode;
351
352		if (tbuf_old.msg_qbytes == 0)
353			out->msg_qbytes	= tbuf_old.msg_lqbytes;
354		else
355			out->msg_qbytes	= tbuf_old.msg_qbytes;
356
357		return 0;
358	}
359	default:
360		return -EINVAL;
361	}
362}
363
364/*
365 * This function handles some msgctl commands which require the rwsem
366 * to be held in write mode.
367 * NOTE: no locks must be held, the rwsem is taken inside this function.
368 */
369static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
370			struct msqid64_ds *msqid64)
371{
372	struct kern_ipc_perm *ipcp;
373	struct msg_queue *msq;
374	int err;
375
376	down_write(&msg_ids(ns).rwsem);
377	rcu_read_lock();
378
379	ipcp = ipcctl_pre_down_nolock(ns, &msg_ids(ns), msqid, cmd,
380				      &msqid64->msg_perm, msqid64->msg_qbytes);
381	if (IS_ERR(ipcp)) {
382		err = PTR_ERR(ipcp);
383		goto out_unlock1;
384	}
385
386	msq = container_of(ipcp, struct msg_queue, q_perm);
387
388	err = security_msg_queue_msgctl(msq, cmd);
389	if (err)
390		goto out_unlock1;
391
392	switch (cmd) {
393	case IPC_RMID:
394		ipc_lock_object(&msq->q_perm);
395		/* freeque unlocks the ipc object and rcu */
396		freeque(ns, ipcp);
397		goto out_up;
398	case IPC_SET:
399	{
400		DEFINE_WAKE_Q(wake_q);
401
402		if (msqid64->msg_qbytes > ns->msg_ctlmnb &&
403		    !capable(CAP_SYS_RESOURCE)) {
404			err = -EPERM;
405			goto out_unlock1;
406		}
407
408		ipc_lock_object(&msq->q_perm);
409		err = ipc_update_perm(&msqid64->msg_perm, ipcp);
410		if (err)
411			goto out_unlock0;
412
413		msq->q_qbytes = msqid64->msg_qbytes;
414
415		msq->q_ctime = ktime_get_real_seconds();
416		/*
417		 * Sleeping receivers might be excluded by
418		 * stricter permissions.
419		 */
420		expunge_all(msq, -EAGAIN, &wake_q);
421		/*
422		 * Sleeping senders might be able to send
423		 * due to a larger queue size.
424		 */
425		ss_wakeup(msq, &wake_q, false);
426		ipc_unlock_object(&msq->q_perm);
427		wake_up_q(&wake_q);
428
429		goto out_unlock1;
430	}
431	default:
432		err = -EINVAL;
433		goto out_unlock1;
434	}
435
436out_unlock0:
437	ipc_unlock_object(&msq->q_perm);
438out_unlock1:
439	rcu_read_unlock();
440out_up:
441	up_write(&msg_ids(ns).rwsem);
442	return err;
443}
444
445static int msgctl_info(struct ipc_namespace *ns, int msqid,
446			 int cmd, struct msginfo *msginfo)
447{
448	int err;
449	int max_id;
450
451	/*
452	 * We must not return kernel stack data.
453	 * due to padding, it's not enough
454	 * to set all member fields.
455	 */
456	err = security_msg_queue_msgctl(NULL, cmd);
457	if (err)
458		return err;
459
460	memset(msginfo, 0, sizeof(*msginfo));
461	msginfo->msgmni = ns->msg_ctlmni;
462	msginfo->msgmax = ns->msg_ctlmax;
463	msginfo->msgmnb = ns->msg_ctlmnb;
464	msginfo->msgssz = MSGSSZ;
465	msginfo->msgseg = MSGSEG;
466	down_read(&msg_ids(ns).rwsem);
467	if (cmd == MSG_INFO) {
468		msginfo->msgpool = msg_ids(ns).in_use;
469		msginfo->msgmap = atomic_read(&ns->msg_hdrs);
470		msginfo->msgtql = atomic_read(&ns->msg_bytes);
471	} else {
472		msginfo->msgmap = MSGMAP;
473		msginfo->msgpool = MSGPOOL;
474		msginfo->msgtql = MSGTQL;
475	}
476	max_id = ipc_get_maxid(&msg_ids(ns));
477	up_read(&msg_ids(ns).rwsem);
478	return (max_id < 0) ? 0 : max_id;
479}
480
481static int msgctl_stat(struct ipc_namespace *ns, int msqid,
482			 int cmd, struct msqid64_ds *p)
483{
484	struct msg_queue *msq;
485	int id = 0;
486	int err;
487
488	memset(p, 0, sizeof(*p));
489
490	rcu_read_lock();
491	if (cmd == MSG_STAT) {
492		msq = msq_obtain_object(ns, msqid);
493		if (IS_ERR(msq)) {
494			err = PTR_ERR(msq);
495			goto out_unlock;
496		}
497		id = msq->q_perm.id;
498	} else {
499		msq = msq_obtain_object_check(ns, msqid);
500		if (IS_ERR(msq)) {
501			err = PTR_ERR(msq);
502			goto out_unlock;
503		}
504	}
505
506	err = -EACCES;
507	if (ipcperms(ns, &msq->q_perm, S_IRUGO))
508		goto out_unlock;
509
510	err = security_msg_queue_msgctl(msq, cmd);
511	if (err)
512		goto out_unlock;
513
514	ipc_lock_object(&msq->q_perm);
515
516	if (!ipc_valid_object(&msq->q_perm)) {
517		ipc_unlock_object(&msq->q_perm);
518		err = -EIDRM;
519		goto out_unlock;
520	}
521
522	kernel_to_ipc64_perm(&msq->q_perm, &p->msg_perm);
523	p->msg_stime  = msq->q_stime;
524	p->msg_rtime  = msq->q_rtime;
525	p->msg_ctime  = msq->q_ctime;
526	p->msg_cbytes = msq->q_cbytes;
527	p->msg_qnum   = msq->q_qnum;
528	p->msg_qbytes = msq->q_qbytes;
529	p->msg_lspid  = msq->q_lspid;
530	p->msg_lrpid  = msq->q_lrpid;
531
532	ipc_unlock_object(&msq->q_perm);
533	rcu_read_unlock();
534	return id;
535
536out_unlock:
537	rcu_read_unlock();
538	return err;
539}
540
541SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf)
542{
543	int version;
544	struct ipc_namespace *ns;
545	struct msqid64_ds msqid64;
546	int err;
547
548	if (msqid < 0 || cmd < 0)
549		return -EINVAL;
550
551	version = ipc_parse_version(&cmd);
552	ns = current->nsproxy->ipc_ns;
553
554	switch (cmd) {
555	case IPC_INFO:
556	case MSG_INFO: {
557		struct msginfo msginfo;
558		err = msgctl_info(ns, msqid, cmd, &msginfo);
559		if (err < 0)
560			return err;
561		if (copy_to_user(buf, &msginfo, sizeof(struct msginfo)))
562			err = -EFAULT;
563		return err;
564	}
565	case MSG_STAT:	/* msqid is an index rather than a msg queue id */
566	case IPC_STAT:
567		err = msgctl_stat(ns, msqid, cmd, &msqid64);
568		if (err < 0)
569			return err;
570		if (copy_msqid_to_user(buf, &msqid64, version))
571			err = -EFAULT;
572		return err;
573	case IPC_SET:
574		if (copy_msqid_from_user(&msqid64, buf, version))
575			return -EFAULT;
576		/* fallthru */
577	case IPC_RMID:
578		return msgctl_down(ns, msqid, cmd, &msqid64);
579	default:
580		return  -EINVAL;
581	}
582}
583
584#ifdef CONFIG_COMPAT
585
/*
 * Legacy (non-IPC_64) msqid_ds layout as seen by compat tasks. Only the
 * fields accessed by copy_compat_msqid_{from,to}_user() are used here;
 * msg_first, msg_last, msg_lcbytes and msg_lqbytes are never written by this
 * file and therefore stay zero after the memset() in the to_user path.
 */
struct compat_msqid_ds {
	struct compat_ipc_perm msg_perm;
	compat_uptr_t msg_first;
	compat_uptr_t msg_last;
	compat_time_t msg_stime;
	compat_time_t msg_rtime;
	compat_time_t msg_ctime;
	compat_ulong_t msg_lcbytes;
	compat_ulong_t msg_lqbytes;
	unsigned short msg_cbytes;
	unsigned short msg_qnum;
	unsigned short msg_qbytes;
	compat_ipc_pid_t msg_lspid;
	compat_ipc_pid_t msg_lrpid;
};
601
602static int copy_compat_msqid_from_user(struct msqid64_ds *out, void __user *buf,
603					int version)
604{
605	memset(out, 0, sizeof(*out));
606	if (version == IPC_64) {
607		struct compat_msqid64_ds __user *p = buf;
608		if (get_compat_ipc64_perm(&out->msg_perm, &p->msg_perm))
609			return -EFAULT;
610		if (get_user(out->msg_qbytes, &p->msg_qbytes))
611			return -EFAULT;
612	} else {
613		struct compat_msqid_ds __user *p = buf;
614		if (get_compat_ipc_perm(&out->msg_perm, &p->msg_perm))
615			return -EFAULT;
616		if (get_user(out->msg_qbytes, &p->msg_qbytes))
617			return -EFAULT;
618	}
619	return 0;
620}
621
622static int copy_compat_msqid_to_user(void __user *buf, struct msqid64_ds *in,
623					int version)
624{
625	if (version == IPC_64) {
626		struct compat_msqid64_ds v;
627		memset(&v, 0, sizeof(v));
628		to_compat_ipc64_perm(&v.msg_perm, &in->msg_perm);
629		v.msg_stime = in->msg_stime;
630		v.msg_rtime = in->msg_rtime;
631		v.msg_ctime = in->msg_ctime;
632		v.msg_cbytes = in->msg_cbytes;
633		v.msg_qnum = in->msg_qnum;
634		v.msg_qbytes = in->msg_qbytes;
635		v.msg_lspid = in->msg_lspid;
636		v.msg_lrpid = in->msg_lrpid;
637		return copy_to_user(buf, &v, sizeof(v));
638	} else {
639		struct compat_msqid_ds v;
640		memset(&v, 0, sizeof(v));
641		to_compat_ipc_perm(&v.msg_perm, &in->msg_perm);
642		v.msg_stime = in->msg_stime;
643		v.msg_rtime = in->msg_rtime;
644		v.msg_ctime = in->msg_ctime;
645		v.msg_cbytes = in->msg_cbytes;
646		v.msg_qnum = in->msg_qnum;
647		v.msg_qbytes = in->msg_qbytes;
648		v.msg_lspid = in->msg_lspid;
649		v.msg_lrpid = in->msg_lrpid;
650		return copy_to_user(buf, &v, sizeof(v));
651	}
652}
653
654COMPAT_SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, void __user *, uptr)
655{
656	struct ipc_namespace *ns;
657	int err;
658	struct msqid64_ds msqid64;
659	int version = compat_ipc_parse_version(&cmd);
660
661	ns = current->nsproxy->ipc_ns;
662
663	if (msqid < 0 || cmd < 0)
664		return -EINVAL;
665
666	switch (cmd & (~IPC_64)) {
667	case IPC_INFO:
668	case MSG_INFO: {
669		struct msginfo msginfo;
670		err = msgctl_info(ns, msqid, cmd, &msginfo);
671		if (err < 0)
672			return err;
673		if (copy_to_user(uptr, &msginfo, sizeof(struct msginfo)))
674			err = -EFAULT;
675		return err;
676	}
677	case IPC_STAT:
678	case MSG_STAT:
679		err = msgctl_stat(ns, msqid, cmd, &msqid64);
680		if (err < 0)
681			return err;
682		if (copy_compat_msqid_to_user(uptr, &msqid64, version))
683			err = -EFAULT;
684		return err;
685	case IPC_SET:
686		if (copy_compat_msqid_from_user(&msqid64, uptr, version))
687			return -EFAULT;
688		/* fallthru */
689	case IPC_RMID:
690		return msgctl_down(ns, msqid, cmd, &msqid64);
691	default:
692		return -EINVAL;
693	}
694}
695#endif
696
697static int testmsg(struct msg_msg *msg, long type, int mode)
698{
699	switch (mode) {
700	case SEARCH_ANY:
701	case SEARCH_NUMBER:
702		return 1;
703	case SEARCH_LESSEQUAL:
704		if (msg->m_type <= type)
705			return 1;
706		break;
707	case SEARCH_EQUAL:
708		if (msg->m_type == type)
709			return 1;
710		break;
711	case SEARCH_NOTEQUAL:
712		if (msg->m_type != type)
713			return 1;
714		break;
715	}
716	return 0;
717}
718
719static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg,
720				 struct wake_q_head *wake_q)
721{
722	struct msg_receiver *msr, *t;
723
724	list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
725		if (testmsg(msg, msr->r_msgtype, msr->r_mode) &&
726		    !security_msg_queue_msgrcv(msq, msg, msr->r_tsk,
727					       msr->r_msgtype, msr->r_mode)) {
728
729			list_del(&msr->r_list);
730			if (msr->r_maxsize < msg->m_ts) {
731				wake_q_add(wake_q, msr->r_tsk);
732				WRITE_ONCE(msr->r_msg, ERR_PTR(-E2BIG));
733			} else {
734				msq->q_lrpid = task_pid_vnr(msr->r_tsk);
735				msq->q_rtime = get_seconds();
736
737				wake_q_add(wake_q, msr->r_tsk);
738				WRITE_ONCE(msr->r_msg, msg);
739				return 1;
740			}
741		}
742	}
743
744	return 0;
745}
746
747static long do_msgsnd(int msqid, long mtype, void __user *mtext,
748		size_t msgsz, int msgflg)
749{
750	struct msg_queue *msq;
751	struct msg_msg *msg;
752	int err;
753	struct ipc_namespace *ns;
754	DEFINE_WAKE_Q(wake_q);
755
756	ns = current->nsproxy->ipc_ns;
757
758	if (msgsz > ns->msg_ctlmax || (long) msgsz < 0 || msqid < 0)
759		return -EINVAL;
760	if (mtype < 1)
761		return -EINVAL;
762
763	msg = load_msg(mtext, msgsz);
764	if (IS_ERR(msg))
765		return PTR_ERR(msg);
766
767	msg->m_type = mtype;
768	msg->m_ts = msgsz;
769
770	rcu_read_lock();
771	msq = msq_obtain_object_check(ns, msqid);
772	if (IS_ERR(msq)) {
773		err = PTR_ERR(msq);
774		goto out_unlock1;
775	}
776
777	ipc_lock_object(&msq->q_perm);
778
779	for (;;) {
780		struct msg_sender s;
781
782		err = -EACCES;
783		if (ipcperms(ns, &msq->q_perm, S_IWUGO))
784			goto out_unlock0;
785
786		/* raced with RMID? */
787		if (!ipc_valid_object(&msq->q_perm)) {
788			err = -EIDRM;
789			goto out_unlock0;
790		}
791
792		err = security_msg_queue_msgsnd(msq, msg, msgflg);
793		if (err)
794			goto out_unlock0;
795
796		if (msg_fits_inqueue(msq, msgsz))
797			break;
798
799		/* queue full, wait: */
800		if (msgflg & IPC_NOWAIT) {
801			err = -EAGAIN;
802			goto out_unlock0;
803		}
804
805		/* enqueue the sender and prepare to block */
806		ss_add(msq, &s, msgsz);
807
808		if (!ipc_rcu_getref(&msq->q_perm)) {
809			err = -EIDRM;
810			goto out_unlock0;
811		}
812
813		ipc_unlock_object(&msq->q_perm);
814		rcu_read_unlock();
815		schedule();
816
817		rcu_read_lock();
818		ipc_lock_object(&msq->q_perm);
819
820		ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
821		/* raced with RMID? */
822		if (!ipc_valid_object(&msq->q_perm)) {
823			err = -EIDRM;
824			goto out_unlock0;
825		}
826		ss_del(&s);
827
828		if (signal_pending(current)) {
829			err = -ERESTARTNOHAND;
830			goto out_unlock0;
831		}
832
833	}
834
835	msq->q_lspid = task_tgid_vnr(current);
836	msq->q_stime = get_seconds();
837
838	if (!pipelined_send(msq, msg, &wake_q)) {
839		/* no one is waiting for this message, enqueue it */
840		list_add_tail(&msg->m_list, &msq->q_messages);
841		msq->q_cbytes += msgsz;
842		msq->q_qnum++;
843		atomic_add(msgsz, &ns->msg_bytes);
844		atomic_inc(&ns->msg_hdrs);
845	}
846
847	err = 0;
848	msg = NULL;
849
850out_unlock0:
851	ipc_unlock_object(&msq->q_perm);
852	wake_up_q(&wake_q);
853out_unlock1:
854	rcu_read_unlock();
855	if (msg != NULL)
856		free_msg(msg);
857	return err;
858}
859
860SYSCALL_DEFINE4(msgsnd, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
861		int, msgflg)
862{
863	long mtype;
864
865	if (get_user(mtype, &msgp->mtype))
866		return -EFAULT;
867	return do_msgsnd(msqid, mtype, msgp->mtext, msgsz, msgflg);
868}
869
870#ifdef CONFIG_COMPAT
871
/*
 * Compat view of the user-space message buffer: a compat_long_t type
 * followed by the message text. In this file mtext is only used to obtain
 * the user address of the text, never accessed as a one-byte array.
 */
struct compat_msgbuf {
	compat_long_t mtype;
	char mtext[1];
};
876
877COMPAT_SYSCALL_DEFINE4(msgsnd, int, msqid, compat_uptr_t, msgp,
878		       compat_ssize_t, msgsz, int, msgflg)
879{
880	struct compat_msgbuf __user *up = compat_ptr(msgp);
881	compat_long_t mtype;
882
883	if (get_user(mtype, &up->mtype))
884		return -EFAULT;
885	return do_msgsnd(msqid, mtype, up->mtext, (ssize_t)msgsz, msgflg);
886}
887#endif
888
889static inline int convert_mode(long *msgtyp, int msgflg)
890{
891	if (msgflg & MSG_COPY)
892		return SEARCH_NUMBER;
893	/*
894	 *  find message of correct type.
895	 *  msgtyp = 0 => get first.
896	 *  msgtyp > 0 => get first message of matching type.
897	 *  msgtyp < 0 => get message with least type must be < abs(msgtype).
898	 */
899	if (*msgtyp == 0)
900		return SEARCH_ANY;
901	if (*msgtyp < 0) {
902		if (*msgtyp == LONG_MIN) /* -LONG_MIN is undefined */
903			*msgtyp = LONG_MAX;
904		else
905			*msgtyp = -*msgtyp;
906		return SEARCH_LESSEQUAL;
907	}
908	if (msgflg & MSG_EXCEPT)
909		return SEARCH_NOTEQUAL;
910	return SEARCH_EQUAL;
911}
912
913static long do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz)
914{
915	struct msgbuf __user *msgp = dest;
916	size_t msgsz;
917
918	if (put_user(msg->m_type, &msgp->mtype))
919		return -EFAULT;
920
921	msgsz = (bufsz > msg->m_ts) ? msg->m_ts : bufsz;
922	if (store_msg(msgp->mtext, msg, msgsz))
923		return -EFAULT;
924	return msgsz;
925}
926
927#ifdef CONFIG_CHECKPOINT_RESTORE
928/*
929 * This function creates new kernel message structure, large enough to store
930 * bufsz message bytes.
931 */
932static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz)
933{
934	struct msg_msg *copy;
935
936	/*
937	 * Create dummy message to copy real message to.
938	 */
939	copy = load_msg(buf, bufsz);
940	if (!IS_ERR(copy))
941		copy->m_ts = bufsz;
942	return copy;
943}
944
/* Release a message obtained from prepare_copy(); NULL is ignored. */
static inline void free_copy(struct msg_msg *copy)
{
	if (!copy)
		return;

	free_msg(copy);
}
950#else
/*
 * Stub used when CONFIG_CHECKPOINT_RESTORE is disabled: always fails with
 * -ENOSYS, which do_msgrcv() returns to MSG_COPY callers.
 */
static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz)
{
	return ERR_PTR(-ENOSYS);
}
955
/*
 * Stub used when CONFIG_CHECKPOINT_RESTORE is disabled: prepare_copy() never
 * returns a message in that configuration, so there is nothing to free.
 */
static inline void free_copy(struct msg_msg *copy)
{
}
959#endif
960
961static struct msg_msg *find_msg(struct msg_queue *msq, long *msgtyp, int mode)
962{
963	struct msg_msg *msg, *found = NULL;
964	long count = 0;
965
966	list_for_each_entry(msg, &msq->q_messages, m_list) {
967		if (testmsg(msg, *msgtyp, mode) &&
968		    !security_msg_queue_msgrcv(msq, msg, current,
969					       *msgtyp, mode)) {
970			if (mode == SEARCH_LESSEQUAL && msg->m_type != 1) {
971				*msgtyp = msg->m_type - 1;
972				found = msg;
973			} else if (mode == SEARCH_NUMBER) {
974				if (*msgtyp == count)
975					return msg;
976			} else
977				return msg;
978			count++;
979		}
980	}
981
982	return found ?: ERR_PTR(-EAGAIN);
983}
984
985static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgflg,
986	       long (*msg_handler)(void __user *, struct msg_msg *, size_t))
987{
988	int mode;
989	struct msg_queue *msq;
990	struct ipc_namespace *ns;
991	struct msg_msg *msg, *copy = NULL;
992	DEFINE_WAKE_Q(wake_q);
993
994	ns = current->nsproxy->ipc_ns;
995
996	if (msqid < 0 || (long) bufsz < 0)
997		return -EINVAL;
998
999	if (msgflg & MSG_COPY) {
1000		if ((msgflg & MSG_EXCEPT) || !(msgflg & IPC_NOWAIT))
1001			return -EINVAL;
1002		copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax));
1003		if (IS_ERR(copy))
1004			return PTR_ERR(copy);
1005	}
1006	mode = convert_mode(&msgtyp, msgflg);
1007
1008	rcu_read_lock();
1009	msq = msq_obtain_object_check(ns, msqid);
1010	if (IS_ERR(msq)) {
1011		rcu_read_unlock();
1012		free_copy(copy);
1013		return PTR_ERR(msq);
1014	}
1015
1016	for (;;) {
1017		struct msg_receiver msr_d;
1018
1019		msg = ERR_PTR(-EACCES);
1020		if (ipcperms(ns, &msq->q_perm, S_IRUGO))
1021			goto out_unlock1;
1022
1023		ipc_lock_object(&msq->q_perm);
1024
1025		/* raced with RMID? */
1026		if (!ipc_valid_object(&msq->q_perm)) {
1027			msg = ERR_PTR(-EIDRM);
1028			goto out_unlock0;
1029		}
1030
1031		msg = find_msg(msq, &msgtyp, mode);
1032		if (!IS_ERR(msg)) {
1033			/*
1034			 * Found a suitable message.
1035			 * Unlink it from the queue.
1036			 */
1037			if ((bufsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) {
1038				msg = ERR_PTR(-E2BIG);
1039				goto out_unlock0;
1040			}
1041			/*
1042			 * If we are copying, then do not unlink message and do
1043			 * not update queue parameters.
1044			 */
1045			if (msgflg & MSG_COPY) {
1046				msg = copy_msg(msg, copy);
1047				goto out_unlock0;
1048			}
1049
1050			list_del(&msg->m_list);
1051			msq->q_qnum--;
1052			msq->q_rtime = get_seconds();
1053			msq->q_lrpid = task_tgid_vnr(current);
1054			msq->q_cbytes -= msg->m_ts;
1055			atomic_sub(msg->m_ts, &ns->msg_bytes);
1056			atomic_dec(&ns->msg_hdrs);
1057			ss_wakeup(msq, &wake_q, false);
1058
1059			goto out_unlock0;
1060		}
1061
1062		/* No message waiting. Wait for a message */
1063		if (msgflg & IPC_NOWAIT) {
1064			msg = ERR_PTR(-ENOMSG);
1065			goto out_unlock0;
1066		}
1067
1068		list_add_tail(&msr_d.r_list, &msq->q_receivers);
1069		msr_d.r_tsk = current;
1070		msr_d.r_msgtype = msgtyp;
1071		msr_d.r_mode = mode;
1072		if (msgflg & MSG_NOERROR)
1073			msr_d.r_maxsize = INT_MAX;
1074		else
1075			msr_d.r_maxsize = bufsz;
1076		msr_d.r_msg = ERR_PTR(-EAGAIN);
1077		__set_current_state(TASK_INTERRUPTIBLE);
1078
1079		ipc_unlock_object(&msq->q_perm);
1080		rcu_read_unlock();
1081		schedule();
1082
1083		/*
1084		 * Lockless receive, part 1:
1085		 * We don't hold a reference to the queue and getting a
1086		 * reference would defeat the idea of a lockless operation,
1087		 * thus the code relies on rcu to guarantee the existence of
1088		 * msq:
1089		 * Prior to destruction, expunge_all(-EIRDM) changes r_msg.
1090		 * Thus if r_msg is -EAGAIN, then the queue not yet destroyed.
1091		 */
1092		rcu_read_lock();
1093
1094		/*
1095		 * Lockless receive, part 2:
1096		 * The work in pipelined_send() and expunge_all():
1097		 * - Set pointer to message
1098		 * - Queue the receiver task for later wakeup
1099		 * - Wake up the process after the lock is dropped.
1100		 *
1101		 * Should the process wake up before this wakeup (due to a
1102		 * signal) it will either see the message and continue ...
1103		 */
1104		msg = READ_ONCE(msr_d.r_msg);
1105		if (msg != ERR_PTR(-EAGAIN))
1106			goto out_unlock1;
1107
1108		 /*
1109		  * ... or see -EAGAIN, acquire the lock to check the message
1110		  * again.
1111		  */
1112		ipc_lock_object(&msq->q_perm);
1113
1114		msg = msr_d.r_msg;
1115		if (msg != ERR_PTR(-EAGAIN))
1116			goto out_unlock0;
1117
1118		list_del(&msr_d.r_list);
1119		if (signal_pending(current)) {
1120			msg = ERR_PTR(-ERESTARTNOHAND);
1121			goto out_unlock0;
1122		}
1123
1124		ipc_unlock_object(&msq->q_perm);
1125	}
1126
1127out_unlock0:
1128	ipc_unlock_object(&msq->q_perm);
1129	wake_up_q(&wake_q);
1130out_unlock1:
1131	rcu_read_unlock();
1132	if (IS_ERR(msg)) {
1133		free_copy(copy);
1134		return PTR_ERR(msg);
1135	}
1136
1137	bufsz = msg_handler(buf, msg, bufsz);
1138	free_msg(msg);
1139
1140	return bufsz;
1141}
1142
1143SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
1144		long, msgtyp, int, msgflg)
1145{
1146	return do_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg, do_msg_fill);
1147}
1148
1149#ifdef CONFIG_COMPAT
1150static long compat_do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz)
1151{
1152	struct compat_msgbuf __user *msgp = dest;
1153	size_t msgsz;
1154
1155	if (put_user(msg->m_type, &msgp->mtype))
1156		return -EFAULT;
1157
1158	msgsz = (bufsz > msg->m_ts) ? msg->m_ts : bufsz;
1159	if (store_msg(msgp->mtext, msg, msgsz))
1160		return -EFAULT;
1161	return msgsz;
1162}
1163
1164COMPAT_SYSCALL_DEFINE5(msgrcv, int, msqid, compat_uptr_t, msgp,
1165		       compat_ssize_t, msgsz, compat_long_t, msgtyp, int, msgflg)
1166{
1167	return do_msgrcv(msqid, compat_ptr(msgp), (ssize_t)msgsz, (long)msgtyp,
1168			 msgflg, compat_do_msg_fill);
1169}
1170#endif
1171
1172int msg_init_ns(struct ipc_namespace *ns)
1173{
1174	ns->msg_ctlmax = MSGMAX;
1175	ns->msg_ctlmnb = MSGMNB;
1176	ns->msg_ctlmni = MSGMNI;
1177
1178	atomic_set(&ns->msg_bytes, 0);
1179	atomic_set(&ns->msg_hdrs, 0);
1180	return ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
1181}
1182
1183#ifdef CONFIG_IPC_NS
1184void msg_exit_ns(struct ipc_namespace *ns)
1185{
1186	free_ipcs(ns, &msg_ids(ns), freeque);
1187	idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr);
1188	rhashtable_destroy(&ns->ids[IPC_MSG_IDS].key_ht);
1189}
1190#endif
1191
1192#ifdef CONFIG_PROC_FS
1193static int sysvipc_msg_proc_show(struct seq_file *s, void *it)
1194{
1195	struct user_namespace *user_ns = seq_user_ns(s);
1196	struct kern_ipc_perm *ipcp = it;
1197	struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
1198
1199	seq_printf(s,
1200		   "%10d %10d  %4o  %10lu %10lu %5u %5u %5u %5u %5u %5u %10llu %10llu %10llu\n",
1201		   msq->q_perm.key,
1202		   msq->q_perm.id,
1203		   msq->q_perm.mode,
1204		   msq->q_cbytes,
1205		   msq->q_qnum,
1206		   msq->q_lspid,
1207		   msq->q_lrpid,
1208		   from_kuid_munged(user_ns, msq->q_perm.uid),
1209		   from_kgid_munged(user_ns, msq->q_perm.gid),
1210		   from_kuid_munged(user_ns, msq->q_perm.cuid),
1211		   from_kgid_munged(user_ns, msq->q_perm.cgid),
1212		   msq->q_stime,
1213		   msq->q_rtime,
1214		   msq->q_ctime);
1215
1216	return 0;
1217}
1218#endif
1219
1220int __init msg_init(void)
1221{
1222	const int err = msg_init_ns(&init_ipc_ns);
1223
1224	ipc_init_proc_interface("sysvipc/msg",
1225				"       key      msqid perms      cbytes       qnum lspid lrpid   uid   gid  cuid  cgid      stime      rtime      ctime\n",
1226				IPC_MSG_IDS, sysvipc_msg_proc_show);
1227	return err;
1228}
1229