msg.c revision 078faac9
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * linux/ipc/msg.c
4 * Copyright (C) 1992 Krishna Balasubramanian
5 *
6 * Removed all the remaining kerneld mess
7 * Catch the -EFAULT stuff properly
8 * Use GFP_KERNEL for messages as in 1.2
9 * Fixed up the unchecked user space derefs
10 * Copyright (C) 1998 Alan Cox & Andi Kleen
11 *
12 * /proc/sysvipc/msg support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
13 *
14 * mostly rewritten, threaded and wake-one semantics added
15 * MSGMAX limit removed, sysctl's added
16 * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
17 *
18 * support for audit of ipc object properties and permission changes
19 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
20 *
21 * namespaces support
22 * OpenVZ, SWsoft Inc.
23 * Pavel Emelianov <xemul@openvz.org>
24 */
25
26#include <linux/capability.h>
27#include <linux/msg.h>
28#include <linux/spinlock.h>
29#include <linux/init.h>
30#include <linux/mm.h>
31#include <linux/proc_fs.h>
32#include <linux/list.h>
33#include <linux/security.h>
34#include <linux/sched/wake_q.h>
35#include <linux/syscalls.h>
36#include <linux/audit.h>
37#include <linux/seq_file.h>
38#include <linux/rwsem.h>
39#include <linux/nsproxy.h>
40#include <linux/ipc_namespace.h>
41
42#include <asm/current.h>
43#include <linux/uaccess.h>
44#include "util.h"
45
/*
 * One msg_receiver structure for each sleeping receiver.  do_msgrcv()
 * fills it in and links it into msq->q_receivers before it sleeps.
 */
struct msg_receiver {
	struct list_head	r_list;		/* link in msq->q_receivers */
	struct task_struct	*r_tsk;		/* the sleeping task, woken through a wake_q */

	int			r_mode;		/* SEARCH_* mode from convert_mode() */
	long			r_msgtype;	/* type argument evaluated together with r_mode */
	long			r_maxsize;	/* largest m_ts accepted; INT_MAX with MSG_NOERROR */

	/*
	 * Result slot: preset to ERR_PTR(-EAGAIN) by do_msgrcv(), then
	 * overwritten with a message or an ERR_PTR by pipelined_send()
	 * or expunge_all().
	 */
	struct msg_msg		*r_msg;
};
57
/*
 * One msg_sender for each sleeping sender.  It is filled in by ss_add()
 * and lives on the stack of do_msgsnd().
 */
struct msg_sender {
	struct list_head	list;	/* link in msq->q_senders; ss_wakeup(kill) clears ->next */
	struct task_struct	*tsk;	/* the blocked sender */
	size_t                  msgsz;	/* size of the message the sender wants to queue */
};
64
/*
 * Message selection modes.  convert_mode() derives one of these from the
 * msgrcv() arguments; testmsg() and find_msg() evaluate them.
 */
#define SEARCH_ANY		1	/* every message matches */
#define SEARCH_EQUAL		2	/* m_type == requested type */
#define SEARCH_NOTEQUAL		3	/* m_type != requested type */
#define SEARCH_LESSEQUAL	4	/* m_type <= requested type */
#define SEARCH_NUMBER		5	/* pick by position in the queue (MSG_COPY) */

/* The ipc_ids entry of @ns that tracks message queues. */
#define msg_ids(ns)	((ns)->ids[IPC_MSG_IDS])
72
73static inline struct msg_queue *msq_obtain_object(struct ipc_namespace *ns, int id)
74{
75	struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&msg_ids(ns), id);
76
77	if (IS_ERR(ipcp))
78		return ERR_CAST(ipcp);
79
80	return container_of(ipcp, struct msg_queue, q_perm);
81}
82
83static inline struct msg_queue *msq_obtain_object_check(struct ipc_namespace *ns,
84							int id)
85{
86	struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&msg_ids(ns), id);
87
88	if (IS_ERR(ipcp))
89		return ERR_CAST(ipcp);
90
91	return container_of(ipcp, struct msg_queue, q_perm);
92}
93
94static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s)
95{
96	ipc_rmid(&msg_ids(ns), &s->q_perm);
97}
98
99static void msg_rcu_free(struct rcu_head *head)
100{
101	struct kern_ipc_perm *p = container_of(head, struct kern_ipc_perm, rcu);
102	struct msg_queue *msq = container_of(p, struct msg_queue, q_perm);
103
104	security_msg_queue_free(msq);
105	kvfree(msq);
106}
107
108/**
109 * newque - Create a new msg queue
110 * @ns: namespace
111 * @params: ptr to the structure that contains the key and msgflg
112 *
113 * Called with msg_ids.rwsem held (writer)
114 */
115static int newque(struct ipc_namespace *ns, struct ipc_params *params)
116{
117	struct msg_queue *msq;
118	int retval;
119	key_t key = params->key;
120	int msgflg = params->flg;
121
122	msq = kvmalloc(sizeof(*msq), GFP_KERNEL);
123	if (unlikely(!msq))
124		return -ENOMEM;
125
126	msq->q_perm.mode = msgflg & S_IRWXUGO;
127	msq->q_perm.key = key;
128
129	msq->q_perm.security = NULL;
130	retval = security_msg_queue_alloc(msq);
131	if (retval) {
132		kvfree(msq);
133		return retval;
134	}
135
136	msq->q_stime = msq->q_rtime = 0;
137	msq->q_ctime = ktime_get_real_seconds();
138	msq->q_cbytes = msq->q_qnum = 0;
139	msq->q_qbytes = ns->msg_ctlmnb;
140	msq->q_lspid = msq->q_lrpid = 0;
141	INIT_LIST_HEAD(&msq->q_messages);
142	INIT_LIST_HEAD(&msq->q_receivers);
143	INIT_LIST_HEAD(&msq->q_senders);
144
145	/* ipc_addid() locks msq upon success. */
146	retval = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
147	if (retval < 0) {
148		call_rcu(&msq->q_perm.rcu, msg_rcu_free);
149		return retval;
150	}
151
152	ipc_unlock_object(&msq->q_perm);
153	rcu_read_unlock();
154
155	return msq->q_perm.id;
156}
157
158static inline bool msg_fits_inqueue(struct msg_queue *msq, size_t msgsz)
159{
160	return msgsz + msq->q_cbytes <= msq->q_qbytes &&
161		1 + msq->q_qnum <= msq->q_qbytes;
162}
163
164static inline void ss_add(struct msg_queue *msq,
165			  struct msg_sender *mss, size_t msgsz)
166{
167	mss->tsk = current;
168	mss->msgsz = msgsz;
169	__set_current_state(TASK_INTERRUPTIBLE);
170	list_add_tail(&mss->list, &msq->q_senders);
171}
172
173static inline void ss_del(struct msg_sender *mss)
174{
175	if (mss->list.next)
176		list_del(&mss->list);
177}
178
/*
 * ss_wakeup - queue blocked senders of @msq for wakeup
 * @msq: queue whose q_senders list is walked
 * @wake_q: wake queue the selected tasks are added to
 * @kill: true when called from freeque(); every sender is then queued for
 *        wakeup and its list.next is cleared, which makes ss_del() skip
 *        the unlink.
 *
 * With @kill false only senders whose message fits (msg_fits_inqueue())
 * are queued for wakeup; the others are moved to the tail of q_senders.
 */
static void ss_wakeup(struct msg_queue *msq,
		      struct wake_q_head *wake_q, bool kill)
{
	struct msg_sender *mss, *t;
	/* First sender that was rotated to the tail; seeing it again ends the walk. */
	struct task_struct *stop_tsk = NULL;
	struct list_head *h = &msq->q_senders;

	list_for_each_entry_safe(mss, t, h, list) {
		if (kill)
			mss->list.next = NULL;

		/*
		 * Stop at the first task we don't wakeup,
		 * we've already iterated the original
		 * sender queue.
		 */
		else if (stop_tsk == mss->tsk)
			break;
		/*
		 * We are not in an EIDRM scenario here, therefore
		 * verify that we really need to wakeup the task.
		 * To maintain current semantics and wakeup order,
		 * move the sender to the tail on behalf of the
		 * blocked task.
		 */
		else if (!msg_fits_inqueue(msq, mss->msgsz)) {
			if (!stop_tsk)
				stop_tsk = mss->tsk;

			list_move_tail(&mss->list, &msq->q_senders);
			continue;
		}

		wake_q_add(wake_q, mss->tsk);
	}
}
215
216static void expunge_all(struct msg_queue *msq, int res,
217			struct wake_q_head *wake_q)
218{
219	struct msg_receiver *msr, *t;
220
221	list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
222		wake_q_add(wake_q, msr->r_tsk);
223		WRITE_ONCE(msr->r_msg, ERR_PTR(res));
224	}
225}
226
/*
 * freeque() wakes up waiters on the sender and receiver waiting queue,
 * removes the message queue from message queue ID IDR, and cleans up all the
 * messages associated with this queue.
 *
 * msg_ids.rwsem (writer) and the spinlock for this message queue are held
 * before freeque() is called. msg_ids.rwsem remains locked on exit.
 *
 * The object lock and the RCU read section are both released in here.
 */
static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
{
	struct msg_msg *msg, *t;
	struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
	DEFINE_WAKE_Q(wake_q);

	/* Receivers get -EIDRM; senders are marked killed and woken. */
	expunge_all(msq, -EIDRM, &wake_q);
	ss_wakeup(msq, &wake_q, true);
	msg_rmid(ns, msq);
	ipc_unlock_object(&msq->q_perm);
	/* The wakeups collected above are issued after the lock is dropped. */
	wake_up_q(&wake_q);
	rcu_read_unlock();

	/* Drop every queued message and fix up the namespace counters. */
	list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
		atomic_dec(&ns->msg_hdrs);
		free_msg(msg);
	}
	atomic_sub(msq->q_cbytes, &ns->msg_bytes);
	ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
}
255
256/*
257 * Called with msg_ids.rwsem and ipcp locked.
258 */
259static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg)
260{
261	struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
262
263	return security_msg_queue_associate(msq, msgflg);
264}
265
266long ksys_msgget(key_t key, int msgflg)
267{
268	struct ipc_namespace *ns;
269	static const struct ipc_ops msg_ops = {
270		.getnew = newque,
271		.associate = msg_security,
272	};
273	struct ipc_params msg_params;
274
275	ns = current->nsproxy->ipc_ns;
276
277	msg_params.key = key;
278	msg_params.flg = msgflg;
279
280	return ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params);
281}
282
/* msgget() system call: thin wrapper around ksys_msgget(). */
SYSCALL_DEFINE2(msgget, key_t, key, int, msgflg)
{
	return ksys_msgget(key, msgflg);
}
287
288static inline unsigned long
289copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version)
290{
291	switch (version) {
292	case IPC_64:
293		return copy_to_user(buf, in, sizeof(*in));
294	case IPC_OLD:
295	{
296		struct msqid_ds out;
297
298		memset(&out, 0, sizeof(out));
299
300		ipc64_perm_to_ipc_perm(&in->msg_perm, &out.msg_perm);
301
302		out.msg_stime		= in->msg_stime;
303		out.msg_rtime		= in->msg_rtime;
304		out.msg_ctime		= in->msg_ctime;
305
306		if (in->msg_cbytes > USHRT_MAX)
307			out.msg_cbytes	= USHRT_MAX;
308		else
309			out.msg_cbytes	= in->msg_cbytes;
310		out.msg_lcbytes		= in->msg_cbytes;
311
312		if (in->msg_qnum > USHRT_MAX)
313			out.msg_qnum	= USHRT_MAX;
314		else
315			out.msg_qnum	= in->msg_qnum;
316
317		if (in->msg_qbytes > USHRT_MAX)
318			out.msg_qbytes	= USHRT_MAX;
319		else
320			out.msg_qbytes	= in->msg_qbytes;
321		out.msg_lqbytes		= in->msg_qbytes;
322
323		out.msg_lspid		= in->msg_lspid;
324		out.msg_lrpid		= in->msg_lrpid;
325
326		return copy_to_user(buf, &out, sizeof(out));
327	}
328	default:
329		return -EINVAL;
330	}
331}
332
333static inline unsigned long
334copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version)
335{
336	switch (version) {
337	case IPC_64:
338		if (copy_from_user(out, buf, sizeof(*out)))
339			return -EFAULT;
340		return 0;
341	case IPC_OLD:
342	{
343		struct msqid_ds tbuf_old;
344
345		if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
346			return -EFAULT;
347
348		out->msg_perm.uid	= tbuf_old.msg_perm.uid;
349		out->msg_perm.gid	= tbuf_old.msg_perm.gid;
350		out->msg_perm.mode	= tbuf_old.msg_perm.mode;
351
352		if (tbuf_old.msg_qbytes == 0)
353			out->msg_qbytes	= tbuf_old.msg_lqbytes;
354		else
355			out->msg_qbytes	= tbuf_old.msg_qbytes;
356
357		return 0;
358	}
359	default:
360		return -EINVAL;
361	}
362}
363
/*
 * This function handles some msgctl commands which require the rwsem
 * to be held in write mode.
 * NOTE: no locks must be held, the rwsem is taken inside this function.
 *
 * @cmd is IPC_RMID or IPC_SET; any other value yields -EINVAL.  For
 * IPC_SET, @msqid64 carries the new permissions and msg_qbytes.
 * Returns 0 on success or a negative errno.
 *
 * NOTE(review): msqid64->msg_qbytes is read below for IPC_RMID as well,
 * where ksys_msgctl() has not filled @msqid64 in - confirm that
 * ipcctl_pre_down_nolock() ignores the value for that command.
 */
static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
			struct msqid64_ds *msqid64)
{
	struct kern_ipc_perm *ipcp;
	struct msg_queue *msq;
	int err;

	down_write(&msg_ids(ns).rwsem);
	rcu_read_lock();

	ipcp = ipcctl_pre_down_nolock(ns, &msg_ids(ns), msqid, cmd,
				      &msqid64->msg_perm, msqid64->msg_qbytes);
	if (IS_ERR(ipcp)) {
		err = PTR_ERR(ipcp);
		goto out_unlock1;
	}

	msq = container_of(ipcp, struct msg_queue, q_perm);

	err = security_msg_queue_msgctl(msq, cmd);
	if (err)
		goto out_unlock1;

	switch (cmd) {
	case IPC_RMID:
		ipc_lock_object(&msq->q_perm);
		/* freeque unlocks the ipc object and rcu */
		freeque(ns, ipcp);
		goto out_up;
	case IPC_SET:
	{
		DEFINE_WAKE_Q(wake_q);

		/* Raising the limit above msg_ctlmnb needs CAP_SYS_RESOURCE. */
		if (msqid64->msg_qbytes > ns->msg_ctlmnb &&
		    !capable(CAP_SYS_RESOURCE)) {
			err = -EPERM;
			goto out_unlock1;
		}

		ipc_lock_object(&msq->q_perm);
		err = ipc_update_perm(&msqid64->msg_perm, ipcp);
		if (err)
			goto out_unlock0;

		msq->q_qbytes = msqid64->msg_qbytes;

		msq->q_ctime = ktime_get_real_seconds();
		/*
		 * Sleeping receivers might be excluded by
		 * stricter permissions.
		 */
		expunge_all(msq, -EAGAIN, &wake_q);
		/*
		 * Sleeping senders might be able to send
		 * due to a larger queue size.
		 */
		ss_wakeup(msq, &wake_q, false);
		ipc_unlock_object(&msq->q_perm);
		/* Issue the collected wakeups only after the lock is dropped. */
		wake_up_q(&wake_q);

		goto out_unlock1;
	}
	default:
		err = -EINVAL;
		goto out_unlock1;
	}

out_unlock0:
	ipc_unlock_object(&msq->q_perm);
out_unlock1:
	rcu_read_unlock();
out_up:
	up_write(&msg_ids(ns).rwsem);
	return err;
}
444
/*
 * msgctl_info - backend for the IPC_INFO and MSG_INFO commands
 * @ns: ipc namespace to report on
 * @msqid: not used by this function
 * @cmd: MSG_INFO reports live usage counters; any other command reports
 *       the MSGPOOL/MSGMAP/MSGTQL constants instead
 * @msginfo: kernel buffer that is filled in
 *
 * Returns the value of ipc_get_maxid() (clamped to 0 when negative), or
 * the error returned by the security hook.
 */
static int msgctl_info(struct ipc_namespace *ns, int msqid,
			 int cmd, struct msginfo *msginfo)
{
	int err;
	int max_id;

	err = security_msg_queue_msgctl(NULL, cmd);
	if (err)
		return err;

	/*
	 * We must not return kernel stack data.
	 * Due to padding, it's not enough
	 * to set all member fields.
	 */
	memset(msginfo, 0, sizeof(*msginfo));
	msginfo->msgmni = ns->msg_ctlmni;
	msginfo->msgmax = ns->msg_ctlmax;
	msginfo->msgmnb = ns->msg_ctlmnb;
	msginfo->msgssz = MSGSSZ;
	msginfo->msgseg = MSGSEG;
	down_read(&msg_ids(ns).rwsem);
	if (cmd == MSG_INFO) {
		/* Live numbers: queues in use, queued messages, queued bytes. */
		msginfo->msgpool = msg_ids(ns).in_use;
		msginfo->msgmap = atomic_read(&ns->msg_hdrs);
		msginfo->msgtql = atomic_read(&ns->msg_bytes);
	} else {
		msginfo->msgmap = MSGMAP;
		msginfo->msgpool = MSGPOOL;
		msginfo->msgtql = MSGTQL;
	}
	max_id = ipc_get_maxid(&msg_ids(ns));
	up_read(&msg_ids(ns).rwsem);
	return (max_id < 0) ? 0 : max_id;
}
480
/*
 * msgctl_stat - backend for the IPC_STAT and MSG_STAT commands
 * @ns: ipc namespace
 * @msqid: queue id; for MSG_STAT it is looked up with msq_obtain_object()
 *         instead of msq_obtain_object_check()
 * @cmd: MSG_STAT or IPC_STAT
 * @p: kernel buffer that receives the queue status
 *
 * Returns a negative errno on failure.  On success MSG_STAT returns the
 * id of the queue and every other command returns 0.
 */
static int msgctl_stat(struct ipc_namespace *ns, int msqid,
			 int cmd, struct msqid64_ds *p)
{
	struct msg_queue *msq;
	int id = 0;
	int err;

	/* Zero everything first so padding never carries stale data. */
	memset(p, 0, sizeof(*p));

	rcu_read_lock();
	if (cmd == MSG_STAT) {
		msq = msq_obtain_object(ns, msqid);
		if (IS_ERR(msq)) {
			err = PTR_ERR(msq);
			goto out_unlock;
		}
		id = msq->q_perm.id;
	} else {
		msq = msq_obtain_object_check(ns, msqid);
		if (IS_ERR(msq)) {
			err = PTR_ERR(msq);
			goto out_unlock;
		}
	}

	/* The caller needs read permission on the queue. */
	err = -EACCES;
	if (ipcperms(ns, &msq->q_perm, S_IRUGO))
		goto out_unlock;

	err = security_msg_queue_msgctl(msq, cmd);
	if (err)
		goto out_unlock;

	ipc_lock_object(&msq->q_perm);

	/* The queue may have been removed before the lock was taken. */
	if (!ipc_valid_object(&msq->q_perm)) {
		ipc_unlock_object(&msq->q_perm);
		err = -EIDRM;
		goto out_unlock;
	}

	/* Snapshot the status fields under the object lock. */
	kernel_to_ipc64_perm(&msq->q_perm, &p->msg_perm);
	p->msg_stime  = msq->q_stime;
	p->msg_rtime  = msq->q_rtime;
	p->msg_ctime  = msq->q_ctime;
	p->msg_cbytes = msq->q_cbytes;
	p->msg_qnum   = msq->q_qnum;
	p->msg_qbytes = msq->q_qbytes;
	p->msg_lspid  = msq->q_lspid;
	p->msg_lrpid  = msq->q_lrpid;

	ipc_unlock_object(&msq->q_perm);
	rcu_read_unlock();
	return id;

out_unlock:
	rcu_read_unlock();
	return err;
}
540
/*
 * ksys_msgctl - in-kernel entry point for msgctl()
 * @msqid: queue id (an index for MSG_STAT)
 * @cmd: command, parsed by ipc_parse_version() before dispatch
 * @buf: user buffer whose layout depends on @cmd and the parsed version
 *
 * Dispatches to msgctl_info(), msgctl_stat() or msgctl_down() and does
 * the user-space copies around them.  Returns the helper's result, -EFAULT
 * when a user copy fails, or -EINVAL for negative arguments and unknown
 * commands.
 */
long ksys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
{
	int version;
	struct ipc_namespace *ns;
	struct msqid64_ds msqid64;
	int err;

	if (msqid < 0 || cmd < 0)
		return -EINVAL;

	version = ipc_parse_version(&cmd);
	ns = current->nsproxy->ipc_ns;

	switch (cmd) {
	case IPC_INFO:
	case MSG_INFO: {
		struct msginfo msginfo;
		err = msgctl_info(ns, msqid, cmd, &msginfo);
		if (err < 0)
			return err;
		/* On success err holds msgctl_info()'s result and is returned as is. */
		if (copy_to_user(buf, &msginfo, sizeof(struct msginfo)))
			err = -EFAULT;
		return err;
	}
	case MSG_STAT:	/* msqid is an index rather than a msg queue id */
	case IPC_STAT:
		err = msgctl_stat(ns, msqid, cmd, &msqid64);
		if (err < 0)
			return err;
		if (copy_msqid_to_user(buf, &msqid64, version))
			err = -EFAULT;
		return err;
	case IPC_SET:
		if (copy_msqid_from_user(&msqid64, buf, version))
			return -EFAULT;
		/* fallthru */
	case IPC_RMID:
		/* For IPC_RMID msqid64 has not been filled in at this point. */
		return msgctl_down(ns, msqid, cmd, &msqid64);
	default:
		return  -EINVAL;
	}
}
583
/* msgctl() system call: thin wrapper around ksys_msgctl(). */
SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf)
{
	return ksys_msgctl(msqid, cmd, buf);
}
588
589#ifdef CONFIG_COMPAT
590
/*
 * Compat layout of the old-style queue status structure.  It is read by
 * copy_compat_msqid_from_user() and written by copy_compat_msqid_to_user()
 * whenever the parsed version is not IPC_64.
 */
struct compat_msqid_ds {
	struct compat_ipc_perm msg_perm;
	compat_uptr_t msg_first;	/* never set by the helpers in this file */
	compat_uptr_t msg_last;		/* never set by the helpers in this file */
	compat_time_t msg_stime;
	compat_time_t msg_rtime;
	compat_time_t msg_ctime;
	compat_ulong_t msg_lcbytes;	/* never set by the helpers in this file */
	compat_ulong_t msg_lqbytes;	/* never set by the helpers in this file */
	unsigned short msg_cbytes;
	unsigned short msg_qnum;
	unsigned short msg_qbytes;
	compat_ipc_pid_t msg_lspid;
	compat_ipc_pid_t msg_lrpid;
};
606
607static int copy_compat_msqid_from_user(struct msqid64_ds *out, void __user *buf,
608					int version)
609{
610	memset(out, 0, sizeof(*out));
611	if (version == IPC_64) {
612		struct compat_msqid64_ds __user *p = buf;
613		if (get_compat_ipc64_perm(&out->msg_perm, &p->msg_perm))
614			return -EFAULT;
615		if (get_user(out->msg_qbytes, &p->msg_qbytes))
616			return -EFAULT;
617	} else {
618		struct compat_msqid_ds __user *p = buf;
619		if (get_compat_ipc_perm(&out->msg_perm, &p->msg_perm))
620			return -EFAULT;
621		if (get_user(out->msg_qbytes, &p->msg_qbytes))
622			return -EFAULT;
623	}
624	return 0;
625}
626
627static int copy_compat_msqid_to_user(void __user *buf, struct msqid64_ds *in,
628					int version)
629{
630	if (version == IPC_64) {
631		struct compat_msqid64_ds v;
632		memset(&v, 0, sizeof(v));
633		to_compat_ipc64_perm(&v.msg_perm, &in->msg_perm);
634		v.msg_stime = in->msg_stime;
635		v.msg_rtime = in->msg_rtime;
636		v.msg_ctime = in->msg_ctime;
637		v.msg_cbytes = in->msg_cbytes;
638		v.msg_qnum = in->msg_qnum;
639		v.msg_qbytes = in->msg_qbytes;
640		v.msg_lspid = in->msg_lspid;
641		v.msg_lrpid = in->msg_lrpid;
642		return copy_to_user(buf, &v, sizeof(v));
643	} else {
644		struct compat_msqid_ds v;
645		memset(&v, 0, sizeof(v));
646		to_compat_ipc_perm(&v.msg_perm, &in->msg_perm);
647		v.msg_stime = in->msg_stime;
648		v.msg_rtime = in->msg_rtime;
649		v.msg_ctime = in->msg_ctime;
650		v.msg_cbytes = in->msg_cbytes;
651		v.msg_qnum = in->msg_qnum;
652		v.msg_qbytes = in->msg_qbytes;
653		v.msg_lspid = in->msg_lspid;
654		v.msg_lrpid = in->msg_lrpid;
655		return copy_to_user(buf, &v, sizeof(v));
656	}
657}
658
/*
 * compat_ksys_msgctl - in-kernel entry point for the compat msgctl()
 * @msqid: queue id (an index for MSG_STAT)
 * @cmd: command, parsed by compat_ipc_parse_version() before dispatch
 * @uptr: compat user buffer whose layout depends on @cmd and the version
 *
 * Mirrors ksys_msgctl(), using the compat copy helpers for the status
 * structure.  Returns the helper's result, -EFAULT when a user copy
 * fails, or -EINVAL for negative arguments and unknown commands.
 */
long compat_ksys_msgctl(int msqid, int cmd, void __user *uptr)
{
	struct ipc_namespace *ns;
	int err;
	struct msqid64_ds msqid64;
	int version = compat_ipc_parse_version(&cmd);

	ns = current->nsproxy->ipc_ns;

	if (msqid < 0 || cmd < 0)
		return -EINVAL;

	switch (cmd & (~IPC_64)) {
	case IPC_INFO:
	case MSG_INFO: {
		struct msginfo msginfo;
		err = msgctl_info(ns, msqid, cmd, &msginfo);
		if (err < 0)
			return err;
		/* On success err holds msgctl_info()'s result and is returned as is. */
		if (copy_to_user(uptr, &msginfo, sizeof(struct msginfo)))
			err = -EFAULT;
		return err;
	}
	case IPC_STAT:
	case MSG_STAT:
		err = msgctl_stat(ns, msqid, cmd, &msqid64);
		if (err < 0)
			return err;
		if (copy_compat_msqid_to_user(uptr, &msqid64, version))
			err = -EFAULT;
		return err;
	case IPC_SET:
		if (copy_compat_msqid_from_user(&msqid64, uptr, version))
			return -EFAULT;
		/* fallthru */
	case IPC_RMID:
		/* For IPC_RMID msqid64 has not been filled in at this point. */
		return msgctl_down(ns, msqid, cmd, &msqid64);
	default:
		return -EINVAL;
	}
}
700
/* Compat msgctl() system call: thin wrapper around compat_ksys_msgctl(). */
COMPAT_SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, void __user *, uptr)
{
	return compat_ksys_msgctl(msqid, cmd, uptr);
}
705#endif
706
707static int testmsg(struct msg_msg *msg, long type, int mode)
708{
709	switch (mode) {
710	case SEARCH_ANY:
711	case SEARCH_NUMBER:
712		return 1;
713	case SEARCH_LESSEQUAL:
714		if (msg->m_type <= type)
715			return 1;
716		break;
717	case SEARCH_EQUAL:
718		if (msg->m_type == type)
719			return 1;
720		break;
721	case SEARCH_NOTEQUAL:
722		if (msg->m_type != type)
723			return 1;
724		break;
725	}
726	return 0;
727}
728
729static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg,
730				 struct wake_q_head *wake_q)
731{
732	struct msg_receiver *msr, *t;
733
734	list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
735		if (testmsg(msg, msr->r_msgtype, msr->r_mode) &&
736		    !security_msg_queue_msgrcv(msq, msg, msr->r_tsk,
737					       msr->r_msgtype, msr->r_mode)) {
738
739			list_del(&msr->r_list);
740			if (msr->r_maxsize < msg->m_ts) {
741				wake_q_add(wake_q, msr->r_tsk);
742				WRITE_ONCE(msr->r_msg, ERR_PTR(-E2BIG));
743			} else {
744				msq->q_lrpid = task_pid_vnr(msr->r_tsk);
745				msq->q_rtime = get_seconds();
746
747				wake_q_add(wake_q, msr->r_tsk);
748				WRITE_ONCE(msr->r_msg, msg);
749				return 1;
750			}
751		}
752	}
753
754	return 0;
755}
756
757static long do_msgsnd(int msqid, long mtype, void __user *mtext,
758		size_t msgsz, int msgflg)
759{
760	struct msg_queue *msq;
761	struct msg_msg *msg;
762	int err;
763	struct ipc_namespace *ns;
764	DEFINE_WAKE_Q(wake_q);
765
766	ns = current->nsproxy->ipc_ns;
767
768	if (msgsz > ns->msg_ctlmax || (long) msgsz < 0 || msqid < 0)
769		return -EINVAL;
770	if (mtype < 1)
771		return -EINVAL;
772
773	msg = load_msg(mtext, msgsz);
774	if (IS_ERR(msg))
775		return PTR_ERR(msg);
776
777	msg->m_type = mtype;
778	msg->m_ts = msgsz;
779
780	rcu_read_lock();
781	msq = msq_obtain_object_check(ns, msqid);
782	if (IS_ERR(msq)) {
783		err = PTR_ERR(msq);
784		goto out_unlock1;
785	}
786
787	ipc_lock_object(&msq->q_perm);
788
789	for (;;) {
790		struct msg_sender s;
791
792		err = -EACCES;
793		if (ipcperms(ns, &msq->q_perm, S_IWUGO))
794			goto out_unlock0;
795
796		/* raced with RMID? */
797		if (!ipc_valid_object(&msq->q_perm)) {
798			err = -EIDRM;
799			goto out_unlock0;
800		}
801
802		err = security_msg_queue_msgsnd(msq, msg, msgflg);
803		if (err)
804			goto out_unlock0;
805
806		if (msg_fits_inqueue(msq, msgsz))
807			break;
808
809		/* queue full, wait: */
810		if (msgflg & IPC_NOWAIT) {
811			err = -EAGAIN;
812			goto out_unlock0;
813		}
814
815		/* enqueue the sender and prepare to block */
816		ss_add(msq, &s, msgsz);
817
818		if (!ipc_rcu_getref(&msq->q_perm)) {
819			err = -EIDRM;
820			goto out_unlock0;
821		}
822
823		ipc_unlock_object(&msq->q_perm);
824		rcu_read_unlock();
825		schedule();
826
827		rcu_read_lock();
828		ipc_lock_object(&msq->q_perm);
829
830		ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
831		/* raced with RMID? */
832		if (!ipc_valid_object(&msq->q_perm)) {
833			err = -EIDRM;
834			goto out_unlock0;
835		}
836		ss_del(&s);
837
838		if (signal_pending(current)) {
839			err = -ERESTARTNOHAND;
840			goto out_unlock0;
841		}
842
843	}
844
845	msq->q_lspid = task_tgid_vnr(current);
846	msq->q_stime = get_seconds();
847
848	if (!pipelined_send(msq, msg, &wake_q)) {
849		/* no one is waiting for this message, enqueue it */
850		list_add_tail(&msg->m_list, &msq->q_messages);
851		msq->q_cbytes += msgsz;
852		msq->q_qnum++;
853		atomic_add(msgsz, &ns->msg_bytes);
854		atomic_inc(&ns->msg_hdrs);
855	}
856
857	err = 0;
858	msg = NULL;
859
860out_unlock0:
861	ipc_unlock_object(&msq->q_perm);
862	wake_up_q(&wake_q);
863out_unlock1:
864	rcu_read_unlock();
865	if (msg != NULL)
866		free_msg(msg);
867	return err;
868}
869
/*
 * msgsnd() system call: fetches the message type from the user's msgbuf
 * and passes the text part on to do_msgsnd().  Returns -EFAULT when the
 * type cannot be read.
 */
SYSCALL_DEFINE4(msgsnd, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
		int, msgflg)
{
	long mtype;

	if (get_user(mtype, &msgp->mtype))
		return -EFAULT;
	return do_msgsnd(msqid, mtype, msgp->mtext, msgsz, msgflg);
}
879
880#ifdef CONFIG_COMPAT
881
/* Message buffer layout as seen by compat tasks in msgsnd()/msgrcv(). */
struct compat_msgbuf {
	compat_long_t mtype;	/* message type */
	char mtext[1];		/* start of the message text */
};
886
/*
 * Compat msgsnd() system call: reads the compat-sized message type and
 * passes the text part on to do_msgsnd().  Returns -EFAULT when the type
 * cannot be read.
 */
COMPAT_SYSCALL_DEFINE4(msgsnd, int, msqid, compat_uptr_t, msgp,
		       compat_ssize_t, msgsz, int, msgflg)
{
	struct compat_msgbuf __user *up = compat_ptr(msgp);
	compat_long_t mtype;

	if (get_user(mtype, &up->mtype))
		return -EFAULT;
	return do_msgsnd(msqid, mtype, up->mtext, (ssize_t)msgsz, msgflg);
}
897#endif
898
899static inline int convert_mode(long *msgtyp, int msgflg)
900{
901	if (msgflg & MSG_COPY)
902		return SEARCH_NUMBER;
903	/*
904	 *  find message of correct type.
905	 *  msgtyp = 0 => get first.
906	 *  msgtyp > 0 => get first message of matching type.
907	 *  msgtyp < 0 => get message with least type must be < abs(msgtype).
908	 */
909	if (*msgtyp == 0)
910		return SEARCH_ANY;
911	if (*msgtyp < 0) {
912		if (*msgtyp == LONG_MIN) /* -LONG_MIN is undefined */
913			*msgtyp = LONG_MAX;
914		else
915			*msgtyp = -*msgtyp;
916		return SEARCH_LESSEQUAL;
917	}
918	if (msgflg & MSG_EXCEPT)
919		return SEARCH_NOTEQUAL;
920	return SEARCH_EQUAL;
921}
922
923static long do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz)
924{
925	struct msgbuf __user *msgp = dest;
926	size_t msgsz;
927
928	if (put_user(msg->m_type, &msgp->mtype))
929		return -EFAULT;
930
931	msgsz = (bufsz > msg->m_ts) ? msg->m_ts : bufsz;
932	if (store_msg(msgp->mtext, msg, msgsz))
933		return -EFAULT;
934	return msgsz;
935}
936
937#ifdef CONFIG_CHECKPOINT_RESTORE
938/*
939 * This function creates new kernel message structure, large enough to store
940 * bufsz message bytes.
941 */
942static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz)
943{
944	struct msg_msg *copy;
945
946	/*
947	 * Create dummy message to copy real message to.
948	 */
949	copy = load_msg(buf, bufsz);
950	if (!IS_ERR(copy))
951		copy->m_ts = bufsz;
952	return copy;
953}
954
955static inline void free_copy(struct msg_msg *copy)
956{
957	if (copy)
958		free_msg(copy);
959}
960#else
961static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz)
962{
963	return ERR_PTR(-ENOSYS);
964}
965
966static inline void free_copy(struct msg_msg *copy)
967{
968}
969#endif
970
/*
 * find_msg - pick the message a receiver should get
 * @msq: queue to search
 * @msgtyp: in/out type argument; lowered while searching in
 *          SEARCH_LESSEQUAL mode, used as an index in SEARCH_NUMBER mode
 * @mode: one of the SEARCH_* modes
 *
 * Only messages that pass testmsg() and the security hook are considered.
 * Returns the selected message or ERR_PTR(-EAGAIN) if there is none.
 */
static struct msg_msg *find_msg(struct msg_queue *msq, long *msgtyp, int mode)
{
	struct msg_msg *msg, *found = NULL;
	/* Number of matching messages seen so far (SEARCH_NUMBER index). */
	long count = 0;

	list_for_each_entry(msg, &msq->q_messages, m_list) {
		if (testmsg(msg, *msgtyp, mode) &&
		    !security_msg_queue_msgrcv(msq, msg, current,
					       *msgtyp, mode)) {
			if (mode == SEARCH_LESSEQUAL && msg->m_type != 1) {
				/*
				 * Remember this candidate and keep looking,
				 * now only for a strictly lower type.  A
				 * type 1 message skips this branch and is
				 * returned right away below.
				 */
				*msgtyp = msg->m_type - 1;
				found = msg;
			} else if (mode == SEARCH_NUMBER) {
				if (*msgtyp == count)
					return msg;
			} else
				return msg;
			count++;
		}
	}

	return found ?: ERR_PTR(-EAGAIN);
}
994
995static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgflg,
996	       long (*msg_handler)(void __user *, struct msg_msg *, size_t))
997{
998	int mode;
999	struct msg_queue *msq;
1000	struct ipc_namespace *ns;
1001	struct msg_msg *msg, *copy = NULL;
1002	DEFINE_WAKE_Q(wake_q);
1003
1004	ns = current->nsproxy->ipc_ns;
1005
1006	if (msqid < 0 || (long) bufsz < 0)
1007		return -EINVAL;
1008
1009	if (msgflg & MSG_COPY) {
1010		if ((msgflg & MSG_EXCEPT) || !(msgflg & IPC_NOWAIT))
1011			return -EINVAL;
1012		copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax));
1013		if (IS_ERR(copy))
1014			return PTR_ERR(copy);
1015	}
1016	mode = convert_mode(&msgtyp, msgflg);
1017
1018	rcu_read_lock();
1019	msq = msq_obtain_object_check(ns, msqid);
1020	if (IS_ERR(msq)) {
1021		rcu_read_unlock();
1022		free_copy(copy);
1023		return PTR_ERR(msq);
1024	}
1025
1026	for (;;) {
1027		struct msg_receiver msr_d;
1028
1029		msg = ERR_PTR(-EACCES);
1030		if (ipcperms(ns, &msq->q_perm, S_IRUGO))
1031			goto out_unlock1;
1032
1033		ipc_lock_object(&msq->q_perm);
1034
1035		/* raced with RMID? */
1036		if (!ipc_valid_object(&msq->q_perm)) {
1037			msg = ERR_PTR(-EIDRM);
1038			goto out_unlock0;
1039		}
1040
1041		msg = find_msg(msq, &msgtyp, mode);
1042		if (!IS_ERR(msg)) {
1043			/*
1044			 * Found a suitable message.
1045			 * Unlink it from the queue.
1046			 */
1047			if ((bufsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) {
1048				msg = ERR_PTR(-E2BIG);
1049				goto out_unlock0;
1050			}
1051			/*
1052			 * If we are copying, then do not unlink message and do
1053			 * not update queue parameters.
1054			 */
1055			if (msgflg & MSG_COPY) {
1056				msg = copy_msg(msg, copy);
1057				goto out_unlock0;
1058			}
1059
1060			list_del(&msg->m_list);
1061			msq->q_qnum--;
1062			msq->q_rtime = get_seconds();
1063			msq->q_lrpid = task_tgid_vnr(current);
1064			msq->q_cbytes -= msg->m_ts;
1065			atomic_sub(msg->m_ts, &ns->msg_bytes);
1066			atomic_dec(&ns->msg_hdrs);
1067			ss_wakeup(msq, &wake_q, false);
1068
1069			goto out_unlock0;
1070		}
1071
1072		/* No message waiting. Wait for a message */
1073		if (msgflg & IPC_NOWAIT) {
1074			msg = ERR_PTR(-ENOMSG);
1075			goto out_unlock0;
1076		}
1077
1078		list_add_tail(&msr_d.r_list, &msq->q_receivers);
1079		msr_d.r_tsk = current;
1080		msr_d.r_msgtype = msgtyp;
1081		msr_d.r_mode = mode;
1082		if (msgflg & MSG_NOERROR)
1083			msr_d.r_maxsize = INT_MAX;
1084		else
1085			msr_d.r_maxsize = bufsz;
1086		msr_d.r_msg = ERR_PTR(-EAGAIN);
1087		__set_current_state(TASK_INTERRUPTIBLE);
1088
1089		ipc_unlock_object(&msq->q_perm);
1090		rcu_read_unlock();
1091		schedule();
1092
1093		/*
1094		 * Lockless receive, part 1:
1095		 * We don't hold a reference to the queue and getting a
1096		 * reference would defeat the idea of a lockless operation,
1097		 * thus the code relies on rcu to guarantee the existence of
1098		 * msq:
1099		 * Prior to destruction, expunge_all(-EIRDM) changes r_msg.
1100		 * Thus if r_msg is -EAGAIN, then the queue not yet destroyed.
1101		 */
1102		rcu_read_lock();
1103
1104		/*
1105		 * Lockless receive, part 2:
1106		 * The work in pipelined_send() and expunge_all():
1107		 * - Set pointer to message
1108		 * - Queue the receiver task for later wakeup
1109		 * - Wake up the process after the lock is dropped.
1110		 *
1111		 * Should the process wake up before this wakeup (due to a
1112		 * signal) it will either see the message and continue ...
1113		 */
1114		msg = READ_ONCE(msr_d.r_msg);
1115		if (msg != ERR_PTR(-EAGAIN))
1116			goto out_unlock1;
1117
1118		 /*
1119		  * ... or see -EAGAIN, acquire the lock to check the message
1120		  * again.
1121		  */
1122		ipc_lock_object(&msq->q_perm);
1123
1124		msg = msr_d.r_msg;
1125		if (msg != ERR_PTR(-EAGAIN))
1126			goto out_unlock0;
1127
1128		list_del(&msr_d.r_list);
1129		if (signal_pending(current)) {
1130			msg = ERR_PTR(-ERESTARTNOHAND);
1131			goto out_unlock0;
1132		}
1133
1134		ipc_unlock_object(&msq->q_perm);
1135	}
1136
1137out_unlock0:
1138	ipc_unlock_object(&msq->q_perm);
1139	wake_up_q(&wake_q);
1140out_unlock1:
1141	rcu_read_unlock();
1142	if (IS_ERR(msg)) {
1143		free_copy(copy);
1144		return PTR_ERR(msg);
1145	}
1146
1147	bufsz = msg_handler(buf, msg, bufsz);
1148	free_msg(msg);
1149
1150	return bufsz;
1151}
1152
/*
 * ksys_msgrcv - in-kernel entry point for msgrcv()
 *
 * Runs do_msgrcv() with do_msg_fill() as the copy-out handler and returns
 * its result.
 */
long ksys_msgrcv(int msqid, struct msgbuf __user *msgp, size_t msgsz,
		 long msgtyp, int msgflg)
{
	return do_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg, do_msg_fill);
}
1158
/* msgrcv() system call: thin wrapper around ksys_msgrcv(). */
SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
		long, msgtyp, int, msgflg)
{
	return ksys_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg);
}
1164
1165#ifdef CONFIG_COMPAT
1166static long compat_do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz)
1167{
1168	struct compat_msgbuf __user *msgp = dest;
1169	size_t msgsz;
1170
1171	if (put_user(msg->m_type, &msgp->mtype))
1172		return -EFAULT;
1173
1174	msgsz = (bufsz > msg->m_ts) ? msg->m_ts : bufsz;
1175	if (store_msg(msgp->mtext, msg, msgsz))
1176		return -EFAULT;
1177	return msgsz;
1178}
1179
/*
 * compat_ksys_msgrcv - in-kernel entry point for the compat msgrcv()
 *
 * Converts the compat pointer and widens size and type, then runs
 * do_msgrcv() with compat_do_msg_fill() as the copy-out handler.
 */
long compat_ksys_msgrcv(int msqid, compat_uptr_t msgp, compat_ssize_t msgsz,
			compat_long_t msgtyp, int msgflg)
{
	return do_msgrcv(msqid, compat_ptr(msgp), (ssize_t)msgsz, (long)msgtyp,
			 msgflg, compat_do_msg_fill);
}
1186
/* Compat msgrcv() system call: thin wrapper around compat_ksys_msgrcv(). */
COMPAT_SYSCALL_DEFINE5(msgrcv, int, msqid, compat_uptr_t, msgp,
		       compat_ssize_t, msgsz, compat_long_t, msgtyp,
		       int, msgflg)
{
	return compat_ksys_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg);
}
1193#endif
1194
1195int msg_init_ns(struct ipc_namespace *ns)
1196{
1197	ns->msg_ctlmax = MSGMAX;
1198	ns->msg_ctlmnb = MSGMNB;
1199	ns->msg_ctlmni = MSGMNI;
1200
1201	atomic_set(&ns->msg_bytes, 0);
1202	atomic_set(&ns->msg_hdrs, 0);
1203	return ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
1204}
1205
1206#ifdef CONFIG_IPC_NS
1207void msg_exit_ns(struct ipc_namespace *ns)
1208{
1209	free_ipcs(ns, &msg_ids(ns), freeque);
1210	idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr);
1211	rhashtable_destroy(&ns->ids[IPC_MSG_IDS].key_ht);
1212}
1213#endif
1214
1215#ifdef CONFIG_PROC_FS
/*
 * sysvipc_msg_proc_show - print one queue as a line of /proc/sysvipc/msg
 * @s: seq_file being written
 * @it: the queue's kern_ipc_perm
 *
 * The columns follow the header string registered in msg_init().  User
 * and group ids are translated into the reader's user namespace.
 * Always returns 0.
 */
static int sysvipc_msg_proc_show(struct seq_file *s, void *it)
{
	struct user_namespace *user_ns = seq_user_ns(s);
	struct kern_ipc_perm *ipcp = it;
	struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);

	seq_printf(s,
		   "%10d %10d  %4o  %10lu %10lu %5u %5u %5u %5u %5u %5u %10llu %10llu %10llu\n",
		   msq->q_perm.key,
		   msq->q_perm.id,
		   msq->q_perm.mode,
		   msq->q_cbytes,
		   msq->q_qnum,
		   msq->q_lspid,
		   msq->q_lrpid,
		   from_kuid_munged(user_ns, msq->q_perm.uid),
		   from_kgid_munged(user_ns, msq->q_perm.gid),
		   from_kuid_munged(user_ns, msq->q_perm.cuid),
		   from_kgid_munged(user_ns, msq->q_perm.cgid),
		   msq->q_stime,
		   msq->q_rtime,
		   msq->q_ctime);

	return 0;
}
1241#endif
1242
1243int __init msg_init(void)
1244{
1245	const int err = msg_init_ns(&init_ipc_ns);
1246
1247	ipc_init_proc_interface("sysvipc/msg",
1248				"       key      msqid perms      cbytes       qnum lspid lrpid   uid   gid  cuid  cgid      stime      rtime      ctime\n",
1249				IPC_MSG_IDS, sysvipc_msg_proc_show);
1250	return err;
1251}
1252