// SPDX-License-Identifier: GPL-2.0
/*
 * linux/ipc/util.c
 * Copyright (C) 1992 Krishna Balasubramanian
 *
 * Sep 1997 - Call suser() last after "normal" permission checks so we
 *            get BSD style process accounting right.
 *            Occurs in several places in the IPC code.
 *            Chris Evans, <chris@ferret.lmh.ox.ac.uk>
 * Nov 1999 - ipc helper functions, unified SMP locking
 *	      Manfred Spraul <manfred@colorfullife.com>
 * Oct 2002 - One lock per IPC id. RCU ipc_free for lock-free grow_ary().
 *            Mingming Cao <cmm@us.ibm.com>
 * Mar 2006 - support for audit of ipc object properties
 *            Dustin Kirkland <dustin.kirkland@us.ibm.com>
 * Jun 2006 - namespaces support
 *            OpenVZ, SWsoft Inc.
 *            Pavel Emelianov <xemul@openvz.org>
 *
 * General sysv ipc locking scheme:
 *	rcu_read_lock()
 *          obtain the ipc object (kern_ipc_perm) by looking up the id in an idr
 *	    tree.
 *	    - perform initial checks (capabilities, auditing and permission,
 *	      etc).
 *	    - perform read-only operations, such as INFO command, that
 *	      do not demand atomicity
 *	      acquire the ipc lock (kern_ipc_perm.lock) through
 *	      ipc_lock_object()
 *		- perform read-only operations that demand atomicity,
 *		  such as STAT command.
 *		- perform data updates, such as SET, RMID commands and
 *		  mechanism-specific operations (semop/semtimedop,
 *		  msgsnd/msgrcv, shmat/shmdt).
 *	    drop the ipc lock, through ipc_unlock_object().
 *	rcu_read_unlock()
 *
 *  The ids->rwsem must be taken when:
 *	- creating, removing and iterating the existing entries in ipc
 *	  identifier sets.
 *	- iterating through files under /proc/sysvipc/
 *
 *  Note that sems have a special fast path that avoids kern_ipc_perm.lock -
 *  see sem_lock().
 */
461da177e4SLinus Torvalds
471da177e4SLinus Torvalds#include <linux/mm.h>
481da177e4SLinus Torvalds#include <linux/shm.h>
491da177e4SLinus Torvalds#include <linux/init.h>
501da177e4SLinus Torvalds#include <linux/msg.h>
511da177e4SLinus Torvalds#include <linux/vmalloc.h>
521da177e4SLinus Torvalds#include <linux/slab.h>
538f68fa2dSAndrew Morton#include <linux/notifier.h>
54c59ede7bSRandy Dunlap#include <linux/capability.h>
551da177e4SLinus Torvalds#include <linux/highuid.h>
561da177e4SLinus Torvalds#include <linux/security.h>
571da177e4SLinus Torvalds#include <linux/rcupdate.h>
581da177e4SLinus Torvalds#include <linux/workqueue.h>
59ae781774SMike Waychison#include <linux/seq_file.h>
60ae781774SMike Waychison#include <linux/proc_fs.h>
61073115d6SSteve Grubb#include <linux/audit.h>
6273ea4130SKirill Korotaev#include <linux/nsproxy.h>
633e148c79SNadia Derbey#include <linux/rwsem.h>
64b6b337adSNadia Derbey#include <linux/memory.h>
65ae5e1b22SPavel Emelyanov#include <linux/ipc_namespace.h>
660eb71a9dSNeilBrown#include <linux/rhashtable.h>
67b869d5beSManfred Spraul#include <linux/log2.h>
681da177e4SLinus Torvalds
691da177e4SLinus Torvalds#include <asm/unistd.h>
701da177e4SLinus Torvalds
711da177e4SLinus Torvalds#include "util.h"
721da177e4SLinus Torvalds
/*
 * Per-file description of one sysv ipc procfs entry. An instance is
 * filled in by ipc_init_proc_interface() and handed to procfs as the
 * entry's private data.
 */
struct ipc_proc_iface {
	/* Path of the entry in procfs. */
	const char *path;
	/* Banner to be printed at the beginning of the file. */
	const char *header;
	/* Selects the ipc id table to iterate. */
	int ids;
	/* Show routine for the seq_file interface. */
	int (*show)(struct seq_file *, void *);
};
79ae781774SMike Waychison
801da177e4SLinus Torvalds/**
818001c858SDavidlohr Bueso * ipc_init - initialise ipc subsystem
821da177e4SLinus Torvalds *
838001c858SDavidlohr Bueso * The various sysv ipc resources (semaphores, messages and shared
848001c858SDavidlohr Bueso * memory) are initialised.
858001c858SDavidlohr Bueso *
868001c858SDavidlohr Bueso * A callback routine is registered into the memory hotplug notifier
878001c858SDavidlohr Bueso * chain: since msgmni scales to lowmem this callback routine will be
888001c858SDavidlohr Bueso * called upon successful memory add / remove to recompute msmgni.
891da177e4SLinus Torvalds */
901da177e4SLinus Torvaldsstatic int __init ipc_init(void)
911da177e4SLinus Torvalds{
92e74a0effSAlexey Dobriyan	proc_mkdir("sysvipc", NULL);
93eae04d25SDavidlohr Bueso	sem_init();
94eae04d25SDavidlohr Bueso	msg_init();
951da177e4SLinus Torvalds	shm_init();
960cfb6aeeSGuillaume Knispel
97eae04d25SDavidlohr Bueso	return 0;
981da177e4SLinus Torvalds}
996d08a256SDavidlohr Buesodevice_initcall(ipc_init);
1001da177e4SLinus Torvalds
1010cfb6aeeSGuillaume Knispelstatic const struct rhashtable_params ipc_kht_params = {
1020cfb6aeeSGuillaume Knispel	.head_offset		= offsetof(struct kern_ipc_perm, khtnode),
1030cfb6aeeSGuillaume Knispel	.key_offset		= offsetof(struct kern_ipc_perm, key),
104c593642cSPankaj Bharadiya	.key_len		= sizeof_field(struct kern_ipc_perm, key),
1050cfb6aeeSGuillaume Knispel	.automatic_shrinking	= true,
1060cfb6aeeSGuillaume Knispel};
1070cfb6aeeSGuillaume Knispel
1081da177e4SLinus Torvalds/**
1098001c858SDavidlohr Bueso * ipc_init_ids	- initialise ipc identifiers
1108001c858SDavidlohr Bueso * @ids: ipc identifier set
1111da177e4SLinus Torvalds *
1128001c858SDavidlohr Bueso * Set up the sequence range to use for the ipc identifier range (limited
1135ac893b8SWaiman Long * below ipc_mni) then initialise the keys hashtable and ids idr.
1141da177e4SLinus Torvalds */
115eae04d25SDavidlohr Buesovoid ipc_init_ids(struct ipc_ids *ids)
1161da177e4SLinus Torvalds{
1171da177e4SLinus Torvalds	ids->in_use = 0;
1181da177e4SLinus Torvalds	ids->seq = 0;
119daf948c7SDavidlohr Bueso	init_rwsem(&ids->rwsem);
120eae04d25SDavidlohr Bueso	rhashtable_init(&ids->key_ht, &ipc_kht_params);
1217ca7e564SNadia Derbey	idr_init(&ids->ipcs_idr);
12227c331a1SManfred Spraul	ids->max_idx = -1;
1233278a2c2SManfred Spraul	ids->last_idx = -1;
124b8fd9983SDavidlohr Bueso#ifdef CONFIG_CHECKPOINT_RESTORE
125b8fd9983SDavidlohr Bueso	ids->next_id = -1;
126b8fd9983SDavidlohr Bueso#endif
1271da177e4SLinus Torvalds}
1281da177e4SLinus Torvalds
#ifdef CONFIG_PROC_FS
/* Forward declaration: its address is needed by the function below. */
static const struct proc_ops sysvipc_proc_ops;

/**
 * ipc_init_proc_interface - create a proc interface for sysipc types
 *                           using a seq_file interface.
 * @path: Path in procfs
 * @header: Banner to be printed at the beginning of the file.
 * @ids: ipc id table to iterate.
 * @show: show routine.
 *
 * Allocates an ipc_proc_iface describing the entry and registers a
 * world-readable procfs file with that descriptor as its private data.
 * If the allocation fails nothing is created; if the registration fails
 * the descriptor is freed again. Neither failure is reported to the
 * caller.
 */
void __init ipc_init_proc_interface(const char *path, const char *header,
		int ids, int (*show)(struct seq_file *, void *))
{
	struct ipc_proc_iface *iface = kmalloc(sizeof(*iface), GFP_KERNEL);

	if (!iface)
		return;

	*iface = (struct ipc_proc_iface) {
		.path	= path,
		.header	= header,
		.ids	= ids,
		.show	= show,
	};

	/* Mode S_IRUGO: world readable; parent NULL: no parent dir given. */
	if (!proc_create_data(path, S_IRUGO, NULL, &sysvipc_proc_ops, iface))
		kfree(iface);
}
#endif
161ae781774SMike Waychison
1621da177e4SLinus Torvalds/**
1638001c858SDavidlohr Bueso * ipc_findkey	- find a key in an ipc identifier set
1648001c858SDavidlohr Bueso * @ids: ipc identifier set
1658001c858SDavidlohr Bueso * @key: key to find
16646c0a8caSPaul McQuade *
1678001c858SDavidlohr Bueso * Returns the locked pointer to the ipc structure if found or NULL
1688001c858SDavidlohr Bueso * otherwise. If key is found ipc points to the owning ipc structure
1698001c858SDavidlohr Bueso *
1700cfb6aeeSGuillaume Knispel * Called with writer ipc_ids.rwsem held.
1711da177e4SLinus Torvalds */
1727748dbfaSNadia Derbeystatic struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key)
1731da177e4SLinus Torvalds{
174dc2c8c84SDavidlohr Bueso	struct kern_ipc_perm *ipcp;
1757ca7e564SNadia Derbey
176dc2c8c84SDavidlohr Bueso	ipcp = rhashtable_lookup_fast(&ids->key_ht, &key,
1770cfb6aeeSGuillaume Knispel					      ipc_kht_params);
178dc2c8c84SDavidlohr Bueso	if (!ipcp)
179dc2c8c84SDavidlohr Bueso		return NULL;
1807ca7e564SNadia Derbey
181dc2c8c84SDavidlohr Bueso	rcu_read_lock();
182dc2c8c84SDavidlohr Bueso	ipc_lock_object(ipcp);
183dc2c8c84SDavidlohr Bueso	return ipcp;
1841da177e4SLinus Torvalds}
1851da177e4SLinus Torvalds
186b8fd9983SDavidlohr Bueso/*
187e2652ae6SManfred Spraul * Insert new IPC object into idr tree, and set sequence number and id
188e2652ae6SManfred Spraul * in the correct order.
189e2652ae6SManfred Spraul * Especially:
190e2652ae6SManfred Spraul * - the sequence number must be set before inserting the object into the idr,
191e2652ae6SManfred Spraul *   because the sequence number is accessed without a lock.
192e2652ae6SManfred Spraul * - the id can/must be set after inserting the object into the idr.
193e2652ae6SManfred Spraul *   All accesses must be done after getting kern_ipc_perm.lock.
194e2652ae6SManfred Spraul *
195e2652ae6SManfred Spraul * The caller must own kern_ipc_perm.lock.of the new object.
196e2652ae6SManfred Spraul * On error, the function returns a (negative) error code.
1973278a2c2SManfred Spraul *
1983278a2c2SManfred Spraul * To conserve sequence number space, especially with extended ipc_mni,
1993278a2c2SManfred Spraul * the sequence number is incremented only when the returned ID is less than
2003278a2c2SManfred Spraul * the last one.
201b8fd9983SDavidlohr Bueso */
202e2652ae6SManfred Spraulstatic inline int ipc_idr_alloc(struct ipc_ids *ids, struct kern_ipc_perm *new)
203b8fd9983SDavidlohr Bueso{
204e2652ae6SManfred Spraul	int idx, next_id = -1;
205e2652ae6SManfred Spraul
206e2652ae6SManfred Spraul#ifdef CONFIG_CHECKPOINT_RESTORE
207e2652ae6SManfred Spraul	next_id = ids->next_id;
208e2652ae6SManfred Spraul	ids->next_id = -1;
209e2652ae6SManfred Spraul#endif
210e2652ae6SManfred Spraul
211e2652ae6SManfred Spraul	/*
212e2652ae6SManfred Spraul	 * As soon as a new object is inserted into the idr,
213e2652ae6SManfred Spraul	 * ipc_obtain_object_idr() or ipc_obtain_object_check() can find it,
214e2652ae6SManfred Spraul	 * and the lockless preparations for ipc operations can start.
215e2652ae6SManfred Spraul	 * This means especially: permission checks, audit calls, allocation
216e2652ae6SManfred Spraul	 * of undo structures, ...
217e2652ae6SManfred Spraul	 *
218e2652ae6SManfred Spraul	 * Thus the object must be fully initialized, and if something fails,
219e2652ae6SManfred Spraul	 * then the full tear-down sequence must be followed.
220e2652ae6SManfred Spraul	 * (i.e.: set new->deleted, reduce refcount, call_rcu())
221e2652ae6SManfred Spraul	 */
222e2652ae6SManfred Spraul
223e2652ae6SManfred Spraul	if (next_id < 0) { /* !CHECKPOINT_RESTORE or next_id is unset */
22499db46eaSManfred Spraul		int max_idx;
22599db46eaSManfred Spraul
22699db46eaSManfred Spraul		max_idx = max(ids->in_use*3/2, ipc_min_cycle);
22799db46eaSManfred Spraul		max_idx = min(max_idx, ipc_mni);
2283278a2c2SManfred Spraul
2293278a2c2SManfred Spraul		/* allocate the idx, with a NULL struct kern_ipc_perm */
23099db46eaSManfred Spraul		idx = idr_alloc_cyclic(&ids->ipcs_idr, NULL, 0, max_idx,
23199db46eaSManfred Spraul					GFP_NOWAIT);
2323278a2c2SManfred Spraul
2333278a2c2SManfred Spraul		if (idx >= 0) {
2343278a2c2SManfred Spraul			/*
2353278a2c2SManfred Spraul			 * idx got allocated successfully.
2363278a2c2SManfred Spraul			 * Now calculate the sequence number and set the
2373278a2c2SManfred Spraul			 * pointer for real.
2383278a2c2SManfred Spraul			 */
2393278a2c2SManfred Spraul			if (idx <= ids->last_idx) {
2403278a2c2SManfred Spraul				ids->seq++;
2413278a2c2SManfred Spraul				if (ids->seq >= ipcid_seq_max())
2423278a2c2SManfred Spraul					ids->seq = 0;
2433278a2c2SManfred Spraul			}
2443278a2c2SManfred Spraul			ids->last_idx = idx;
2453278a2c2SManfred Spraul
2463278a2c2SManfred Spraul			new->seq = ids->seq;
2473278a2c2SManfred Spraul			/* no need for smp_wmb(), this is done
2483278a2c2SManfred Spraul			 * inside idr_replace, as part of
2493278a2c2SManfred Spraul			 * rcu_assign_pointer
2503278a2c2SManfred Spraul			 */
2513278a2c2SManfred Spraul			idr_replace(&ids->ipcs_idr, new, idx);
2523278a2c2SManfred Spraul		}
253b8fd9983SDavidlohr Bueso	} else {
254e2652ae6SManfred Spraul		new->seq = ipcid_to_seqx(next_id);
255e2652ae6SManfred Spraul		idx = idr_alloc(&ids->ipcs_idr, new, ipcid_to_idx(next_id),
256e2652ae6SManfred Spraul				0, GFP_NOWAIT);
257b8fd9983SDavidlohr Bueso	}
258e2652ae6SManfred Spraul	if (idx >= 0)
2593278a2c2SManfred Spraul		new->id = (new->seq << ipcmni_seq_shift()) + idx;
260e2652ae6SManfred Spraul	return idx;
261b8fd9983SDavidlohr Bueso}
262b8fd9983SDavidlohr Bueso
2631da177e4SLinus Torvalds/**
2648001c858SDavidlohr Bueso * ipc_addid - add an ipc identifier
2658001c858SDavidlohr Bueso * @ids: ipc identifier set
2668001c858SDavidlohr Bueso * @new: new ipc permission set
267ebf66799SDavidlohr Bueso * @limit: limit for the number of used ids
2681da177e4SLinus Torvalds *
2698001c858SDavidlohr Bueso * Add an entry 'new' to the ipc ids idr. The permissions object is
27027c331a1SManfred Spraul * initialised and the first free entry is set up and the index assigned
2718001c858SDavidlohr Bueso * is returned. The 'new' entry is returned in a locked state on success.
27239cfffd7SManfred Spraul *
2738001c858SDavidlohr Bueso * On failure the entry is not locked and a negative err-code is returned.
27439cfffd7SManfred Spraul * The caller must use ipc_rcu_putref() to free the identifier.
2751da177e4SLinus Torvalds *
2768001c858SDavidlohr Bueso * Called with writer ipc_ids.rwsem held.
2771da177e4SLinus Torvalds */
278ebf66799SDavidlohr Buesoint ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int limit)
2791da177e4SLinus Torvalds{
2801efdb69bSEric W. Biederman	kuid_t euid;
2811efdb69bSEric W. Biederman	kgid_t egid;
282e2652ae6SManfred Spraul	int idx, err;
2831da177e4SLinus Torvalds
28439cfffd7SManfred Spraul	/* 1) Initialize the refcount so that ipc_rcu_putref works */
28539cfffd7SManfred Spraul	refcount_set(&new->refcount, 1);
28639cfffd7SManfred Spraul
2875ac893b8SWaiman Long	if (limit > ipc_mni)
2885ac893b8SWaiman Long		limit = ipc_mni;
2897ca7e564SNadia Derbey
290dc2c8c84SDavidlohr Bueso	if (ids->in_use >= limit)
291283bb7faSPierre Peiffer		return -ENOSPC;
2927ca7e564SNadia Derbey
29354924ea3STejun Heo	idr_preload(GFP_KERNEL);
29454924ea3STejun Heo
295e00b4ff7SNadia Derbey	spin_lock_init(&new->lock);
296e00b4ff7SNadia Derbey	rcu_read_lock();
297e00b4ff7SNadia Derbey	spin_lock(&new->lock);
298e00b4ff7SNadia Derbey
299b9a53227SLinus Torvalds	current_euid_egid(&euid, &egid);
300b9a53227SLinus Torvalds	new->cuid = new->uid = euid;
301b9a53227SLinus Torvalds	new->gid = new->cgid = egid;
302b9a53227SLinus Torvalds
30339cfffd7SManfred Spraul	new->deleted = false;
30439cfffd7SManfred Spraul
305e2652ae6SManfred Spraul	idx = ipc_idr_alloc(ids, new);
30654924ea3STejun Heo	idr_preload_end();
3070cfb6aeeSGuillaume Knispel
308e2652ae6SManfred Spraul	if (idx >= 0 && new->key != IPC_PRIVATE) {
309