shm.c revision 39c96a1b
// SPDX-License-Identifier: GPL-2.0
/*
 * linux/ipc/shm.c
 * Copyright (C) 1992, 1993 Krishna Balasubramanian
 *	 Many improvements/fixes by Bruno Haible.
 * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
 * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
 *
 * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
 * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
 * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
 * Make shmmax, shmall, shmmni sysctl'able, Christoph Rohland <cr@sap.com>
 * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com>
 * Move the mm functionality over to mm/shmem.c, Christoph Rohland <cr@sap.com>
 *
 * support for audit of ipc object properties and permission changes
 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
 *
 * namespaces support
 * OpenVZ, SWsoft Inc.
 * Pavel Emelianov <xemul@openvz.org>
 *
 * Better ipc lock (kern_ipc_perm.lock) handling
 * Davidlohr Bueso <davidlohr.bueso@hp.com>, June 2013.
 */

#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/shm.h>
#include <linux/init.h>
#include <linux/file.h>
#include <linux/mman.h>
#include <linux/shmem_fs.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/capability.h>
#include <linux/ptrace.h>
#include <linux/seq_file.h>
#include <linux/rwsem.h>
#include <linux/nsproxy.h>
#include <linux/mount.h>
#include <linux/ipc_namespace.h>

#include <linux/uaccess.h>

#include "util.h"

struct shm_file_data {
	int id;
	struct ipc_namespace *ns;
	struct file *file;
	const struct vm_operations_struct *vm_ops;
};

#define shm_file_data(file) (*((struct shm_file_data **)&(file)->private_data))

static const struct file_operations shm_file_operations;
static const struct vm_operations_struct shm_vm_ops;

#define shm_ids(ns)	((ns)->ids[IPC_SHM_IDS])

#define shm_unlock(shp)			\
	ipc_unlock(&(shp)->shm_perm)

static int newseg(struct ipc_namespace *, struct ipc_params *);
static void shm_open(struct vm_area_struct *vma);
static void shm_close(struct vm_area_struct *vma);
static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp);
#ifdef CONFIG_PROC_FS
static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
#endif

int shm_init_ns(struct ipc_namespace *ns)
{
	ns->shm_ctlmax = SHMMAX;
	ns->shm_ctlall = SHMALL;
	ns->shm_ctlmni = SHMMNI;
	ns->shm_rmid_forced = 0;
	ns->shm_tot = 0;
	return ipc_init_ids(&shm_ids(ns));
}
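
/*
 * A minimal userspace sketch of how the limits initialized above surface;
 * values are illustrative and arch/config dependent (SHMMAX, SHMALL, SHMMNI
 * come from <uapi/linux/shm.h>):
 *
 *	struct shminfo info;
 *	shmctl(0, IPC_INFO, (struct shmid_ds *)&info);	// see shmctl_ipc_info()
 *	// info.shmmax == ns->shm_ctlmax (bytes per segment)
 *	// info.shmall == ns->shm_ctlall (pages, system wide)
 *	// info.shmmni == ns->shm_ctlmni (number of segment ids)
 */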

/*
 * Called with shm_ids.rwsem (writer) and the shp structure locked.
 * Only shm_ids.rwsem remains locked on exit.
 */
static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
{
	struct shmid_kernel *shp;

	shp = container_of(ipcp, struct shmid_kernel, shm_perm);

	if (shp->shm_nattch) {
		shp->shm_perm.mode |= SHM_DEST;
		/* Do not find it any more */
		ipc_set_key_private(&shm_ids(ns), &shp->shm_perm);
		shm_unlock(shp);
	} else
		shm_destroy(ns, shp);
}

#ifdef CONFIG_IPC_NS
void shm_exit_ns(struct ipc_namespace *ns)
{
	free_ipcs(ns, &shm_ids(ns), do_shm_rmid);
	idr_destroy(&ns->ids[IPC_SHM_IDS].ipcs_idr);
	rhashtable_destroy(&ns->ids[IPC_SHM_IDS].key_ht);
}
#endif

static int __init ipc_ns_init(void)
{
	const int err = shm_init_ns(&init_ipc_ns);
	WARN(err, "ipc: sysv shm_init_ns failed: %d\n", err);
	return err;
}

pure_initcall(ipc_ns_init);

void __init shm_init(void)
{
	ipc_init_proc_interface("sysvipc/shm",
#if BITS_PER_LONG <= 32
				"       key      shmid perms       size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime        rss       swap\n",
#else
				"       key      shmid perms                  size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime                   rss                  swap\n",
#endif
				IPC_SHM_IDS, sysvipc_shm_proc_show);
}

static inline struct shmid_kernel *shm_obtain_object(struct ipc_namespace *ns, int id)
{
	struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&shm_ids(ns), id);

	if (IS_ERR(ipcp))
		return ERR_CAST(ipcp);

	return container_of(ipcp, struct shmid_kernel, shm_perm);
}

static inline struct shmid_kernel *shm_obtain_object_check(struct ipc_namespace *ns, int id)
{
	struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&shm_ids(ns), id);

	if (IS_ERR(ipcp))
		return ERR_CAST(ipcp);

	return container_of(ipcp, struct shmid_kernel, shm_perm);
}

/*
 * The shm_lock() routine is called in paths where the rwsem
 * is not necessarily held.
 */
static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
{
	struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id);

	/*
	 * Callers of shm_lock() must validate the status of the returned ipc
	 * object pointer (as returned by ipc_lock()), and error out as
	 * appropriate.
	 */
	if (IS_ERR(ipcp))
		return (void *)ipcp;
	return container_of(ipcp, struct shmid_kernel, shm_perm);
}

static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
{
	rcu_read_lock();
	ipc_lock_object(&ipcp->shm_perm);
}

static void shm_rcu_free(struct rcu_head *head)
{
	struct kern_ipc_perm *ptr = container_of(head, struct kern_ipc_perm,
							rcu);
	struct shmid_kernel *shp = container_of(ptr, struct shmid_kernel,
							shm_perm);
	security_shm_free(shp);
	kvfree(shp);
}

static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
{
	list_del(&s->shm_clist);
	ipc_rmid(&shm_ids(ns), &s->shm_perm);
}

static int __shm_open(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct shm_file_data *sfd = shm_file_data(file);
	struct shmid_kernel *shp;

	shp = shm_lock(sfd->ns, sfd->id);

	if (IS_ERR(shp))
		return PTR_ERR(shp);

	shp->shm_atim = ktime_get_real_seconds();
	shp->shm_lprid = task_tgid_vnr(current);
	shp->shm_nattch++;
	shm_unlock(shp);
	return 0;
}

/* This is called by fork, once for every shm attach. */
static void shm_open(struct vm_area_struct *vma)
{
	int err = __shm_open(vma);
	/*
	 * We raced in the idr lookup or with shm_destroy().
	 * Either way, the ID is busted.
	 */
	WARN_ON_ONCE(err);
}

/*
 * shm_destroy - free the struct shmid_kernel
 *
 * @ns: namespace
 * @shp: struct to free
 *
 * It has to be called with shp and shm_ids.rwsem (writer) locked,
 * but returns with shp unlocked and freed.
 */
static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
{
	struct file *shm_file;

	shm_file = shp->shm_file;
	shp->shm_file = NULL;
	ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
	shm_rmid(ns, shp);
	shm_unlock(shp);
	if (!is_file_hugepages(shm_file))
		shmem_lock(shm_file, 0, shp->mlock_user);
	else if (shp->mlock_user)
		user_shm_unlock(i_size_read(file_inode(shm_file)),
				shp->mlock_user);
	fput(shm_file);
	ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
}

/*
 * shm_may_destroy - identifies whether shm segment should be destroyed now
 *
 * Returns true if and only if there are no active users of the segment and
 * one of the following is true:
 *
 * 1) shmctl(id, IPC_RMID, NULL) was called for this shp
 *
 * 2) sysctl kernel.shm_rmid_forced is set to 1.
 */
static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
{
	return (shp->shm_nattch == 0) &&
	       (ns->shm_rmid_forced ||
		(shp->shm_perm.mode & SHM_DEST));
}

/*
 * remove the attach descriptor vma.
 * free memory for segment if it is marked destroyed.
 * The descriptor has already been removed from the current->mm->mmap list
 * and will later be kfree()d.
 */
static void shm_close(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct shm_file_data *sfd = shm_file_data(file);
	struct shmid_kernel *shp;
	struct ipc_namespace *ns = sfd->ns;

	down_write(&shm_ids(ns).rwsem);
	/* remove from the list of attaches of the shm segment */
	shp = shm_lock(ns, sfd->id);

	/*
	 * We raced in the idr lookup or with shm_destroy().
	 * Either way, the ID is busted.
	 */
	if (WARN_ON_ONCE(IS_ERR(shp)))
		goto done; /* no-op */

	shp->shm_lprid = task_tgid_vnr(current);
	shp->shm_dtim = ktime_get_real_seconds();
	shp->shm_nattch--;
	if (shm_may_destroy(ns, shp))
		shm_destroy(ns, shp);
	else
		shm_unlock(shp);
done:
	up_write(&shm_ids(ns).rwsem);
}

/* Called with ns->shm_ids(ns).rwsem locked */
static int shm_try_destroy_orphaned(int id, void *p, void *data)
{
	struct ipc_namespace *ns = data;
	struct kern_ipc_perm *ipcp = p;
	struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);

	/*
	 * We want to destroy segments without users and with already
	 * exit'ed originating process.
	 *
	 * As shp->* are changed under rwsem, it's safe to skip shp locking.
	 */
	if (shp->shm_creator != NULL)
		return 0;

	if (shm_may_destroy(ns, shp)) {
		shm_lock_by_ptr(shp);
		shm_destroy(ns, shp);
	}
	return 0;
}

void shm_destroy_orphaned(struct ipc_namespace *ns)
{
	down_write(&shm_ids(ns).rwsem);
	if (shm_ids(ns).in_use)
		idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns);
	up_write(&shm_ids(ns).rwsem);
}

/* Locking assumes this will only be called with task == current */
void exit_shm(struct task_struct *task)
{
	struct ipc_namespace *ns = task->nsproxy->ipc_ns;
	struct shmid_kernel *shp, *n;

	if (list_empty(&task->sysvshm.shm_clist))
		return;

	/*
	 * If kernel.shm_rmid_forced is not set then only keep track of
	 * which shmids are orphaned, so that a later set of the sysctl
	 * can clean them up.
	 */
	if (!ns->shm_rmid_forced) {
		down_read(&shm_ids(ns).rwsem);
		list_for_each_entry(shp, &task->sysvshm.shm_clist, shm_clist)
			shp->shm_creator = NULL;
		/*
		 * We hold only the read lock, but we are only called on
		 * current, so no entry on the list will be shared.
		 */
		list_del(&task->sysvshm.shm_clist);
		up_read(&shm_ids(ns).rwsem);
		return;
	}

	/*
	 * Destroy all already created segments that were not yet mapped,
	 * and mark any mapped ones as orphaned to cover the sysctl toggling.
	 * Destroy is skipped if shm_may_destroy() returns false.
	 */
	down_write(&shm_ids(ns).rwsem);
	list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) {
		shp->shm_creator = NULL;

		if (shm_may_destroy(ns, shp)) {
			shm_lock_by_ptr(shp);
			shm_destroy(ns, shp);
		}
	}

	/* Remove the list head from any segments still attached. */
	list_del(&task->sysvshm.shm_clist);
	up_write(&shm_ids(ns).rwsem);
}
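
/*
 * An illustrative view of the sysctl interaction handled above: with
 * kernel.shm_rmid_forced == 0 a creator's exit only orphans its segments
 * (shm_creator = NULL); flipping the sysctl on later lets
 * shm_destroy_orphaned() reap them once shm_nattch drops to zero, e.g.:
 *
 *	# echo 1 > /proc/sys/kernel/shm_rmid_forced
 */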

static int shm_fault(struct vm_fault *vmf)
{
	struct file *file = vmf->vma->vm_file;
	struct shm_file_data *sfd = shm_file_data(file);

	return sfd->vm_ops->fault(vmf);
}

#ifdef CONFIG_NUMA
static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
{
	struct file *file = vma->vm_file;
	struct shm_file_data *sfd = shm_file_data(file);
	int err = 0;

	if (sfd->vm_ops->set_policy)
		err = sfd->vm_ops->set_policy(vma, new);
	return err;
}

static struct mempolicy *shm_get_policy(struct vm_area_struct *vma,
					unsigned long addr)
{
	struct file *file = vma->vm_file;
	struct shm_file_data *sfd = shm_file_data(file);
	struct mempolicy *pol = NULL;

	if (sfd->vm_ops->get_policy)
		pol = sfd->vm_ops->get_policy(vma, addr);
	else if (vma->vm_policy)
		pol = vma->vm_policy;

	return pol;
}
#endif

static int shm_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct shm_file_data *sfd = shm_file_data(file);
	int ret;

	/*
	 * In case of remap_file_pages() emulation, the file can represent
	 * a removed IPC ID: propagate the shm_lock() error to the caller.
	 */
	ret = __shm_open(vma);
	if (ret)
		return ret;

	ret = call_mmap(sfd->file, vma);
	if (ret) {
		shm_close(vma);
		return ret;
	}
	sfd->vm_ops = vma->vm_ops;
#ifdef CONFIG_MMU
	WARN_ON(!sfd->vm_ops->fault);
#endif
	vma->vm_ops = &shm_vm_ops;
	return 0;
}

static int shm_release(struct inode *ino, struct file *file)
{
	struct shm_file_data *sfd = shm_file_data(file);

	put_ipc_ns(sfd->ns);
	shm_file_data(file) = NULL;
	kfree(sfd);
	return 0;
}

static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	struct shm_file_data *sfd = shm_file_data(file);

	if (!sfd->file->f_op->fsync)
		return -EINVAL;
	return sfd->file->f_op->fsync(sfd->file, start, end, datasync);
}

static long shm_fallocate(struct file *file, int mode, loff_t offset,
			  loff_t len)
{
	struct shm_file_data *sfd = shm_file_data(file);

	if (!sfd->file->f_op->fallocate)
		return -EOPNOTSUPP;
	return sfd->file->f_op->fallocate(file, mode, offset, len);
}

static unsigned long shm_get_unmapped_area(struct file *file,
	unsigned long addr, unsigned long len, unsigned long pgoff,
	unsigned long flags)
{
	struct shm_file_data *sfd = shm_file_data(file);

	return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len,
						pgoff, flags);
}

static const struct file_operations shm_file_operations = {
	.mmap		= shm_mmap,
	.fsync		= shm_fsync,
	.release	= shm_release,
	.get_unmapped_area	= shm_get_unmapped_area,
	.llseek		= noop_llseek,
	.fallocate	= shm_fallocate,
};

/*
 * shm_file_operations_huge is now identical to shm_file_operations,
 * but we keep it distinct for the sake of is_file_shm_hugepages().
 */
static const struct file_operations shm_file_operations_huge = {
	.mmap		= shm_mmap,
	.fsync		= shm_fsync,
	.release	= shm_release,
	.get_unmapped_area	= shm_get_unmapped_area,
	.llseek		= noop_llseek,
	.fallocate	= shm_fallocate,
};

bool is_file_shm_hugepages(struct file *file)
{
	return file->f_op == &shm_file_operations_huge;
}

static const struct vm_operations_struct shm_vm_ops = {
	.open	= shm_open,	/* callback for a new vm-area open */
	.close	= shm_close,	/* callback for when the vm-area is released */
	.fault	= shm_fault,
#if defined(CONFIG_NUMA)
	.set_policy = shm_set_policy,
	.get_policy = shm_get_policy,
#endif
};

/**
 * newseg - Create a new shared memory segment
 * @ns: namespace
 * @params: ptr to the structure that contains key, size and shmflg
 *
 * Called with shm_ids.rwsem held as a writer.
 */
static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
{
	key_t key = params->key;
	int shmflg = params->flg;
	size_t size = params->u.size;
	int error;
	struct shmid_kernel *shp;
	size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	struct file *file;
	char name[13];
	vm_flags_t acctflag = 0;

	if (size < SHMMIN || size > ns->shm_ctlmax)
		return -EINVAL;

	if (numpages << PAGE_SHIFT < size)
		return -ENOSPC;

	if (ns->shm_tot + numpages < ns->shm_tot ||
			ns->shm_tot + numpages > ns->shm_ctlall)
		return -ENOSPC;

	shp = kvmalloc(sizeof(*shp), GFP_KERNEL);
	if (unlikely(!shp))
		return -ENOMEM;

	shp->shm_perm.key = key;
	shp->shm_perm.mode = (shmflg & S_IRWXUGO);
	shp->mlock_user = NULL;

	shp->shm_perm.security = NULL;
	error = security_shm_alloc(shp);
	if (error) {
		kvfree(shp);
		return error;
	}

	sprintf(name, "SYSV%08x", key);
	if (shmflg & SHM_HUGETLB) {
		struct hstate *hs;
		size_t hugesize;

		hs = hstate_sizelog((shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
		if (!hs) {
			error = -EINVAL;
			goto no_file;
		}
		hugesize = ALIGN(size, huge_page_size(hs));

		/* hugetlb_file_setup applies strict accounting */
		if (shmflg & SHM_NORESERVE)
			acctflag = VM_NORESERVE;
		file = hugetlb_file_setup(name, hugesize, acctflag,
				  &shp->mlock_user, HUGETLB_SHMFS_INODE,
				(shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
	} else {
		/*
		 * Do not allow unaccounted (SHM_NORESERVE) segments when
		 * overcommit is OVERCOMMIT_NEVER, even if it's asked for.
		 */
		if ((shmflg & SHM_NORESERVE) &&
				sysctl_overcommit_memory != OVERCOMMIT_NEVER)
			acctflag = VM_NORESERVE;
		file = shmem_kernel_file_setup(name, size, acctflag);
	}
	error = PTR_ERR(file);
	if (IS_ERR(file))
		goto no_file;

	shp->shm_cprid = task_tgid_vnr(current);
	shp->shm_lprid = 0;
	shp->shm_atim = shp->shm_dtim = 0;
	shp->shm_ctim = ktime_get_real_seconds();
	shp->shm_segsz = size;
	shp->shm_nattch = 0;
	shp->shm_file = file;
	shp->shm_creator = current;

	/* ipc_addid() locks shp upon success. */
	error = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
	if (error < 0)
		goto no_id;

	list_add(&shp->shm_clist, &current->sysvshm.shm_clist);

	/*
	 * shmid gets reported as "inode#" in /proc/pid/maps.
	 * proc-ps tools use this. Changing this will break them.
	 */
	file_inode(file)->i_ino = shp->shm_perm.id;

	ns->shm_tot += numpages;
	error = shp->shm_perm.id;

	ipc_unlock_object(&shp->shm_perm);
	rcu_read_unlock();
	return error;

no_id:
	if (is_file_hugepages(file) && shp->mlock_user)
		user_shm_unlock(size, shp->mlock_user);
	fput(file);
no_file:
	call_rcu(&shp->shm_perm.rcu, shm_rcu_free);
	return error;
}
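
/*
 * An illustrative shmget() call exercising the SHM_HUGETLB branch above;
 * the page-size selector mirrors the (shmflg >> SHM_HUGE_SHIFT) decoding
 * done in newseg() and assumes 2 MiB hugepages have been reserved:
 *
 *	int id = shmget(IPC_PRIVATE, 4 << 20,
 *			IPC_CREAT | SHM_HUGETLB |
 *			(21 << SHM_HUGE_SHIFT) |	// log2(2 MiB) == 21
 *			0600);
 */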

/*
 * Called with shm_ids.rwsem and ipcp locked.
 */
static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg)
{
	struct shmid_kernel *shp;

	shp = container_of(ipcp, struct shmid_kernel, shm_perm);
	return security_shm_associate(shp, shmflg);
}

/*
 * Called with shm_ids.rwsem and ipcp locked.
 */
static inline int shm_more_checks(struct kern_ipc_perm *ipcp,
				struct ipc_params *params)
{
	struct shmid_kernel *shp;

	shp = container_of(ipcp, struct shmid_kernel, shm_perm);
	if (shp->shm_segsz < params->u.size)
		return -EINVAL;

	return 0;
}

SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg)
{
	struct ipc_namespace *ns;
	static const struct ipc_ops shm_ops = {
		.getnew = newseg,
		.associate = shm_security,
		.more_checks = shm_more_checks,
	};
	struct ipc_params shm_params;

	ns = current->nsproxy->ipc_ns;

	shm_params.key = key;
	shm_params.flg = shmflg;
	shm_params.u.size = size;

	return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params);
}
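
/*
 * A minimal userspace sketch of the segment lifecycle implemented in this
 * file; error handling elided, IPC_PRIVATE avoids key management:
 *
 *	int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
 *	char *p = shmat(id, NULL, 0);	// attach, see do_shmat() below
 *	p[0] = 'x';			// ordinary memory once attached
 *	shmdt(p);			// detach, see SYSCALL_DEFINE1(shmdt)
 *	shmctl(id, IPC_RMID, NULL);	// mark for destruction (SHM_DEST)
 */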

static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
{
	switch (version) {
	case IPC_64:
		return copy_to_user(buf, in, sizeof(*in));
	case IPC_OLD:
	    {
		struct shmid_ds out;

		memset(&out, 0, sizeof(out));
		ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm);
		out.shm_segsz	= in->shm_segsz;
		out.shm_atime	= in->shm_atime;
		out.shm_dtime	= in->shm_dtime;
		out.shm_ctime	= in->shm_ctime;
		out.shm_cpid	= in->shm_cpid;
		out.shm_lpid	= in->shm_lpid;
		out.shm_nattch	= in->shm_nattch;

		return copy_to_user(buf, &out, sizeof(out));
	    }
	default:
		return -EINVAL;
	}
}

static inline unsigned long
copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version)
{
	switch (version) {
	case IPC_64:
		if (copy_from_user(out, buf, sizeof(*out)))
			return -EFAULT;
		return 0;
	case IPC_OLD:
	    {
		struct shmid_ds tbuf_old;

		if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
			return -EFAULT;

		out->shm_perm.uid	= tbuf_old.shm_perm.uid;
		out->shm_perm.gid	= tbuf_old.shm_perm.gid;
		out->shm_perm.mode	= tbuf_old.shm_perm.mode;

		return 0;
	    }
	default:
		return -EINVAL;
	}
}

static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version)
{
	switch (version) {
	case IPC_64:
		return copy_to_user(buf, in, sizeof(*in));
	case IPC_OLD:
	    {
		struct shminfo out;

		if (in->shmmax > INT_MAX)
			out.shmmax = INT_MAX;
		else
			out.shmmax = (int)in->shmmax;

		out.shmmin	= in->shmmin;
		out.shmmni	= in->shmmni;
		out.shmseg	= in->shmseg;
		out.shmall	= in->shmall;

		return copy_to_user(buf, &out, sizeof(out));
	    }
	default:
		return -EINVAL;
	}
}

/*
 * Calculate and add used RSS and swap pages of a shm.
 * Called with shm_ids.rwsem held as a reader
 */
static void shm_add_rss_swap(struct shmid_kernel *shp,
	unsigned long *rss_add, unsigned long *swp_add)
{
	struct inode *inode;

	inode = file_inode(shp->shm_file);

	if (is_file_hugepages(shp->shm_file)) {
		struct address_space *mapping = inode->i_mapping;
		struct hstate *h = hstate_file(shp->shm_file);
		*rss_add += pages_per_huge_page(h) * mapping->nrpages;
	} else {
#ifdef CONFIG_SHMEM
		struct shmem_inode_info *info = SHMEM_I(inode);

		spin_lock_irq(&info->lock);
		*rss_add += inode->i_mapping->nrpages;
		*swp_add += info->swapped;
		spin_unlock_irq(&info->lock);
#else
		*rss_add += inode->i_mapping->nrpages;
#endif
	}
}

/*
 * Called with shm_ids.rwsem held as a reader
 */
static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
		unsigned long *swp)
{
	int next_id;
	int total, in_use;

	*rss = 0;
	*swp = 0;

	in_use = shm_ids(ns).in_use;

	for (total = 0, next_id = 0; total < in_use; next_id++) {
		struct kern_ipc_perm *ipc;
		struct shmid_kernel *shp;

		ipc = idr_find(&shm_ids(ns).ipcs_idr, next_id);
		if (ipc == NULL)
			continue;
		shp = container_of(ipc, struct shmid_kernel, shm_perm);

		shm_add_rss_swap(shp, rss, swp);

		total++;
	}
}

/*
 * This function handles some shmctl commands which require the rwsem
 * to be held in write mode.
 * NOTE: no locks must be held, the rwsem is taken inside this function.
 */
static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
		       struct shmid64_ds *shmid64)
{
	struct kern_ipc_perm *ipcp;
	struct shmid_kernel *shp;
	int err;

	down_write(&shm_ids(ns).rwsem);
	rcu_read_lock();

	ipcp = ipcctl_pre_down_nolock(ns, &shm_ids(ns), shmid, cmd,
				      &shmid64->shm_perm, 0);
	if (IS_ERR(ipcp)) {
		err = PTR_ERR(ipcp);
		goto out_unlock1;
	}

	shp = container_of(ipcp, struct shmid_kernel, shm_perm);

	err = security_shm_shmctl(shp, cmd);
	if (err)
		goto out_unlock1;

	switch (cmd) {
	case IPC_RMID:
		ipc_lock_object(&shp->shm_perm);
		/* do_shm_rmid unlocks the ipc object and rcu */
		do_shm_rmid(ns, ipcp);
		goto out_up;
	case IPC_SET:
		ipc_lock_object(&shp->shm_perm);
		err = ipc_update_perm(&shmid64->shm_perm, ipcp);
		if (err)
			goto out_unlock0;
		shp->shm_ctim = ktime_get_real_seconds();
		break;
	default:
		err = -EINVAL;
		goto out_unlock1;
	}

out_unlock0:
	ipc_unlock_object(&shp->shm_perm);
out_unlock1:
	rcu_read_unlock();
out_up:
	up_write(&shm_ids(ns).rwsem);
	return err;
}

static int shmctl_ipc_info(struct ipc_namespace *ns,
			   struct shminfo64 *shminfo)
{
	int err = security_shm_shmctl(NULL, IPC_INFO);
	if (!err) {
		memset(shminfo, 0, sizeof(*shminfo));
		shminfo->shmmni = shminfo->shmseg = ns->shm_ctlmni;
		shminfo->shmmax = ns->shm_ctlmax;
		shminfo->shmall = ns->shm_ctlall;
		shminfo->shmmin = SHMMIN;
		down_read(&shm_ids(ns).rwsem);
		err = ipc_get_maxid(&shm_ids(ns));
		up_read(&shm_ids(ns).rwsem);
		if (err < 0)
			err = 0;
	}
	return err;
}

static int shmctl_shm_info(struct ipc_namespace *ns,
			   struct shm_info *shm_info)
{
	int err = security_shm_shmctl(NULL, SHM_INFO);
	if (!err) {
		memset(shm_info, 0, sizeof(*shm_info));
		down_read(&shm_ids(ns).rwsem);
		shm_info->used_ids = shm_ids(ns).in_use;
		shm_get_stat(ns, &shm_info->shm_rss, &shm_info->shm_swp);
		shm_info->shm_tot = ns->shm_tot;
		shm_info->swap_attempts = 0;
		shm_info->swap_successes = 0;
		err = ipc_get_maxid(&shm_ids(ns));
		up_read(&shm_ids(ns).rwsem);
		if (err < 0)
			err = 0;
	}
	return err;
}

static int shmctl_stat(struct ipc_namespace *ns, int shmid,
			int cmd, struct shmid64_ds *tbuf)
{
	struct shmid_kernel *shp;
	int result;
	int err;

	rcu_read_lock();
	if (cmd == SHM_STAT) {
		shp = shm_obtain_object(ns, shmid);
		if (IS_ERR(shp)) {
			err = PTR_ERR(shp);
			goto out_unlock;
		}
		result = shp->shm_perm.id;
	} else {
		shp = shm_obtain_object_check(ns, shmid);
		if (IS_ERR(shp)) {
			err = PTR_ERR(shp);
			goto out_unlock;
		}
		result = 0;
	}

	err = -EACCES;
	if (ipcperms(ns, &shp->shm_perm, S_IRUGO))
		goto out_unlock;

	err = security_shm_shmctl(shp, cmd);
	if (err)
		goto out_unlock;

	memset(tbuf, 0, sizeof(*tbuf));
	kernel_to_ipc64_perm(&shp->shm_perm, &tbuf->shm_perm);
	tbuf->shm_segsz	= shp->shm_segsz;
	tbuf->shm_atime	= shp->shm_atim;
	tbuf->shm_dtime	= shp->shm_dtim;
	tbuf->shm_ctime	= shp->shm_ctim;
	tbuf->shm_cpid	= shp->shm_cprid;
	tbuf->shm_lpid	= shp->shm_lprid;
	tbuf->shm_nattch = shp->shm_nattch;
	rcu_read_unlock();
	return result;

out_unlock:
	rcu_read_unlock();
	return err;
}

static int shmctl_do_lock(struct ipc_namespace *ns, int shmid, int cmd)
{
	struct shmid_kernel *shp;
	struct file *shm_file;
	int err;

	rcu_read_lock();
	shp = shm_obtain_object_check(ns, shmid);
	if (IS_ERR(shp)) {
		err = PTR_ERR(shp);
		goto out_unlock1;
	}

	audit_ipc_obj(&(shp->shm_perm));
	err = security_shm_shmctl(shp, cmd);
	if (err)
		goto out_unlock1;

	ipc_lock_object(&shp->shm_perm);

	/* check if shm_destroy() is tearing down shp */
	if (!ipc_valid_object(&shp->shm_perm)) {
		err = -EIDRM;
		goto out_unlock0;
	}

	if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) {
		kuid_t euid = current_euid();

		if (!uid_eq(euid, shp->shm_perm.uid) &&
		    !uid_eq(euid, shp->shm_perm.cuid)) {
			err = -EPERM;
			goto out_unlock0;
		}
		if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) {
			err = -EPERM;
			goto out_unlock0;
		}
	}

	shm_file = shp->shm_file;
	if (is_file_hugepages(shm_file))
		goto out_unlock0;

	if (cmd == SHM_LOCK) {
		struct user_struct *user = current_user();

		err = shmem_lock(shm_file, 1, user);
		if (!err && !(shp->shm_perm.mode & SHM_LOCKED)) {
			shp->shm_perm.mode |= SHM_LOCKED;
			shp->mlock_user = user;
		}
		goto out_unlock0;
	}

	/* SHM_UNLOCK */
	if (!(shp->shm_perm.mode & SHM_LOCKED))
		goto out_unlock0;
	shmem_lock(shm_file, 0, shp->mlock_user);
	shp->shm_perm.mode &= ~SHM_LOCKED;
	shp->mlock_user = NULL;
	get_file(shm_file);
	ipc_unlock_object(&shp->shm_perm);
	rcu_read_unlock();
	shmem_unlock_mapping(shm_file->f_mapping);

	fput(shm_file);
	return err;

out_unlock0:
	ipc_unlock_object(&shp->shm_perm);
out_unlock1:
	rcu_read_unlock();
	return err;
}

SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
{
	int err, version;
	struct ipc_namespace *ns;
	struct shmid64_ds sem64;

	if (cmd < 0 || shmid < 0)
		return -EINVAL;

	version = ipc_parse_version(&cmd);
	ns = current->nsproxy->ipc_ns;

	switch (cmd) {
	case IPC_INFO: {
		struct shminfo64 shminfo;
		err = shmctl_ipc_info(ns, &shminfo);
		if (err < 0)
			return err;
		if (copy_shminfo_to_user(buf, &shminfo, version))
			err = -EFAULT;
		return err;
	}
	case SHM_INFO: {
		struct shm_info shm_info;
		err = shmctl_shm_info(ns, &shm_info);
		if (err < 0)
			return err;
		if (copy_to_user(buf, &shm_info, sizeof(shm_info)))
			err = -EFAULT;
		return err;
	}
	case SHM_STAT:
	case IPC_STAT: {
		err = shmctl_stat(ns, shmid, cmd, &sem64);
		if (err < 0)
			return err;
		if (copy_shmid_to_user(buf, &sem64, version))
			err = -EFAULT;
		return err;
	}
	case IPC_SET:
		if (copy_shmid_from_user(&sem64, buf, version))
			return -EFAULT;
		/* fallthru */
	case IPC_RMID:
		return shmctl_down(ns, shmid, cmd, &sem64);
	case SHM_LOCK:
	case SHM_UNLOCK:
		return shmctl_do_lock(ns, shmid, cmd);
	default:
		return -EINVAL;
	}
}
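
/*
 * A small userspace sketch of the stat path above: IPC_STAT fills a
 * struct shmid_ds via shmctl_stat() and copy_shmid_to_user():
 *
 *	struct shmid_ds ds;
 *	if (shmctl(id, IPC_STAT, &ds) == 0)
 *		printf("size=%zu nattch=%lu\n", ds.shm_segsz,
 *		       (unsigned long)ds.shm_nattch);
 */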

#ifdef CONFIG_COMPAT

struct compat_shmid_ds {
	struct compat_ipc_perm shm_perm;
	int shm_segsz;
	compat_time_t shm_atime;
	compat_time_t shm_dtime;
	compat_time_t shm_ctime;
	compat_ipc_pid_t shm_cpid;
	compat_ipc_pid_t shm_lpid;
	unsigned short shm_nattch;
	unsigned short shm_unused;
	compat_uptr_t shm_unused2;
	compat_uptr_t shm_unused3;
};

struct compat_shminfo64 {
	compat_ulong_t shmmax;
	compat_ulong_t shmmin;
	compat_ulong_t shmmni;
	compat_ulong_t shmseg;
	compat_ulong_t shmall;
	compat_ulong_t __unused1;
	compat_ulong_t __unused2;
	compat_ulong_t __unused3;
	compat_ulong_t __unused4;
};

struct compat_shm_info {
	compat_int_t used_ids;
	compat_ulong_t shm_tot, shm_rss, shm_swp;
	compat_ulong_t swap_attempts, swap_successes;
};

static int copy_compat_shminfo_to_user(void __user *buf, struct shminfo64 *in,
					int version)
{
	if (in->shmmax > INT_MAX)
		in->shmmax = INT_MAX;
	if (version == IPC_64) {
		struct compat_shminfo64 info;
		memset(&info, 0, sizeof(info));
		info.shmmax = in->shmmax;
		info.shmmin = in->shmmin;
		info.shmmni = in->shmmni;
		info.shmseg = in->shmseg;
		info.shmall = in->shmall;
		return copy_to_user(buf, &info, sizeof(info));
	} else {
		struct shminfo info;
		memset(&info, 0, sizeof(info));
		info.shmmax = in->shmmax;
		info.shmmin = in->shmmin;
		info.shmmni = in->shmmni;
		info.shmseg = in->shmseg;
		info.shmall = in->shmall;
		return copy_to_user(buf, &info, sizeof(info));
	}
}

static int put_compat_shm_info(struct shm_info *ip,
				struct compat_shm_info __user *uip)
{
	struct compat_shm_info info;

	memset(&info, 0, sizeof(info));
	info.used_ids = ip->used_ids;
	info.shm_tot = ip->shm_tot;
	info.shm_rss = ip->shm_rss;
	info.shm_swp = ip->shm_swp;
	info.swap_attempts = ip->swap_attempts;
	info.swap_successes = ip->swap_successes;
	return copy_to_user(uip, &info, sizeof(info));
}

static int copy_compat_shmid_to_user(void __user *buf, struct shmid64_ds *in,
					int version)
{
	if (version == IPC_64) {
		struct compat_shmid64_ds v;
		memset(&v, 0, sizeof(v));
		to_compat_ipc64_perm(&v.shm_perm, &in->shm_perm);
		v.shm_atime = in->shm_atime;
		v.shm_dtime = in->shm_dtime;
		v.shm_ctime = in->shm_ctime;
		v.shm_segsz = in->shm_segsz;
		v.shm_nattch = in->shm_nattch;
		v.shm_cpid = in->shm_cpid;
		v.shm_lpid = in->shm_lpid;
		return copy_to_user(buf, &v, sizeof(v));
	} else {
		struct compat_shmid_ds v;
		memset(&v, 0, sizeof(v));
		to_compat_ipc_perm(&v.shm_perm, &in->shm_perm);
		v.shm_perm.key = in->shm_perm.key;
		v.shm_atime = in->shm_atime;
		v.shm_dtime = in->shm_dtime;
		v.shm_ctime = in->shm_ctime;
		v.shm_segsz = in->shm_segsz;
		v.shm_nattch = in->shm_nattch;
		v.shm_cpid = in->shm_cpid;
		v.shm_lpid = in->shm_lpid;
		return copy_to_user(buf, &v, sizeof(v));
	}
}

static int copy_compat_shmid_from_user(struct shmid64_ds *out, void __user *buf,
					int version)
{
	memset(out, 0, sizeof(*out));
	if (version == IPC_64) {
		struct compat_shmid64_ds *p = buf;
		return get_compat_ipc64_perm(&out->shm_perm, &p->shm_perm);
	} else {
		struct compat_shmid_ds *p = buf;
		return get_compat_ipc_perm(&out->shm_perm, &p->shm_perm);
	}
}

COMPAT_SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, void __user *, uptr)
{
	struct ipc_namespace *ns;
	struct shmid64_ds sem64;
	int version = compat_ipc_parse_version(&cmd);
	int err;

	ns = current->nsproxy->ipc_ns;

	if (cmd < 0 || shmid < 0)
		return -EINVAL;

	switch (cmd) {
	case IPC_INFO: {
		struct shminfo64 shminfo;
		err = shmctl_ipc_info(ns, &shminfo);
		if (err < 0)
			return err;
		if (copy_compat_shminfo_to_user(uptr, &shminfo, version))
			err = -EFAULT;
		return err;
	}
	case SHM_INFO: {
		struct shm_info shm_info;
		err = shmctl_shm_info(ns, &shm_info);
		if (err < 0)
			return err;
		if (put_compat_shm_info(&shm_info, uptr))
			err = -EFAULT;
		return err;
	}
	case IPC_STAT:
	case SHM_STAT:
		err = shmctl_stat(ns, shmid, cmd, &sem64);
		if (err < 0)
			return err;
		if (copy_compat_shmid_to_user(uptr, &sem64, version))
			err = -EFAULT;
		return err;

	case IPC_SET:
		if (copy_compat_shmid_from_user(&sem64, uptr, version))
			return -EFAULT;
		/* fallthru */
	case IPC_RMID:
		return shmctl_down(ns, shmid, cmd, &sem64);
	case SHM_LOCK:
	case SHM_UNLOCK:
		return shmctl_do_lock(ns, shmid, cmd);
	default:
		return -EINVAL;
	}
	return err;
}
#endif

/*
 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
 *
 * NOTE! Despite the name, this is NOT a direct system call entrypoint. The
 * "raddr" thing points to kernel space, and there has to be a wrapper around
 * this.
 */
long do_shmat(int shmid, char __user *shmaddr, int shmflg,
	      ulong *raddr, unsigned long shmlba)
{
	struct shmid_kernel *shp;
	unsigned long addr = (unsigned long)shmaddr;
	unsigned long size;
	struct file *file;
	int    err;
	unsigned long flags = MAP_SHARED;
	unsigned long prot;
	int acc_mode;
	struct ipc_namespace *ns;
	struct shm_file_data *sfd;
	struct path path;
	fmode_t f_mode;
	unsigned long populate = 0;

	err = -EINVAL;
	if (shmid < 0)
		goto out;

	if (addr) {
		if (addr & (shmlba - 1)) {
			/*
			 * Round down to the nearest multiple of shmlba.
			 * For sane do_mmap_pgoff() parameters, avoid
			 * round downs that trigger nil-page and MAP_FIXED.
			 */
			if ((shmflg & SHM_RND) && addr >= shmlba)
				addr &= ~(shmlba - 1);
			else
#ifndef __ARCH_FORCE_SHMLBA
				if (addr & ~PAGE_MASK)
#endif
					goto out;
		}

		flags |= MAP_FIXED;
	} else if ((shmflg & SHM_REMAP))
		goto out;

	if (shmflg & SHM_RDONLY) {
		prot = PROT_READ;
		acc_mode = S_IRUGO;
		f_mode = FMODE_READ;
	} else {
		prot = PROT_READ | PROT_WRITE;
		acc_mode = S_IRUGO | S_IWUGO;
		f_mode = FMODE_READ | FMODE_WRITE;
	}
	if (shmflg & SHM_EXEC) {
		prot |= PROT_EXEC;
		acc_mode |= S_IXUGO;
	}

	/*
	 * We cannot rely on the fs check since SYSV IPC does have an
	 * additional creator id...
	 */
	ns = current->nsproxy->ipc_ns;
	rcu_read_lock();
	shp = shm_obtain_object_check(ns, shmid);
	if (IS_ERR(shp)) {
		err = PTR_ERR(shp);
		goto out_unlock;
	}

	err = -EACCES;
	if (ipcperms(ns, &shp->shm_perm, acc_mode))
		goto out_unlock;

	err = security_shm_shmat(shp, shmaddr, shmflg);
	if (err)
		goto out_unlock;

	ipc_lock_object(&shp->shm_perm);

	/* check if shm_destroy() is tearing down shp */
	if (!ipc_valid_object(&shp->shm_perm)) {
		ipc_unlock_object(&shp->shm_perm);
		err = -EIDRM;
		goto out_unlock;
	}

	path = shp->shm_file->f_path;
	path_get(&path);
	shp->shm_nattch++;
	size = i_size_read(d_inode(path.dentry));
	ipc_unlock_object(&shp->shm_perm);
	rcu_read_unlock();

	err = -ENOMEM;
	sfd = kzalloc(sizeof(*sfd), GFP_KERNEL);
	if (!sfd) {
		path_put(&path);
		goto out_nattch;
	}

	file = alloc_file(&path, f_mode,
			  is_file_hugepages(shp->shm_file) ?
				&shm_file_operations_huge :
				&shm_file_operations);
	err = PTR_ERR(file);
	if (IS_ERR(file)) {
		kfree(sfd);
		path_put(&path);
		goto out_nattch;
	}

	file->private_data = sfd;
	file->f_mapping = shp->shm_file->f_mapping;
	sfd->id = shp->shm_perm.id;
	sfd->ns = get_ipc_ns(ns);
	sfd->file = shp->shm_file;
	sfd->vm_ops = NULL;

	err = security_mmap_file(file, prot, flags);
	if (err)
		goto out_fput;

	if (down_write_killable(&current->mm->mmap_sem)) {
		err = -EINTR;
		goto out_fput;
	}

	if (addr && !(shmflg & SHM_REMAP)) {
		err = -EINVAL;
		if (addr + size < addr)
			goto invalid;

		if (find_vma_intersection(current->mm, addr, addr + size))
			goto invalid;
	}

	addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate, NULL);
	*raddr = addr;
	err = 0;
	if (IS_ERR_VALUE(addr))
		err = (long)addr;
invalid:
	up_write(&current->mm->mmap_sem);
	if (populate)
		mm_populate(addr, populate);

out_fput:
	fput(file);

out_nattch:
	down_write(&shm_ids(ns).rwsem);
	shp = shm_lock(ns, shmid);
	shp->shm_nattch--;
	if (shm_may_destroy(ns, shp))
		shm_destroy(ns, shp);
	else
		shm_unlock(shp);
	up_write(&shm_ids(ns).rwsem);
	return err;

out_unlock:
	rcu_read_unlock();
out:
	return err;
}

SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg)
{
	unsigned long ret;
	long err;

	err = do_shmat(shmid, shmaddr, shmflg, &ret, SHMLBA);
	if (err)
		return err;
	force_successful_syscall_return();
	return (long)ret;
}

#ifdef CONFIG_COMPAT

#ifndef COMPAT_SHMLBA
#define COMPAT_SHMLBA	SHMLBA
#endif

COMPAT_SYSCALL_DEFINE3(shmat, int, shmid, compat_uptr_t, shmaddr, int, shmflg)
{
	unsigned long ret;
	long err;

	err = do_shmat(shmid, compat_ptr(shmaddr), shmflg, &ret, COMPAT_SHMLBA);
	if (err)
		return err;
	force_successful_syscall_return();
	return (long)ret;
}
#endif

/*
 * detach and kill segment if marked destroyed.
 * The work is done in shm_close.
 */
SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long addr = (unsigned long)shmaddr;
	int retval = -EINVAL;
#ifdef CONFIG_MMU
	loff_t size = 0;
	struct file *file;
	struct vm_area_struct *next;
#endif

	if (addr & ~PAGE_MASK)
		return retval;

	if (down_write_killable(&mm->mmap_sem))
		return -EINTR;

	/*
	 * This function tries to be smart and unmap shm segments that
	 * were modified by partial mlock or munmap calls:
	 * - It first determines the size of the shm segment that should be
	 *   unmapped: It searches for a vma that is backed by shm and that
	 *   started at address shmaddr. It records its size and then unmaps
	 *   it.
	 * - Then it unmaps all shm vmas that started at shmaddr and that
	 *   are within the initially determined size and that are from the
	 *   same shm segment from which we determined the size.
	 * Errors from do_munmap are ignored: the function only fails if
	 * it's called with invalid parameters or if it's called to unmap
	 * a part of a vma. Both calls in this function are for full vmas,
	 * the parameters are directly copied from the vma itself and always
	 * valid - therefore do_munmap cannot fail. (famous last words?)
	 */
	/*
	 * If it had been mremap()'d, the starting address would not
	 * match the usual checks anyway. So assume all vma's are
	 * above the starting address given.
	 */
	vma = find_vma(mm, addr);

#ifdef CONFIG_MMU
	while (vma) {
		next = vma->vm_next;

		/*
		 * Check if the starting address would match, i.e. it's
		 * a fragment created by mprotect() and/or munmap(), or
		 * otherwise it starts at this address with no hassles.
		 */
		if ((vma->vm_ops == &shm_vm_ops) &&
			(vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) {

			/*
			 * Record the file of the shm segment being
			 * unmapped.  With mremap(), someone could place
			 * a page from another segment but with equal
			 * offsets in the range we are unmapping.
			 */
			file = vma->vm_file;
			size = i_size_read(file_inode(vma->vm_file));
			do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
			/*
			 * We discovered the size of the shm segment, so
			 * break out of here and fall through to the next
			 * loop that uses the size information to stop
			 * searching for matching vma's.
			 */
			retval = 0;
			vma = next;
			break;
		}
		vma = next;
	}

	/*
	 * We need look no further than the maximum address a fragment
	 * could possibly have landed at. Also cast things to loff_t to
	 * prevent overflows and make comparisons vs. equal-width types.
	 */
	size = PAGE_ALIGN(size);
	while (vma && (loff_t)(vma->vm_end - addr) <= size) {
		next = vma->vm_next;

		/* finding a matching vma now does not alter retval */
		if ((vma->vm_ops == &shm_vm_ops) &&
		    ((vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) &&
		    (vma->vm_file == file))
			do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
		vma = next;
	}

#else	/* CONFIG_MMU */
	/* under NOMMU conditions, the exact address to be destroyed must be
	 * given
	 */
	if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
		do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
		retval = 0;
	}

#endif

	up_write(&mm->mmap_sem);
	return retval;
}
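
/*
 * A sketch of the fragment case the loops above handle: mprotect() can
 * split one attached segment into several vmas backed by the same file,
 * all of which a single shmdt(addr) must unmap:
 *
 *	char *p = shmat(id, NULL, 0);		// one vma covering the segment
 *	mprotect(p + 4096, 4096, PROT_READ);	// now three vmas, same file
 *	shmdt(p);				// all three are unmapped
 */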

#ifdef CONFIG_PROC_FS
static int sysvipc_shm_proc_show(struct seq_file *s, void *it)
{
	struct user_namespace *user_ns = seq_user_ns(s);
	struct kern_ipc_perm *ipcp = it;
	struct shmid_kernel *shp;
	unsigned long rss = 0, swp = 0;

	shp = container_of(ipcp, struct shmid_kernel, shm_perm);
	shm_add_rss_swap(shp, &rss, &swp);

#if BITS_PER_LONG <= 32
#define SIZE_SPEC "%10lu"
#else
#define SIZE_SPEC "%21lu"
#endif

	seq_printf(s,
		   "%10d %10d  %4o " SIZE_SPEC " %5u %5u  "
		   "%5lu %5u %5u %5u %5u %10llu %10llu %10llu "
		   SIZE_SPEC " " SIZE_SPEC "\n",
		   shp->shm_perm.key,
		   shp->shm_perm.id,
		   shp->shm_perm.mode,
		   shp->shm_segsz,
		   shp->shm_cprid,
		   shp->shm_lprid,
		   shp->shm_nattch,
		   from_kuid_munged(user_ns, shp->shm_perm.uid),
		   from_kgid_munged(user_ns, shp->shm_perm.gid),
		   from_kuid_munged(user_ns, shp->shm_perm.cuid),
		   from_kgid_munged(user_ns, shp->shm_perm.cgid),
		   shp->shm_atim,
		   shp->shm_dtim,
		   shp->shm_ctim,
		   rss * PAGE_SIZE,
		   swp * PAGE_SIZE);

	return 0;
}
#endif