/*
 * Copyright (c) 1993 Jan-Simon Pendry
 * Copyright (c) 1993 Sean Eric Fagan
 * Copyright (c) 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Jan-Simon Pendry and Sean Eric Fagan.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)procfs_mem.c	8.5 (Berkeley) 6/15/94
 *
 * $FreeBSD: head/sys/fs/procfs/procfs_mem.c 76827 2001-05-19 01:28:09Z alfred $
 */

/*
 * This is a lightly hacked and merged version
 * of sef's pread/pwrite functions
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/ptrace.h>
#include <sys/user.h>
#include <sys/vnode.h>

#include <miscfs/procfs/procfs.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_extern.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>

static int	procfs_rwmem __P((struct proc *curp,
				  struct proc *p, struct uio *uio));

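/*
 * procfs_rwmem: transfer data between the target process's address
 * space and the uio request, one page at a time.  Each page is
 * faulted in on behalf of the target, looked up and wired, and then
 * temporarily mapped at 'kva' so uiomove() can copy through the
 * kernel address space.
 */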
static int
procfs_rwmem(curp, p, uio)
	struct proc *curp;
	struct proc *p;
	struct uio *uio;
{
	int error;
	int writing;
	struct vmspace *vm;
	vm_map_t map;
	vm_object_t object = NULL;
	vm_offset_t pageno = 0;		/* page number */
	vm_prot_t reqprot;
	vm_offset_t kva;

	/*
	 * if the vmspace is in the midst of being deallocated or the
	 * process is exiting, don't try to grab anything.  The page table
	 * usage in that process can be messed up.
	 */
	vm = p->p_vmspace;
	if ((p->p_flag & P_WEXIT))
		return EFAULT;

	mtx_lock(&vm_mtx);
	if (vm->vm_refcnt < 1) {
		mtx_unlock(&vm_mtx);
		return EFAULT;
	}
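	/*
	 * Take our own reference on the vmspace so it cannot be torn
	 * down while we are working on it; the vmspace_free() at the
	 * end of this function drops it.
	 */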
	++vm->vm_refcnt;
	/*
	 * The map we want...
	 */
	map = &vm->vm_map;

	writing = uio->uio_rw == UIO_WRITE;
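	/*
	 * For writes, VM_PROT_OVERRIDE_WRITE tells vm_fault() to allow
	 * the access even where the target maps the page read-only (for
	 * example its text), which is what a debugger-style writer needs.
	 */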
	reqprot = writing ? (VM_PROT_WRITE | VM_PROT_OVERRIDE_WRITE) : VM_PROT_READ;

	kva = kmem_alloc_pageable(kernel_map, PAGE_SIZE);

	/*
	 * Only map in one page at a time.  We don't have to, but it
	 * makes things easier.  This way is trivial - right?
	 */
	do {
		vm_map_t tmap;
		vm_offset_t uva;
		int page_offset;		/* offset into page */
		vm_map_entry_t out_entry;
		vm_prot_t out_prot;
		boolean_t wired;
		vm_pindex_t pindex;
		u_int len;
		vm_page_t m;

		object = NULL;

		uva = (vm_offset_t) uio->uio_offset;

		/*
		 * Get the page number of this segment.
		 */
		pageno = trunc_page(uva);
		page_offset = uva - pageno;

		/*
		 * How many bytes to copy
		 */
		len = min(PAGE_SIZE - page_offset, uio->uio_resid);

		/*
		 * Fault the page on behalf of the process
		 */
		error = vm_fault(map, pageno, reqprot, VM_FAULT_NORMAL);
		if (error) {
			error = EFAULT;
			break;
		}

		/*
		 * Now we need to get the page.  out_entry, out_prot, wired,
		 * and single_use aren't used.  One would think the vm code
		 * would be a *bit* nicer...  We use tmap because
		 * vm_map_lookup() can change the map argument.
		 */
		tmap = map;
		error = vm_map_lookup(&tmap, pageno, reqprot,
			      &out_entry, &object, &pindex, &out_prot,
			      &wired);

		if (error) {
			error = EFAULT;

			/*
			 * Make sure that there is no residue in 'object' from
			 * an error return on vm_map_lookup.
			 */
			object = NULL;

			break;
		}

		m = vm_page_lookup(object, pindex);

		/* Allow fallback to backing objects if we are reading */

		while (m == NULL && !writing && object->backing_object) {

			pindex += OFF_TO_IDX(object->backing_object_offset);
			object = object->backing_object;

			m = vm_page_lookup(object, pindex);
		}

		if (m == NULL) {
			error = EFAULT;

			/*
			 * Make sure that there is no residue in 'object' from
			 * an error return on vm_map_lookup.
			 */
			object = NULL;

			vm_map_lookup_done(tmap, out_entry);

			break;
		}

		/*
		 * Wire the page into memory
		 */
		vm_page_wire(m);

		/*
		 * We're done with tmap now.  But reference the object
		 * first, so that we won't lose it.
		 */
		vm_object_reference(object);
		vm_map_lookup_done(tmap, out_entry);

		pmap_kenter(kva, VM_PAGE_TO_PHYS(m));

		/*
		 * Now do the i/o move.  vm_mtx is dropped across the
		 * uiomove() because the copy may fault on the request's
		 * buffer and sleep.
		 */
		mtx_unlock(&vm_mtx);
		error = uiomove((caddr_t)(kva + page_offset), len, uio);
		mtx_lock(&vm_mtx);

		pmap_kremove(kva);

		/*
		 * release the page and the object
		 */
		vm_page_unwire(m, 1);
		vm_object_deallocate(object);

		object = NULL;

	} while (error == 0 && uio->uio_resid > 0);

	if (object)
		vm_object_deallocate(object);

	kmem_free(kernel_map, kva, PAGE_SIZE);
	vmspace_free(vm);
	mtx_unlock(&vm_mtx);
	return (error);
}

/*
 * Copy data in and out of the target process.
 * We do this by mapping the process's page into
 * the kernel and then doing a uiomove directly
 * from the kernel address space.
 */
int
procfs_domem(curp, p, pfs, uio)
	struct proc *curp;
	struct proc *p;
	struct pfsnode *pfs;
	struct uio *uio;
{

	if (uio->uio_resid == 0)
		return (0);

	/*
	 * XXX
	 * We need to check for KMEM_GROUP because ps is sgid kmem;
	 * not allowing it here causes ps to not work properly.  Arguably,
	 * this is a bug with what ps does.  We only need to do this
	 * for Pmem nodes, and only if it's reading.  This is still not
	 * good, as it may still be possible to grab illicit data if
	 * a process somehow gets to be KMEM_GROUP.  Note that this also
	 * means that KMEM_GROUP can't change without editing procfs.h!
	 * All in all, quite yucky.
	 */

	if (p_can(curp, p, P_CAN_DEBUG, NULL) &&
	    !(uio->uio_rw == UIO_READ &&
	      procfs_kmemaccess(curp)))
		return EPERM;

	return (procfs_rwmem(curp, p, uio));
}

/*
 * Given process (p), find the vnode from which
 * its text segment is being executed.
 *
 * It would be nice to grab this information from
 * the VM system; however, there is no sure-fire
 * way of doing that.  Instead, fork(), exec() and
 * wait() all maintain the p_textvp field in the
 * process proc structure which contains a held
 * reference to the exec'ed vnode.
 *
 * XXX - Currently, this is not used, as the
 * /proc/pid/file object exposes an information leak
 * that shouldn't happen.  Using a mount option would
 * make it configurable on a per-system (or, at least,
 * per-mount) basis; however, that's not really best.
 * The best way to do it, I think, would be as an
 * ioctl; this would restrict it to the uid running the
 * program, or root, which seems a reasonable compromise.
 * However, the number of applications for this is
 * minimal, if it can't be seen in the filesystem space,
 * and doing it as an ioctl makes it somewhat less
 * useful due to the, well, inelegance.
 *
 */
struct vnode *
procfs_findtextvp(p)
	struct proc *p;
{

	return (p->p_textvp);
}

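/*
 * procfs_kmemaccess: return non-zero if the calling process has
 * superuser or kmem-group privileges.  procfs_domem() uses this to
 * let sgid-kmem programs such as ps(1) read the memory of processes
 * they could not otherwise debug.
 */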
int
procfs_kmemaccess(curp)
	struct proc *curp;
{
	int i;
	struct ucred *cred;

	cred = curp->p_ucred;
	if (!suser(curp))
		return 1;

	/* XXX: Why isn't this done with file-perms ??? */
	for (i = 0; i < cred->cr_ngroups; i++)
		if (cred->cr_groups[i] == KMEM_GROUP)
			return 1;

	return 0;
}