procfs_mem.c revision 53045
11541Srgrimes/*
21541Srgrimes * Copyright (c) 1993 Jan-Simon Pendry
31541Srgrimes * Copyright (c) 1993 Sean Eric Fagan
41541Srgrimes * Copyright (c) 1993
51541Srgrimes *	The Regents of the University of California.  All rights reserved.
61541Srgrimes *
71541Srgrimes * This code is derived from software contributed to Berkeley by
81541Srgrimes * Jan-Simon Pendry and Sean Eric Fagan.
91541Srgrimes *
101541Srgrimes * Redistribution and use in source and binary forms, with or without
111541Srgrimes * modification, are permitted provided that the following conditions
121541Srgrimes * are met:
131541Srgrimes * 1. Redistributions of source code must retain the above copyright
141541Srgrimes *    notice, this list of conditions and the following disclaimer.
151541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
161541Srgrimes *    notice, this list of conditions and the following disclaimer in the
171541Srgrimes *    documentation and/or other materials provided with the distribution.
181541Srgrimes * 3. All advertising materials mentioning features or use of this software
191541Srgrimes *    must display the following acknowledgement:
201541Srgrimes *	This product includes software developed by the University of
211541Srgrimes *	California, Berkeley and its contributors.
221541Srgrimes * 4. Neither the name of the University nor the names of its contributors
231541Srgrimes *    may be used to endorse or promote products derived from this software
241541Srgrimes *    without specific prior written permission.
251541Srgrimes *
261541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
271541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
281541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
291541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
301541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
311541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
321541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
331541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
341541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
351541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
361541Srgrimes * SUCH DAMAGE.
371541Srgrimes *
3822521Sdyson *	@(#)procfs_mem.c	8.5 (Berkeley) 6/15/94
391541Srgrimes *
4050477Speter * $FreeBSD: head/sys/fs/procfs/procfs_mem.c 53045 1999-11-09 01:44:28Z alc $
411541Srgrimes */
421541Srgrimes
431541Srgrimes/*
441541Srgrimes * This is a lightly hacked and merged version
451541Srgrimes * of sef's pread/pwrite functions
461541Srgrimes */
471541Srgrimes
481541Srgrimes#include <sys/param.h>
491541Srgrimes#include <sys/systm.h>
501541Srgrimes#include <sys/proc.h>
511541Srgrimes#include <sys/vnode.h>
521541Srgrimes#include <miscfs/procfs/procfs.h>
531541Srgrimes#include <vm/vm.h>
5412662Sdg#include <vm/vm_param.h>
5522521Sdyson#include <sys/lock.h>
5612662Sdg#include <vm/pmap.h>
5712662Sdg#include <vm/vm_map.h>
581541Srgrimes#include <vm/vm_kern.h>
5912662Sdg#include <vm/vm_object.h>
601541Srgrimes#include <vm/vm_page.h>
6112662Sdg#include <vm/vm_extern.h>
6213608Speter#include <sys/user.h>
6336168Stegge#include <sys/ptrace.h>
641541Srgrimes
6536168Steggestatic int	procfs_rwmem __P((struct proc *curp,
6636168Stegge				  struct proc *p, struct uio *uio));
6712595Sbde
681541Srgrimesstatic int
6936168Steggeprocfs_rwmem(curp, p, uio)
7036168Stegge	struct proc *curp;
711541Srgrimes	struct proc *p;
721541Srgrimes	struct uio *uio;
731541Srgrimes{
741541Srgrimes	int error;
751541Srgrimes	int writing;
7616308Sdyson	struct vmspace *vm;
7716889Sdyson	vm_map_t map;
7816889Sdyson	vm_object_t object = NULL;
7916889Sdyson	vm_offset_t pageno = 0;		/* page number */
8024666Sdyson	vm_prot_t reqprot;
8124666Sdyson	vm_offset_t kva;
821541Srgrimes
8316308Sdyson	/*
8416308Sdyson	 * if the vmspace is in the midst of being deallocated or the
8516308Sdyson	 * process is exiting, don't try to grab anything.  The page table
8616308Sdyson	 * usage in that process can be messed up.
8716308Sdyson	 */
8816308Sdyson	vm = p->p_vmspace;
8916308Sdyson	if ((p->p_flag & P_WEXIT) || (vm->vm_refcnt < 1))
9016308Sdyson		return EFAULT;
9116308Sdyson	++vm->vm_refcnt;
9216889Sdyson	/*
9316889Sdyson	 * The map we want...
9416889Sdyson	 */
9516889Sdyson	map = &vm->vm_map;
9616308Sdyson
971541Srgrimes	writing = uio->uio_rw == UIO_WRITE;
9824666Sdyson	reqprot = writing ? (VM_PROT_WRITE | VM_PROT_OVERRIDE_WRITE) : VM_PROT_READ;
991541Srgrimes
10024666Sdyson	kva = kmem_alloc_pageable(kernel_map, PAGE_SIZE);
10124666Sdyson
1021541Srgrimes	/*
1031541Srgrimes	 * Only map in one page at a time.  We don't have to, but it
1041541Srgrimes	 * makes things easier.  This way is trivial - right?
1051541Srgrimes	 */
1061541Srgrimes	do {
10716889Sdyson		vm_map_t tmap;
1081541Srgrimes		vm_offset_t uva;
1091541Srgrimes		int page_offset;		/* offset into page */
1101541Srgrimes		vm_map_entry_t out_entry;
1111541Srgrimes		vm_prot_t out_prot;
11232702Sdyson		boolean_t wired;
11312904Sbde		vm_pindex_t pindex;
1141541Srgrimes		u_int len;
11524666Sdyson		vm_page_t m;
1161541Srgrimes
11716889Sdyson		object = NULL;
11816889Sdyson
1191541Srgrimes		uva = (vm_offset_t) uio->uio_offset;
1201541Srgrimes
1211541Srgrimes		/*
1221541Srgrimes		 * Get the page number of this segment.
1231541Srgrimes		 */
1241541Srgrimes		pageno = trunc_page(uva);
1251541Srgrimes		page_offset = uva - pageno;
1261541Srgrimes
1271541Srgrimes		/*
1281541Srgrimes		 * How many bytes to copy
1291541Srgrimes		 */
1301541Srgrimes		len = min(PAGE_SIZE - page_offset, uio->uio_resid);
1311541Srgrimes
13213627Speter		if (uva >= VM_MAXUSER_ADDRESS) {
13324666Sdyson			vm_offset_t tkva;
13424666Sdyson
13536168Stegge			if (writing ||
13636168Stegge			    uva >= VM_MAXUSER_ADDRESS + UPAGES * PAGE_SIZE ||
13736168Stegge			    (ptrace_read_u_check(p,
13836168Stegge						 uva - (vm_offset_t) VM_MAXUSER_ADDRESS,
13936168Stegge						 (size_t) len) &&
14036168Stegge			     !procfs_kmemaccess(curp))) {
14113627Speter				error = 0;
14213627Speter				break;
14313627Speter			}
14413627Speter
14513627Speter			/* we are reading the "U area", force it into core */
14613627Speter			PHOLD(p);
14713627Speter
14813627Speter			/* sanity check */
14913627Speter			if (!(p->p_flag & P_INMEM)) {
15013627Speter				/* aiee! */
15116889Sdyson				PRELE(p);
15213627Speter				error = EFAULT;
15313627Speter				break;
15413627Speter			}
15513627Speter
15613627Speter			/* populate the ptrace/procfs area */
15713627Speter			p->p_addr->u_kproc.kp_proc = *p;
15813627Speter			fill_eproc (p, &p->p_addr->u_kproc.kp_eproc);
15913627Speter
16013627Speter			/* locate the in-core address */
16137649Sbde			tkva = (uintptr_t)p->p_addr + uva - VM_MAXUSER_ADDRESS;
16213627Speter
16313627Speter			/* transfer it */
16424666Sdyson			error = uiomove((caddr_t)tkva, len, uio);
16513627Speter
16613627Speter			/* let the pages go */
16713627Speter			PRELE(p);
16813627Speter
16913627Speter			continue;
17013627Speter		}
17113627Speter
1721541Srgrimes		/*
17324666Sdyson		 * Fault the page on behalf of the process
1741541Srgrimes		 */
17553045Salc		error = vm_fault(map, pageno, reqprot, VM_FAULT_NORMAL);
17624666Sdyson		if (error) {
17724666Sdyson			error = EFAULT;
17824666Sdyson			break;
1791541Srgrimes		}
1801541Srgrimes
1811541Srgrimes		/*
1821541Srgrimes		 * Now we need to get the page.  out_entry, out_prot, wired,
1831541Srgrimes		 * and single_use aren't used.  One would think the vm code
1841541Srgrimes		 * would be a *bit* nicer...  We use tmap because
1851541Srgrimes		 * vm_map_lookup() can change the map argument.
1861541Srgrimes		 */
1871541Srgrimes		tmap = map;
18824666Sdyson		error = vm_map_lookup(&tmap, pageno, reqprot,
18916889Sdyson			      &out_entry, &object, &pindex, &out_prot,
19032702Sdyson			      &wired);
19116889Sdyson
19216889Sdyson		if (error) {
19324666Sdyson			error = EFAULT;
19424666Sdyson
19516889Sdyson			/*
19616889Sdyson			 * Make sure that there is no residue in 'object' from
19716889Sdyson			 * an error return on vm_map_lookup.
19816889Sdyson			 */
19916889Sdyson			object = NULL;
20024666Sdyson
20116889Sdyson			break;
20216889Sdyson		}
20316889Sdyson
20424666Sdyson		m = vm_page_lookup(object, pindex);
20525055Sdyson
20625055Sdyson		/* Allow fallback to backing objects if we are reading */
20725055Sdyson
20825055Sdyson		while (m == NULL && !writing && object->backing_object) {
20925055Sdyson
21025055Sdyson		  pindex += OFF_TO_IDX(object->backing_object_offset);
21125055Sdyson		  object = object->backing_object;
21225055Sdyson
21325055Sdyson		  m = vm_page_lookup(object, pindex);
21425055Sdyson		}
21525055Sdyson
21624666Sdyson		if (m == NULL) {
21724666Sdyson			error = EFAULT;
21824666Sdyson
21924666Sdyson			/*
22024666Sdyson			 * Make sure that there is no residue in 'object' from
22124666Sdyson			 * an error return on vm_map_lookup.
22224666Sdyson			 */
22324666Sdyson			object = NULL;
22424666Sdyson
22525055Sdyson			vm_map_lookup_done(tmap, out_entry);
22625055Sdyson
22724666Sdyson			break;
22824666Sdyson		}
22924666Sdyson
2301541Srgrimes		/*
23124666Sdyson		 * Wire the page into memory
23224666Sdyson		 */
23324666Sdyson		vm_page_wire(m);
23424666Sdyson
23524666Sdyson		/*
2361541Srgrimes		 * We're done with tmap now.
23716889Sdyson		 * But reference the object first, so that we won't loose
23816889Sdyson		 * it.
2391541Srgrimes		 */
24016889Sdyson		vm_object_reference(object);
24116889Sdyson		vm_map_lookup_done(tmap, out_entry);
2428876Srgrimes
24324666Sdyson		pmap_kenter(kva, VM_PAGE_TO_PHYS(m));
24424666Sdyson
2451541Srgrimes		/*
24624666Sdyson		 * Now do the i/o move.
2471541Srgrimes		 */
24824666Sdyson		error = uiomove((caddr_t)(kva + page_offset), len, uio);
2491541Srgrimes
25024666Sdyson		pmap_kremove(kva);
2511541Srgrimes
25216889Sdyson		/*
25324666Sdyson		 * release the page and the object
25416889Sdyson		 */
25540700Sdg		vm_page_unwire(m, 1);
25624666Sdyson		vm_object_deallocate(object);
2571541Srgrimes
25816889Sdyson		object = NULL;
25916889Sdyson
2601541Srgrimes	} while (error == 0 && uio->uio_resid > 0);
2611541Srgrimes
26216889Sdyson	if (object)
26316889Sdyson		vm_object_deallocate(object);
26416889Sdyson
26524666Sdyson	kmem_free(kernel_map, kva, PAGE_SIZE);
26616308Sdyson	vmspace_free(vm);
2671541Srgrimes	return (error);
2681541Srgrimes}
2691541Srgrimes
2701541Srgrimes/*
2711541Srgrimes * Copy data in and out of the target process.
2721541Srgrimes * We do this by mapping the process's page into
2731541Srgrimes * the kernel and then doing a uiomove direct
2741541Srgrimes * from the kernel address space.
2751541Srgrimes */
2761541Srgrimesint
2771541Srgrimesprocfs_domem(curp, p, pfs, uio)
2781541Srgrimes	struct proc *curp;
2791541Srgrimes	struct proc *p;
2801541Srgrimes	struct pfsnode *pfs;
2811541Srgrimes	struct uio *uio;
2821541Srgrimes{
2831541Srgrimes
2841541Srgrimes	if (uio->uio_resid == 0)
2851541Srgrimes		return (0);
2861541Srgrimes
28728086Ssef 	/*
28828086Ssef 	 * XXX
28928086Ssef 	 * We need to check for KMEM_GROUP because ps is sgid kmem;
29028086Ssef 	 * not allowing it here causes ps to not work properly.  Arguably,
29128086Ssef 	 * this is a bug with what ps does.  We only need to do this
29228086Ssef 	 * for Pmem nodes, and only if it's reading.  This is still not
29328086Ssef 	 * good, as it may still be possible to grab illicit data if
29428086Ssef 	 * a process somehow gets to be KMEM_GROUP.  Note that this also
29528086Ssef 	 * means that KMEM_GROUP can't change without editing procfs.h!
29628086Ssef 	 * All in all, quite yucky.
29728086Ssef 	 */
29828086Ssef
29928086Ssef 	if (!CHECKIO(curp, p) &&
30036168Stegge	    !(uio->uio_rw == UIO_READ &&
30136168Stegge	      procfs_kmemaccess(curp)))
30228086Ssef 		return EPERM;
30328086Ssef
30436168Stegge	return (procfs_rwmem(curp, p, uio));
3051541Srgrimes}
3061541Srgrimes
3071541Srgrimes/*
3081541Srgrimes * Given process (p), find the vnode from which
30935256Sdes * its text segment is being executed.
3101541Srgrimes *
3111541Srgrimes * It would be nice to grab this information from
3121541Srgrimes * the VM system, however, there is no sure-fire
3131541Srgrimes * way of doing that.  Instead, fork(), exec() and
3141541Srgrimes * wait() all maintain the p_textvp field in the
3151541Srgrimes * process proc structure which contains a held
3161541Srgrimes * reference to the exec'ed vnode.
31752990Ssef *
31852990Ssef * XXX - Currently, this is not not used, as the
31952990Ssef * /proc/pid/file object exposes an information leak
32052990Ssef * that shouldn't happen.  Using a mount option would
32152990Ssef * make it configurable on a per-system (or, at least,
32252990Ssef * per-mount) basis; however, that's not really best.
32352990Ssef * The best way to do it, I think, would be as an
32452990Ssef * ioctl; this would restrict it to the uid running
32552990Ssef * program, or root, which seems a reasonable compromise.
32652990Ssef * However, the number of applications for this is
32752990Ssef * minimal, if it can't be seen in the filesytem space,
32852990Ssef * and doint it as an ioctl makes it somewhat less
32952990Ssef * useful due to the, well, inelegance.
33052990Ssef *
3311541Srgrimes */
3321541Srgrimesstruct vnode *
3331541Srgrimesprocfs_findtextvp(p)
3341541Srgrimes	struct proc *p;
3351541Srgrimes{
33622521Sdyson
3371541Srgrimes	return (p->p_textvp);
3381541Srgrimes}
33936168Stegge
34036168Steggeint procfs_kmemaccess(curp)
34136168Stegge	struct proc *curp;
34236168Stegge{
34336168Stegge	int i;
34436168Stegge	struct ucred *cred;
34536168Stegge
34636168Stegge	cred = curp->p_cred->pc_ucred;
34746116Sphk	if (suser(curp))
34836168Stegge		return 1;
34946116Sphk
35046116Sphk	/* XXX: Why isn't this done with file-perms ??? */
35136168Stegge	for (i = 0; i < cred->cr_ngroups; i++)
35246116Sphk		if (cred->cr_groups[i] == KMEM_GROUP)
35336168Stegge			return 1;
35436168Stegge
35536168Stegge	return 0;
35636168Stegge}
357