procfs_mem.c revision 50477
11541Srgrimes/*
21541Srgrimes * Copyright (c) 1993 Jan-Simon Pendry
31541Srgrimes * Copyright (c) 1993 Sean Eric Fagan
41541Srgrimes * Copyright (c) 1993
51541Srgrimes *	The Regents of the University of California.  All rights reserved.
61541Srgrimes *
71541Srgrimes * This code is derived from software contributed to Berkeley by
81541Srgrimes * Jan-Simon Pendry and Sean Eric Fagan.
91541Srgrimes *
101541Srgrimes * Redistribution and use in source and binary forms, with or without
111541Srgrimes * modification, are permitted provided that the following conditions
121541Srgrimes * are met:
131541Srgrimes * 1. Redistributions of source code must retain the above copyright
141541Srgrimes *    notice, this list of conditions and the following disclaimer.
151541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
161541Srgrimes *    notice, this list of conditions and the following disclaimer in the
171541Srgrimes *    documentation and/or other materials provided with the distribution.
181541Srgrimes * 3. All advertising materials mentioning features or use of this software
191541Srgrimes *    must display the following acknowledgement:
201541Srgrimes *	This product includes software developed by the University of
211541Srgrimes *	California, Berkeley and its contributors.
221541Srgrimes * 4. Neither the name of the University nor the names of its contributors
231541Srgrimes *    may be used to endorse or promote products derived from this software
241541Srgrimes *    without specific prior written permission.
251541Srgrimes *
261541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
271541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
281541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
291541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
301541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
311541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
321541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
331541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
341541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
351541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
361541Srgrimes * SUCH DAMAGE.
371541Srgrimes *
3822521Sdyson *	@(#)procfs_mem.c	8.5 (Berkeley) 6/15/94
391541Srgrimes *
4050477Speter * $FreeBSD: head/sys/fs/procfs/procfs_mem.c 50477 1999-08-28 01:08:13Z peter $
411541Srgrimes */
421541Srgrimes
431541Srgrimes/*
441541Srgrimes * This is a lightly hacked and merged version
451541Srgrimes * of sef's pread/pwrite functions
461541Srgrimes */
471541Srgrimes
481541Srgrimes#include <sys/param.h>
491541Srgrimes#include <sys/systm.h>
501541Srgrimes#include <sys/proc.h>
511541Srgrimes#include <sys/vnode.h>
521541Srgrimes#include <miscfs/procfs/procfs.h>
531541Srgrimes#include <vm/vm.h>
5412662Sdg#include <vm/vm_param.h>
5512662Sdg#include <vm/vm_prot.h>
5622521Sdyson#include <sys/lock.h>
5712662Sdg#include <vm/pmap.h>
5812662Sdg#include <vm/vm_map.h>
591541Srgrimes#include <vm/vm_kern.h>
6012662Sdg#include <vm/vm_object.h>
611541Srgrimes#include <vm/vm_page.h>
6212662Sdg#include <vm/vm_extern.h>
6313608Speter#include <sys/user.h>
6436168Stegge#include <sys/ptrace.h>
651541Srgrimes
6636168Steggestatic int	procfs_rwmem __P((struct proc *curp,
6736168Stegge				  struct proc *p, struct uio *uio));
6812595Sbde
691541Srgrimesstatic int
7036168Steggeprocfs_rwmem(curp, p, uio)
7136168Stegge	struct proc *curp;
721541Srgrimes	struct proc *p;
731541Srgrimes	struct uio *uio;
741541Srgrimes{
751541Srgrimes	int error;
761541Srgrimes	int writing;
7716308Sdyson	struct vmspace *vm;
7816889Sdyson	vm_map_t map;
7916889Sdyson	vm_object_t object = NULL;
8016889Sdyson	vm_offset_t pageno = 0;		/* page number */
8124666Sdyson	vm_prot_t reqprot;
8224666Sdyson	vm_offset_t kva;
831541Srgrimes
8416308Sdyson	/*
8516308Sdyson	 * if the vmspace is in the midst of being deallocated or the
8616308Sdyson	 * process is exiting, don't try to grab anything.  The page table
8716308Sdyson	 * usage in that process can be messed up.
8816308Sdyson	 */
8916308Sdyson	vm = p->p_vmspace;
9016308Sdyson	if ((p->p_flag & P_WEXIT) || (vm->vm_refcnt < 1))
9116308Sdyson		return EFAULT;
9216308Sdyson	++vm->vm_refcnt;
9316889Sdyson	/*
9416889Sdyson	 * The map we want...
9516889Sdyson	 */
9616889Sdyson	map = &vm->vm_map;
9716308Sdyson
981541Srgrimes	writing = uio->uio_rw == UIO_WRITE;
9924666Sdyson	reqprot = writing ? (VM_PROT_WRITE | VM_PROT_OVERRIDE_WRITE) : VM_PROT_READ;
1001541Srgrimes
10124666Sdyson	kva = kmem_alloc_pageable(kernel_map, PAGE_SIZE);
10224666Sdyson
1031541Srgrimes	/*
1041541Srgrimes	 * Only map in one page at a time.  We don't have to, but it
1051541Srgrimes	 * makes things easier.  This way is trivial - right?
1061541Srgrimes	 */
1071541Srgrimes	do {
10816889Sdyson		vm_map_t tmap;
1091541Srgrimes		vm_offset_t uva;
1101541Srgrimes		int page_offset;		/* offset into page */
1111541Srgrimes		vm_map_entry_t out_entry;
1121541Srgrimes		vm_prot_t out_prot;
11332702Sdyson		boolean_t wired;
11412904Sbde		vm_pindex_t pindex;
1151541Srgrimes		u_int len;
11624666Sdyson		vm_page_t m;
1171541Srgrimes
11816889Sdyson		object = NULL;
11916889Sdyson
1201541Srgrimes		uva = (vm_offset_t) uio->uio_offset;
1211541Srgrimes
1221541Srgrimes		/*
1231541Srgrimes		 * Get the page number of this segment.
1241541Srgrimes		 */
1251541Srgrimes		pageno = trunc_page(uva);
1261541Srgrimes		page_offset = uva - pageno;
1271541Srgrimes
1281541Srgrimes		/*
1291541Srgrimes		 * How many bytes to copy
1301541Srgrimes		 */
1311541Srgrimes		len = min(PAGE_SIZE - page_offset, uio->uio_resid);
1321541Srgrimes
13313627Speter		if (uva >= VM_MAXUSER_ADDRESS) {
13424666Sdyson			vm_offset_t tkva;
13524666Sdyson
13636168Stegge			if (writing ||
13736168Stegge			    uva >= VM_MAXUSER_ADDRESS + UPAGES * PAGE_SIZE ||
13836168Stegge			    (ptrace_read_u_check(p,
13936168Stegge						 uva - (vm_offset_t) VM_MAXUSER_ADDRESS,
14036168Stegge						 (size_t) len) &&
14136168Stegge			     !procfs_kmemaccess(curp))) {
14213627Speter				error = 0;
14313627Speter				break;
14413627Speter			}
14513627Speter
14613627Speter			/* we are reading the "U area", force it into core */
14713627Speter			PHOLD(p);
14813627Speter
14913627Speter			/* sanity check */
15013627Speter			if (!(p->p_flag & P_INMEM)) {
15113627Speter				/* aiee! */
15216889Sdyson				PRELE(p);
15313627Speter				error = EFAULT;
15413627Speter				break;
15513627Speter			}
15613627Speter
15713627Speter			/* populate the ptrace/procfs area */
15813627Speter			p->p_addr->u_kproc.kp_proc = *p;
15913627Speter			fill_eproc (p, &p->p_addr->u_kproc.kp_eproc);
16013627Speter
16113627Speter			/* locate the in-core address */
16237649Sbde			tkva = (uintptr_t)p->p_addr + uva - VM_MAXUSER_ADDRESS;
16313627Speter
16413627Speter			/* transfer it */
16524666Sdyson			error = uiomove((caddr_t)tkva, len, uio);
16613627Speter
16713627Speter			/* let the pages go */
16813627Speter			PRELE(p);
16913627Speter
17013627Speter			continue;
17113627Speter		}
17213627Speter
1731541Srgrimes		/*
17424666Sdyson		 * Fault the page on behalf of the process
1751541Srgrimes		 */
17624666Sdyson		error = vm_fault(map, pageno, reqprot, FALSE);
17724666Sdyson		if (error) {
17824666Sdyson			error = EFAULT;
17924666Sdyson			break;
1801541Srgrimes		}
1811541Srgrimes
1821541Srgrimes		/*
1831541Srgrimes		 * Now we need to get the page.  out_entry, out_prot, wired,
1841541Srgrimes		 * and single_use aren't used.  One would think the vm code
1851541Srgrimes		 * would be a *bit* nicer...  We use tmap because
1861541Srgrimes		 * vm_map_lookup() can change the map argument.
1871541Srgrimes		 */
1881541Srgrimes		tmap = map;
18924666Sdyson		error = vm_map_lookup(&tmap, pageno, reqprot,
19016889Sdyson			      &out_entry, &object, &pindex, &out_prot,
19132702Sdyson			      &wired);
19216889Sdyson
19316889Sdyson		if (error) {
19424666Sdyson			error = EFAULT;
19524666Sdyson
19616889Sdyson			/*
19716889Sdyson			 * Make sure that there is no residue in 'object' from
19816889Sdyson			 * an error return on vm_map_lookup.
19916889Sdyson			 */
20016889Sdyson			object = NULL;
20124666Sdyson
20216889Sdyson			break;
20316889Sdyson		}
20416889Sdyson
20524666Sdyson		m = vm_page_lookup(object, pindex);
20625055Sdyson
20725055Sdyson		/* Allow fallback to backing objects if we are reading */
20825055Sdyson
20925055Sdyson		while (m == NULL && !writing && object->backing_object) {
21025055Sdyson
21125055Sdyson		  pindex += OFF_TO_IDX(object->backing_object_offset);
21225055Sdyson		  object = object->backing_object;
21325055Sdyson
21425055Sdyson		  m = vm_page_lookup(object, pindex);
21525055Sdyson		}
21625055Sdyson
21724666Sdyson		if (m == NULL) {
21824666Sdyson			error = EFAULT;
21924666Sdyson
22024666Sdyson			/*
22124666Sdyson			 * Make sure that there is no residue in 'object' from
22224666Sdyson			 * an error return on vm_map_lookup.
22324666Sdyson			 */
22424666Sdyson			object = NULL;
22524666Sdyson
22625055Sdyson			vm_map_lookup_done(tmap, out_entry);
22725055Sdyson
22824666Sdyson			break;
22924666Sdyson		}
23024666Sdyson
2311541Srgrimes		/*
23224666Sdyson		 * Wire the page into memory
23324666Sdyson		 */
23424666Sdyson		vm_page_wire(m);
23524666Sdyson
23624666Sdyson		/*
2371541Srgrimes		 * We're done with tmap now.
23816889Sdyson		 * But reference the object first, so that we won't loose
23916889Sdyson		 * it.
2401541Srgrimes		 */
24116889Sdyson		vm_object_reference(object);
24216889Sdyson		vm_map_lookup_done(tmap, out_entry);
2438876Srgrimes
24424666Sdyson		pmap_kenter(kva, VM_PAGE_TO_PHYS(m));
24524666Sdyson
2461541Srgrimes		/*
24724666Sdyson		 * Now do the i/o move.
2481541Srgrimes		 */
24924666Sdyson		error = uiomove((caddr_t)(kva + page_offset), len, uio);
2501541Srgrimes
25124666Sdyson		pmap_kremove(kva);
2521541Srgrimes
25316889Sdyson		/*
25424666Sdyson		 * release the page and the object
25516889Sdyson		 */
25640700Sdg		vm_page_unwire(m, 1);
25724666Sdyson		vm_object_deallocate(object);
2581541Srgrimes
25916889Sdyson		object = NULL;
26016889Sdyson
2611541Srgrimes	} while (error == 0 && uio->uio_resid > 0);
2621541Srgrimes
26316889Sdyson	if (object)
26416889Sdyson		vm_object_deallocate(object);
26516889Sdyson
26624666Sdyson	kmem_free(kernel_map, kva, PAGE_SIZE);
26716308Sdyson	vmspace_free(vm);
2681541Srgrimes	return (error);
2691541Srgrimes}
2701541Srgrimes
2711541Srgrimes/*
2721541Srgrimes * Copy data in and out of the target process.
2731541Srgrimes * We do this by mapping the process's page into
2741541Srgrimes * the kernel and then doing a uiomove direct
2751541Srgrimes * from the kernel address space.
2761541Srgrimes */
2771541Srgrimesint
2781541Srgrimesprocfs_domem(curp, p, pfs, uio)
2791541Srgrimes	struct proc *curp;
2801541Srgrimes	struct proc *p;
2811541Srgrimes	struct pfsnode *pfs;
2821541Srgrimes	struct uio *uio;
2831541Srgrimes{
2841541Srgrimes
2851541Srgrimes	if (uio->uio_resid == 0)
2861541Srgrimes		return (0);
2871541Srgrimes
28828086Ssef 	/*
28928086Ssef 	 * XXX
29028086Ssef 	 * We need to check for KMEM_GROUP because ps is sgid kmem;
29128086Ssef 	 * not allowing it here causes ps to not work properly.  Arguably,
29228086Ssef 	 * this is a bug with what ps does.  We only need to do this
29328086Ssef 	 * for Pmem nodes, and only if it's reading.  This is still not
29428086Ssef 	 * good, as it may still be possible to grab illicit data if
29528086Ssef 	 * a process somehow gets to be KMEM_GROUP.  Note that this also
29628086Ssef 	 * means that KMEM_GROUP can't change without editing procfs.h!
29728086Ssef 	 * All in all, quite yucky.
29828086Ssef 	 */
29928086Ssef
30028086Ssef 	if (!CHECKIO(curp, p) &&
30136168Stegge	    !(uio->uio_rw == UIO_READ &&
30236168Stegge	      procfs_kmemaccess(curp)))
30328086Ssef 		return EPERM;
30428086Ssef
30536168Stegge	return (procfs_rwmem(curp, p, uio));
3061541Srgrimes}
3071541Srgrimes
3081541Srgrimes/*
3091541Srgrimes * Given process (p), find the vnode from which
31035256Sdes * its text segment is being executed.
3111541Srgrimes *
3121541Srgrimes * It would be nice to grab this information from
3131541Srgrimes * the VM system, however, there is no sure-fire
3141541Srgrimes * way of doing that.  Instead, fork(), exec() and
3151541Srgrimes * wait() all maintain the p_textvp field in the
3161541Srgrimes * process proc structure which contains a held
3171541Srgrimes * reference to the exec'ed vnode.
3181541Srgrimes */
3191541Srgrimesstruct vnode *
3201541Srgrimesprocfs_findtextvp(p)
3211541Srgrimes	struct proc *p;
3221541Srgrimes{
32322521Sdyson
3241541Srgrimes	return (p->p_textvp);
3251541Srgrimes}
32636168Stegge
32736168Steggeint procfs_kmemaccess(curp)
32836168Stegge	struct proc *curp;
32936168Stegge{
33036168Stegge	int i;
33136168Stegge	struct ucred *cred;
33236168Stegge
33336168Stegge	cred = curp->p_cred->pc_ucred;
33446116Sphk	if (suser(curp))
33536168Stegge		return 1;
33646116Sphk
33746116Sphk	/* XXX: Why isn't this done with file-perms ??? */
33836168Stegge	for (i = 0; i < cred->cr_ngroups; i++)
33946116Sphk		if (cred->cr_groups[i] == KMEM_GROUP)
34036168Stegge			return 1;
34136168Stegge
34236168Stegge	return 0;
34336168Stegge}
344