procfs_mem.c revision 77031
11541Srgrimes/*
21541Srgrimes * Copyright (c) 1993 Jan-Simon Pendry
31541Srgrimes * Copyright (c) 1993 Sean Eric Fagan
41541Srgrimes * Copyright (c) 1993
51541Srgrimes *	The Regents of the University of California.  All rights reserved.
61541Srgrimes *
71541Srgrimes * This code is derived from software contributed to Berkeley by
81541Srgrimes * Jan-Simon Pendry and Sean Eric Fagan.
91541Srgrimes *
101541Srgrimes * Redistribution and use in source and binary forms, with or without
111541Srgrimes * modification, are permitted provided that the following conditions
121541Srgrimes * are met:
131541Srgrimes * 1. Redistributions of source code must retain the above copyright
141541Srgrimes *    notice, this list of conditions and the following disclaimer.
151541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
161541Srgrimes *    notice, this list of conditions and the following disclaimer in the
171541Srgrimes *    documentation and/or other materials provided with the distribution.
181541Srgrimes * 3. All advertising materials mentioning features or use of this software
191541Srgrimes *    must display the following acknowledgement:
201541Srgrimes *	This product includes software developed by the University of
211541Srgrimes *	California, Berkeley and its contributors.
221541Srgrimes * 4. Neither the name of the University nor the names of its contributors
231541Srgrimes *    may be used to endorse or promote products derived from this software
241541Srgrimes *    without specific prior written permission.
251541Srgrimes *
261541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
271541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
281541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
291541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
301541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
311541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
321541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
331541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
341541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
351541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
361541Srgrimes * SUCH DAMAGE.
371541Srgrimes *
3822521Sdyson *	@(#)procfs_mem.c	8.5 (Berkeley) 6/15/94
391541Srgrimes *
4050477Speter * $FreeBSD: head/sys/fs/procfs/procfs_mem.c 77031 2001-05-23 09:42:29Z ru $
411541Srgrimes */
421541Srgrimes
431541Srgrimes/*
441541Srgrimes * This is a lightly hacked and merged version
451541Srgrimes * of sef's pread/pwrite functions
461541Srgrimes */
471541Srgrimes
481541Srgrimes#include <sys/param.h>
491541Srgrimes#include <sys/systm.h>
5076166Smarkm#include <sys/lock.h>
5176827Salfred#include <sys/mutex.h>
521541Srgrimes#include <sys/proc.h>
5376166Smarkm#include <sys/ptrace.h>
5476166Smarkm#include <sys/user.h>
551541Srgrimes#include <sys/vnode.h>
5676166Smarkm
5777031Sru#include <fs/procfs/procfs.h>
5876166Smarkm
591541Srgrimes#include <vm/vm.h>
6012662Sdg#include <vm/vm_param.h>
6112662Sdg#include <vm/pmap.h>
6254908Seivind#include <vm/vm_extern.h>
6312662Sdg#include <vm/vm_map.h>
641541Srgrimes#include <vm/vm_kern.h>
6512662Sdg#include <vm/vm_object.h>
661541Srgrimes#include <vm/vm_page.h>
671541Srgrimes
6836168Steggestatic int	procfs_rwmem __P((struct proc *curp,
6936168Stegge				  struct proc *p, struct uio *uio));
7012595Sbde
711541Srgrimesstatic int
7236168Steggeprocfs_rwmem(curp, p, uio)
7336168Stegge	struct proc *curp;
741541Srgrimes	struct proc *p;
751541Srgrimes	struct uio *uio;
761541Srgrimes{
771541Srgrimes	int error;
781541Srgrimes	int writing;
7916308Sdyson	struct vmspace *vm;
8016889Sdyson	vm_map_t map;
8116889Sdyson	vm_object_t object = NULL;
8216889Sdyson	vm_offset_t pageno = 0;		/* page number */
8324666Sdyson	vm_prot_t reqprot;
8424666Sdyson	vm_offset_t kva;
851541Srgrimes
8616308Sdyson	/*
8716308Sdyson	 * if the vmspace is in the midst of being deallocated or the
8816308Sdyson	 * process is exiting, don't try to grab anything.  The page table
8916308Sdyson	 * usage in that process can be messed up.
9016308Sdyson	 */
9116308Sdyson	vm = p->p_vmspace;
9276827Salfred	if ((p->p_flag & P_WEXIT))
9316308Sdyson		return EFAULT;
9476827Salfred
9576827Salfred	mtx_lock(&vm_mtx);
9676827Salfred	if (vm->vm_refcnt < 1) {
9776827Salfred		mtx_unlock(&vm_mtx);
9876827Salfred		return EFAULT;
9976827Salfred	}
10016308Sdyson	++vm->vm_refcnt;
10116889Sdyson	/*
10216889Sdyson	 * The map we want...
10316889Sdyson	 */
10416889Sdyson	map = &vm->vm_map;
10516308Sdyson
1061541Srgrimes	writing = uio->uio_rw == UIO_WRITE;
10724666Sdyson	reqprot = writing ? (VM_PROT_WRITE | VM_PROT_OVERRIDE_WRITE) : VM_PROT_READ;
1081541Srgrimes
10924666Sdyson	kva = kmem_alloc_pageable(kernel_map, PAGE_SIZE);
11024666Sdyson
1111541Srgrimes	/*
1121541Srgrimes	 * Only map in one page at a time.  We don't have to, but it
1131541Srgrimes	 * makes things easier.  This way is trivial - right?
1141541Srgrimes	 */
1151541Srgrimes	do {
11616889Sdyson		vm_map_t tmap;
1171541Srgrimes		vm_offset_t uva;
1181541Srgrimes		int page_offset;		/* offset into page */
1191541Srgrimes		vm_map_entry_t out_entry;
1201541Srgrimes		vm_prot_t out_prot;
12132702Sdyson		boolean_t wired;
12212904Sbde		vm_pindex_t pindex;
1231541Srgrimes		u_int len;
12424666Sdyson		vm_page_t m;
1251541Srgrimes
12616889Sdyson		object = NULL;
12716889Sdyson
1281541Srgrimes		uva = (vm_offset_t) uio->uio_offset;
1291541Srgrimes
1301541Srgrimes		/*
1311541Srgrimes		 * Get the page number of this segment.
1321541Srgrimes		 */
1331541Srgrimes		pageno = trunc_page(uva);
1341541Srgrimes		page_offset = uva - pageno;
1351541Srgrimes
1361541Srgrimes		/*
1371541Srgrimes		 * How many bytes to copy
1381541Srgrimes		 */
1391541Srgrimes		len = min(PAGE_SIZE - page_offset, uio->uio_resid);
1401541Srgrimes
1411541Srgrimes		/*
14224666Sdyson		 * Fault the page on behalf of the process
1431541Srgrimes		 */
14453045Salc		error = vm_fault(map, pageno, reqprot, VM_FAULT_NORMAL);
14524666Sdyson		if (error) {
14624666Sdyson			error = EFAULT;
14724666Sdyson			break;
1481541Srgrimes		}
1491541Srgrimes
1501541Srgrimes		/*
1511541Srgrimes		 * Now we need to get the page.  out_entry, out_prot, wired,
1521541Srgrimes		 * and single_use aren't used.  One would think the vm code
1531541Srgrimes		 * would be a *bit* nicer...  We use tmap because
1541541Srgrimes		 * vm_map_lookup() can change the map argument.
1551541Srgrimes		 */
1561541Srgrimes		tmap = map;
15724666Sdyson		error = vm_map_lookup(&tmap, pageno, reqprot,
15816889Sdyson			      &out_entry, &object, &pindex, &out_prot,
15932702Sdyson			      &wired);
16016889Sdyson
16116889Sdyson		if (error) {
16224666Sdyson			error = EFAULT;
16324666Sdyson
16416889Sdyson			/*
16516889Sdyson			 * Make sure that there is no residue in 'object' from
16616889Sdyson			 * an error return on vm_map_lookup.
16716889Sdyson			 */
16816889Sdyson			object = NULL;
16924666Sdyson
17016889Sdyson			break;
17116889Sdyson		}
17216889Sdyson
17324666Sdyson		m = vm_page_lookup(object, pindex);
17425055Sdyson
17525055Sdyson		/* Allow fallback to backing objects if we are reading */
17625055Sdyson
17725055Sdyson		while (m == NULL && !writing && object->backing_object) {
17825055Sdyson
17925055Sdyson		  pindex += OFF_TO_IDX(object->backing_object_offset);
18025055Sdyson		  object = object->backing_object;
18125055Sdyson
18225055Sdyson		  m = vm_page_lookup(object, pindex);
18325055Sdyson		}
18425055Sdyson
18524666Sdyson		if (m == NULL) {
18624666Sdyson			error = EFAULT;
18724666Sdyson
18824666Sdyson			/*
18924666Sdyson			 * Make sure that there is no residue in 'object' from
19024666Sdyson			 * an error return on vm_map_lookup.
19124666Sdyson			 */
19224666Sdyson			object = NULL;
19324666Sdyson
19425055Sdyson			vm_map_lookup_done(tmap, out_entry);
19525055Sdyson
19624666Sdyson			break;
19724666Sdyson		}
19824666Sdyson
1991541Srgrimes		/*
20024666Sdyson		 * Wire the page into memory
20124666Sdyson		 */
20224666Sdyson		vm_page_wire(m);
20324666Sdyson
20424666Sdyson		/*
2051541Srgrimes		 * We're done with tmap now.
20616889Sdyson		 * But reference the object first, so that we won't loose
20716889Sdyson		 * it.
2081541Srgrimes		 */
20916889Sdyson		vm_object_reference(object);
21016889Sdyson		vm_map_lookup_done(tmap, out_entry);
2118876Srgrimes
21224666Sdyson		pmap_kenter(kva, VM_PAGE_TO_PHYS(m));
21324666Sdyson
2141541Srgrimes		/*
21524666Sdyson		 * Now do the i/o move.
2161541Srgrimes		 */
21776827Salfred		mtx_unlock(&vm_mtx);
21824666Sdyson		error = uiomove((caddr_t)(kva + page_offset), len, uio);
21976827Salfred		mtx_lock(&vm_mtx);
2201541Srgrimes
22124666Sdyson		pmap_kremove(kva);
2221541Srgrimes
22316889Sdyson		/*
22424666Sdyson		 * release the page and the object
22516889Sdyson		 */
22640700Sdg		vm_page_unwire(m, 1);
22724666Sdyson		vm_object_deallocate(object);
2281541Srgrimes
22916889Sdyson		object = NULL;
23016889Sdyson
2311541Srgrimes	} while (error == 0 && uio->uio_resid > 0);
2321541Srgrimes
23316889Sdyson	if (object)
23416889Sdyson		vm_object_deallocate(object);
23516889Sdyson
23624666Sdyson	kmem_free(kernel_map, kva, PAGE_SIZE);
23716308Sdyson	vmspace_free(vm);
23876827Salfred	mtx_unlock(&vm_mtx);
2391541Srgrimes	return (error);
2401541Srgrimes}
2411541Srgrimes
2421541Srgrimes/*
2431541Srgrimes * Copy data in and out of the target process.
2441541Srgrimes * We do this by mapping the process's page into
2451541Srgrimes * the kernel and then doing a uiomove direct
2461541Srgrimes * from the kernel address space.
2471541Srgrimes */
2481541Srgrimesint
2491541Srgrimesprocfs_domem(curp, p, pfs, uio)
2501541Srgrimes	struct proc *curp;
2511541Srgrimes	struct proc *p;
2521541Srgrimes	struct pfsnode *pfs;
2531541Srgrimes	struct uio *uio;
2541541Srgrimes{
2551541Srgrimes
2561541Srgrimes	if (uio->uio_resid == 0)
2571541Srgrimes		return (0);
2581541Srgrimes
25928086Ssef 	/*
26028086Ssef 	 * XXX
26128086Ssef 	 * We need to check for KMEM_GROUP because ps is sgid kmem;
26228086Ssef 	 * not allowing it here causes ps to not work properly.  Arguably,
26328086Ssef 	 * this is a bug with what ps does.  We only need to do this
26428086Ssef 	 * for Pmem nodes, and only if it's reading.  This is still not
26528086Ssef 	 * good, as it may still be possible to grab illicit data if
26628086Ssef 	 * a process somehow gets to be KMEM_GROUP.  Note that this also
26728086Ssef 	 * means that KMEM_GROUP can't change without editing procfs.h!
26828086Ssef 	 * All in all, quite yucky.
26928086Ssef 	 */
27028086Ssef
27165237Srwatson 	if (p_can(curp, p, P_CAN_DEBUG, NULL) &&
27236168Stegge	    !(uio->uio_rw == UIO_READ &&
27336168Stegge	      procfs_kmemaccess(curp)))
27428086Ssef 		return EPERM;
27528086Ssef
27636168Stegge	return (procfs_rwmem(curp, p, uio));
2771541Srgrimes}
2781541Srgrimes
2791541Srgrimes/*
2801541Srgrimes * Given process (p), find the vnode from which
28135256Sdes * its text segment is being executed.
2821541Srgrimes *
2831541Srgrimes * It would be nice to grab this information from
2841541Srgrimes * the VM system, however, there is no sure-fire
2851541Srgrimes * way of doing that.  Instead, fork(), exec() and
2861541Srgrimes * wait() all maintain the p_textvp field in the
2871541Srgrimes * process proc structure which contains a held
2881541Srgrimes * reference to the exec'ed vnode.
28952990Ssef *
29052990Ssef * XXX - Currently, this is not not used, as the
29152990Ssef * /proc/pid/file object exposes an information leak
29252990Ssef * that shouldn't happen.  Using a mount option would
29352990Ssef * make it configurable on a per-system (or, at least,
29452990Ssef * per-mount) basis; however, that's not really best.
29552990Ssef * The best way to do it, I think, would be as an
29652990Ssef * ioctl; this would restrict it to the uid running
29752990Ssef * program, or root, which seems a reasonable compromise.
29852990Ssef * However, the number of applications for this is
29952990Ssef * minimal, if it can't be seen in the filesytem space,
30052990Ssef * and doint it as an ioctl makes it somewhat less
30152990Ssef * useful due to the, well, inelegance.
30252990Ssef *
3031541Srgrimes */
3041541Srgrimesstruct vnode *
3051541Srgrimesprocfs_findtextvp(p)
3061541Srgrimes	struct proc *p;
3071541Srgrimes{
30822521Sdyson
3091541Srgrimes	return (p->p_textvp);
3101541Srgrimes}
31136168Stegge
31236168Steggeint procfs_kmemaccess(curp)
31336168Stegge	struct proc *curp;
31436168Stegge{
31536168Stegge	int i;
31636168Stegge	struct ucred *cred;
31736168Stegge
31853503Sphk	cred = curp->p_ucred;
31946116Sphk	if (suser(curp))
32036168Stegge		return 1;
32146116Sphk
32246116Sphk	/* XXX: Why isn't this done with file-perms ??? */
32336168Stegge	for (i = 0; i < cred->cr_ngroups; i++)
32446116Sphk		if (cred->cr_groups[i] == KMEM_GROUP)
32536168Stegge			return 1;
32636168Stegge
32736168Stegge	return 0;
32836168Stegge}
329