procfs_mem.c revision 28086
11541Srgrimes/*
21541Srgrimes * Copyright (c) 1993 Jan-Simon Pendry
31541Srgrimes * Copyright (c) 1993 Sean Eric Fagan
41541Srgrimes * Copyright (c) 1993
51541Srgrimes *	The Regents of the University of California.  All rights reserved.
61541Srgrimes *
71541Srgrimes * This code is derived from software contributed to Berkeley by
81541Srgrimes * Jan-Simon Pendry and Sean Eric Fagan.
91541Srgrimes *
101541Srgrimes * Redistribution and use in source and binary forms, with or without
111541Srgrimes * modification, are permitted provided that the following conditions
121541Srgrimes * are met:
131541Srgrimes * 1. Redistributions of source code must retain the above copyright
141541Srgrimes *    notice, this list of conditions and the following disclaimer.
151541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
161541Srgrimes *    notice, this list of conditions and the following disclaimer in the
171541Srgrimes *    documentation and/or other materials provided with the distribution.
181541Srgrimes * 3. All advertising materials mentioning features or use of this software
191541Srgrimes *    must display the following acknowledgement:
201541Srgrimes *	This product includes software developed by the University of
211541Srgrimes *	California, Berkeley and its contributors.
221541Srgrimes * 4. Neither the name of the University nor the names of its contributors
231541Srgrimes *    may be used to endorse or promote products derived from this software
241541Srgrimes *    without specific prior written permission.
251541Srgrimes *
261541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
271541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
281541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
291541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
301541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
311541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
321541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
331541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
341541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
351541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
361541Srgrimes * SUCH DAMAGE.
371541Srgrimes *
3822521Sdyson *	@(#)procfs_mem.c	8.5 (Berkeley) 6/15/94
391541Srgrimes *
4028086Ssef *	$Id: procfs_mem.c,v 1.26 1997/08/02 14:32:14 bde Exp $
411541Srgrimes */
421541Srgrimes
431541Srgrimes/*
441541Srgrimes * This is a lightly hacked and merged version
451541Srgrimes * of sef's pread/pwrite functions
461541Srgrimes */
471541Srgrimes
481541Srgrimes#include <sys/param.h>
491541Srgrimes#include <sys/systm.h>
501541Srgrimes#include <sys/proc.h>
511541Srgrimes#include <sys/vnode.h>
521541Srgrimes#include <miscfs/procfs/procfs.h>
531541Srgrimes#include <vm/vm.h>
5412662Sdg#include <vm/vm_param.h>
5512662Sdg#include <vm/vm_prot.h>
5622521Sdyson#include <sys/lock.h>
5712662Sdg#include <vm/pmap.h>
5812662Sdg#include <vm/vm_map.h>
591541Srgrimes#include <vm/vm_kern.h>
6012662Sdg#include <vm/vm_object.h>
611541Srgrimes#include <vm/vm_page.h>
6212662Sdg#include <vm/vm_extern.h>
6313608Speter#include <sys/user.h>
641541Srgrimes
6512595Sbdestatic int	procfs_rwmem __P((struct proc *p, struct uio *uio));
6612595Sbde
671541Srgrimesstatic int
681541Srgrimesprocfs_rwmem(p, uio)
691541Srgrimes	struct proc *p;
701541Srgrimes	struct uio *uio;
711541Srgrimes{
721541Srgrimes	int error;
731541Srgrimes	int writing;
7416308Sdyson	struct vmspace *vm;
7516889Sdyson	vm_map_t map;
7616889Sdyson	vm_object_t object = NULL;
7716889Sdyson	vm_offset_t pageno = 0;		/* page number */
7824666Sdyson	vm_prot_t reqprot;
7924666Sdyson	vm_offset_t kva;
801541Srgrimes
8116308Sdyson	/*
8216308Sdyson	 * if the vmspace is in the midst of being deallocated or the
8316308Sdyson	 * process is exiting, don't try to grab anything.  The page table
8416308Sdyson	 * usage in that process can be messed up.
8516308Sdyson	 */
8616308Sdyson	vm = p->p_vmspace;
8716308Sdyson	if ((p->p_flag & P_WEXIT) || (vm->vm_refcnt < 1))
8816308Sdyson		return EFAULT;
8916308Sdyson	++vm->vm_refcnt;
9016889Sdyson	/*
9116889Sdyson	 * The map we want...
9216889Sdyson	 */
9316889Sdyson	map = &vm->vm_map;
9416308Sdyson
951541Srgrimes	writing = uio->uio_rw == UIO_WRITE;
9624666Sdyson	reqprot = writing ? (VM_PROT_WRITE | VM_PROT_OVERRIDE_WRITE) : VM_PROT_READ;
971541Srgrimes
9824666Sdyson	kva = kmem_alloc_pageable(kernel_map, PAGE_SIZE);
9924666Sdyson
1001541Srgrimes	/*
1011541Srgrimes	 * Only map in one page at a time.  We don't have to, but it
1021541Srgrimes	 * makes things easier.  This way is trivial - right?
1031541Srgrimes	 */
1041541Srgrimes	do {
10516889Sdyson		vm_map_t tmap;
1061541Srgrimes		vm_offset_t uva;
1071541Srgrimes		int page_offset;		/* offset into page */
1081541Srgrimes		vm_map_entry_t out_entry;
1091541Srgrimes		vm_prot_t out_prot;
1101541Srgrimes		boolean_t wired, single_use;
11112904Sbde		vm_pindex_t pindex;
1121541Srgrimes		u_int len;
11324666Sdyson		vm_page_t m;
1141541Srgrimes
11516889Sdyson		object = NULL;
11616889Sdyson
1171541Srgrimes		uva = (vm_offset_t) uio->uio_offset;
1181541Srgrimes
1191541Srgrimes		/*
1201541Srgrimes		 * Get the page number of this segment.
1211541Srgrimes		 */
1221541Srgrimes		pageno = trunc_page(uva);
1231541Srgrimes		page_offset = uva - pageno;
1241541Srgrimes
1251541Srgrimes		/*
1261541Srgrimes		 * How many bytes to copy
1271541Srgrimes		 */
1281541Srgrimes		len = min(PAGE_SIZE - page_offset, uio->uio_resid);
1291541Srgrimes
13013627Speter		if (uva >= VM_MAXUSER_ADDRESS) {
13124666Sdyson			vm_offset_t tkva;
13224666Sdyson
13313627Speter			if (writing || (uva >= (VM_MAXUSER_ADDRESS + UPAGES * PAGE_SIZE))) {
13413627Speter				error = 0;
13513627Speter				break;
13613627Speter			}
13713627Speter
13813627Speter			/* we are reading the "U area", force it into core */
13913627Speter			PHOLD(p);
14013627Speter
14113627Speter			/* sanity check */
14213627Speter			if (!(p->p_flag & P_INMEM)) {
14313627Speter				/* aiee! */
14416889Sdyson				PRELE(p);
14513627Speter				error = EFAULT;
14613627Speter				break;
14713627Speter			}
14813627Speter
14913627Speter			/* populate the ptrace/procfs area */
15013627Speter			p->p_addr->u_kproc.kp_proc = *p;
15113627Speter			fill_eproc (p, &p->p_addr->u_kproc.kp_eproc);
15213627Speter
15313627Speter			/* locate the in-core address */
15424666Sdyson			tkva = (u_int)p->p_addr + uva - VM_MAXUSER_ADDRESS;
15513627Speter
15613627Speter			/* transfer it */
15724666Sdyson			error = uiomove((caddr_t)tkva, len, uio);
15813627Speter
15913627Speter			/* let the pages go */
16013627Speter			PRELE(p);
16113627Speter
16213627Speter			continue;
16313627Speter		}
16413627Speter
1651541Srgrimes		/*
16624666Sdyson		 * Fault the page on behalf of the process
1671541Srgrimes		 */
16824666Sdyson		error = vm_fault(map, pageno, reqprot, FALSE);
16924666Sdyson		if (error) {
17024666Sdyson			error = EFAULT;
17124666Sdyson			break;
1721541Srgrimes		}
1731541Srgrimes
1741541Srgrimes		/*
1751541Srgrimes		 * Now we need to get the page.  out_entry, out_prot, wired,
1761541Srgrimes		 * and single_use aren't used.  One would think the vm code
1771541Srgrimes		 * would be a *bit* nicer...  We use tmap because
1781541Srgrimes		 * vm_map_lookup() can change the map argument.
1791541Srgrimes		 */
1801541Srgrimes		tmap = map;
18124666Sdyson		error = vm_map_lookup(&tmap, pageno, reqprot,
18216889Sdyson			      &out_entry, &object, &pindex, &out_prot,
18316889Sdyson			      &wired, &single_use);
18416889Sdyson
18516889Sdyson		if (error) {
18624666Sdyson			error = EFAULT;
18724666Sdyson
18816889Sdyson			/*
18916889Sdyson			 * Make sure that there is no residue in 'object' from
19016889Sdyson			 * an error return on vm_map_lookup.
19116889Sdyson			 */
19216889Sdyson			object = NULL;
19324666Sdyson
19416889Sdyson			break;
19516889Sdyson		}
19616889Sdyson
19724666Sdyson		m = vm_page_lookup(object, pindex);
19825055Sdyson
19925055Sdyson		/* Allow fallback to backing objects if we are reading */
20025055Sdyson
20125055Sdyson		while (m == NULL && !writing && object->backing_object) {
20225055Sdyson
20325055Sdyson		  pindex += OFF_TO_IDX(object->backing_object_offset);
20425055Sdyson		  object = object->backing_object;
20525055Sdyson
20625055Sdyson		  m = vm_page_lookup(object, pindex);
20725055Sdyson		}
20825055Sdyson
20924666Sdyson		if (m == NULL) {
21024666Sdyson			error = EFAULT;
21124666Sdyson
21224666Sdyson			/*
21324666Sdyson			 * Make sure that there is no residue in 'object' from
21424666Sdyson			 * an error return on vm_map_lookup.
21524666Sdyson			 */
21624666Sdyson			object = NULL;
21724666Sdyson
21825055Sdyson			vm_map_lookup_done(tmap, out_entry);
21925055Sdyson
22024666Sdyson			break;
22124666Sdyson		}
22224666Sdyson
2231541Srgrimes		/*
22424666Sdyson		 * Wire the page into memory
22524666Sdyson		 */
22624666Sdyson		vm_page_wire(m);
22724666Sdyson
22824666Sdyson		/*
2291541Srgrimes		 * We're done with tmap now.
23016889Sdyson		 * But reference the object first, so that we won't loose
23116889Sdyson		 * it.
2321541Srgrimes		 */
23316889Sdyson		vm_object_reference(object);
23416889Sdyson		vm_map_lookup_done(tmap, out_entry);
2358876Srgrimes
23624666Sdyson		pmap_kenter(kva, VM_PAGE_TO_PHYS(m));
23724666Sdyson
2381541Srgrimes		/*
23924666Sdyson		 * Now do the i/o move.
2401541Srgrimes		 */
24124666Sdyson		error = uiomove((caddr_t)(kva + page_offset), len, uio);
2421541Srgrimes
24324666Sdyson		pmap_kremove(kva);
2441541Srgrimes
24516889Sdyson		/*
24624666Sdyson		 * release the page and the object
24716889Sdyson		 */
24824666Sdyson		vm_page_unwire(m);
24924666Sdyson		vm_object_deallocate(object);
2501541Srgrimes
25116889Sdyson		object = NULL;
25216889Sdyson
2531541Srgrimes	} while (error == 0 && uio->uio_resid > 0);
2541541Srgrimes
25516889Sdyson	if (object)
25616889Sdyson		vm_object_deallocate(object);
25716889Sdyson
25824666Sdyson	kmem_free(kernel_map, kva, PAGE_SIZE);
25916308Sdyson	vmspace_free(vm);
2601541Srgrimes	return (error);
2611541Srgrimes}
2621541Srgrimes
2631541Srgrimes/*
2641541Srgrimes * Copy data in and out of the target process.
2651541Srgrimes * We do this by mapping the process's page into
2661541Srgrimes * the kernel and then doing a uiomove direct
2671541Srgrimes * from the kernel address space.
2681541Srgrimes */
2691541Srgrimesint
2701541Srgrimesprocfs_domem(curp, p, pfs, uio)
2711541Srgrimes	struct proc *curp;
2721541Srgrimes	struct proc *p;
2731541Srgrimes	struct pfsnode *pfs;
2741541Srgrimes	struct uio *uio;
2751541Srgrimes{
2761541Srgrimes
2771541Srgrimes	if (uio->uio_resid == 0)
2781541Srgrimes		return (0);
2791541Srgrimes
28028086Ssef 	/*
28128086Ssef 	 * XXX
28228086Ssef 	 * We need to check for KMEM_GROUP because ps is sgid kmem;
28328086Ssef 	 * not allowing it here causes ps to not work properly.  Arguably,
28428086Ssef 	 * this is a bug with what ps does.  We only need to do this
28528086Ssef 	 * for Pmem nodes, and only if it's reading.  This is still not
28628086Ssef 	 * good, as it may still be possible to grab illicit data if
28728086Ssef 	 * a process somehow gets to be KMEM_GROUP.  Note that this also
28828086Ssef 	 * means that KMEM_GROUP can't change without editing procfs.h!
28928086Ssef 	 * All in all, quite yucky.
29028086Ssef 	 */
29128086Ssef
29228086Ssef 	if (!CHECKIO(curp, p) &&
29328086Ssef	    !(curp->p_cred->pc_ucred->cr_gid == KMEM_GROUP &&
29428086Ssef	      uio->uio_rw == UIO_READ))
29528086Ssef 		return EPERM;
29628086Ssef
29722521Sdyson	return (procfs_rwmem(p, uio));
2981541Srgrimes}
2991541Srgrimes
3001541Srgrimes/*
3011541Srgrimes * Given process (p), find the vnode from which
3021541Srgrimes * it's text segment is being executed.
3031541Srgrimes *
3041541Srgrimes * It would be nice to grab this information from
3051541Srgrimes * the VM system, however, there is no sure-fire
3061541Srgrimes * way of doing that.  Instead, fork(), exec() and
3071541Srgrimes * wait() all maintain the p_textvp field in the
3081541Srgrimes * process proc structure which contains a held
3091541Srgrimes * reference to the exec'ed vnode.
3101541Srgrimes */
3111541Srgrimesstruct vnode *
3121541Srgrimesprocfs_findtextvp(p)
3131541Srgrimes	struct proc *p;
3141541Srgrimes{
31522521Sdyson
3161541Srgrimes	return (p->p_textvp);
3171541Srgrimes}
318