/* procfs_mem.c revision 53045 */
11541Srgrimes/* 21541Srgrimes * Copyright (c) 1993 Jan-Simon Pendry 31541Srgrimes * Copyright (c) 1993 Sean Eric Fagan 41541Srgrimes * Copyright (c) 1993 51541Srgrimes * The Regents of the University of California. All rights reserved. 61541Srgrimes * 71541Srgrimes * This code is derived from software contributed to Berkeley by 81541Srgrimes * Jan-Simon Pendry and Sean Eric Fagan. 91541Srgrimes * 101541Srgrimes * Redistribution and use in source and binary forms, with or without 111541Srgrimes * modification, are permitted provided that the following conditions 121541Srgrimes * are met: 131541Srgrimes * 1. Redistributions of source code must retain the above copyright 141541Srgrimes * notice, this list of conditions and the following disclaimer. 151541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 161541Srgrimes * notice, this list of conditions and the following disclaimer in the 171541Srgrimes * documentation and/or other materials provided with the distribution. 181541Srgrimes * 3. All advertising materials mentioning features or use of this software 191541Srgrimes * must display the following acknowledgement: 201541Srgrimes * This product includes software developed by the University of 211541Srgrimes * California, Berkeley and its contributors. 221541Srgrimes * 4. Neither the name of the University nor the names of its contributors 231541Srgrimes * may be used to endorse or promote products derived from this software 241541Srgrimes * without specific prior written permission. 251541Srgrimes * 261541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 271541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 281541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 291541Srgrimes * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 301541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 311541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 321541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 331541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 341541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 351541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 361541Srgrimes * SUCH DAMAGE. 371541Srgrimes * 3822521Sdyson * @(#)procfs_mem.c 8.5 (Berkeley) 6/15/94 391541Srgrimes * 4050477Speter * $FreeBSD: head/sys/fs/procfs/procfs_mem.c 53045 1999-11-09 01:44:28Z alc $ 411541Srgrimes */ 421541Srgrimes 431541Srgrimes/* 441541Srgrimes * This is a lightly hacked and merged version 451541Srgrimes * of sef's pread/pwrite functions 461541Srgrimes */ 471541Srgrimes 481541Srgrimes#include <sys/param.h> 491541Srgrimes#include <sys/systm.h> 501541Srgrimes#include <sys/proc.h> 511541Srgrimes#include <sys/vnode.h> 521541Srgrimes#include <miscfs/procfs/procfs.h> 531541Srgrimes#include <vm/vm.h> 5412662Sdg#include <vm/vm_param.h> 5522521Sdyson#include <sys/lock.h> 5612662Sdg#include <vm/pmap.h> 5712662Sdg#include <vm/vm_map.h> 581541Srgrimes#include <vm/vm_kern.h> 5912662Sdg#include <vm/vm_object.h> 601541Srgrimes#include <vm/vm_page.h> 6112662Sdg#include <vm/vm_extern.h> 6213608Speter#include <sys/user.h> 6336168Stegge#include <sys/ptrace.h> 641541Srgrimes 6536168Steggestatic int procfs_rwmem __P((struct proc *curp, 6636168Stegge struct proc *p, struct uio *uio)); 6712595Sbde 681541Srgrimesstatic int 6936168Steggeprocfs_rwmem(curp, p, uio) 7036168Stegge struct proc *curp; 711541Srgrimes struct proc *p; 721541Srgrimes struct uio *uio; 731541Srgrimes{ 741541Srgrimes int error; 751541Srgrimes int writing; 7616308Sdyson struct vmspace *vm; 7716889Sdyson 
vm_map_t map; 7816889Sdyson vm_object_t object = NULL; 7916889Sdyson vm_offset_t pageno = 0; /* page number */ 8024666Sdyson vm_prot_t reqprot; 8124666Sdyson vm_offset_t kva; 821541Srgrimes 8316308Sdyson /* 8416308Sdyson * if the vmspace is in the midst of being deallocated or the 8516308Sdyson * process is exiting, don't try to grab anything. The page table 8616308Sdyson * usage in that process can be messed up. 8716308Sdyson */ 8816308Sdyson vm = p->p_vmspace; 8916308Sdyson if ((p->p_flag & P_WEXIT) || (vm->vm_refcnt < 1)) 9016308Sdyson return EFAULT; 9116308Sdyson ++vm->vm_refcnt; 9216889Sdyson /* 9316889Sdyson * The map we want... 9416889Sdyson */ 9516889Sdyson map = &vm->vm_map; 9616308Sdyson 971541Srgrimes writing = uio->uio_rw == UIO_WRITE; 9824666Sdyson reqprot = writing ? (VM_PROT_WRITE | VM_PROT_OVERRIDE_WRITE) : VM_PROT_READ; 991541Srgrimes 10024666Sdyson kva = kmem_alloc_pageable(kernel_map, PAGE_SIZE); 10124666Sdyson 1021541Srgrimes /* 1031541Srgrimes * Only map in one page at a time. We don't have to, but it 1041541Srgrimes * makes things easier. This way is trivial - right? 1051541Srgrimes */ 1061541Srgrimes do { 10716889Sdyson vm_map_t tmap; 1081541Srgrimes vm_offset_t uva; 1091541Srgrimes int page_offset; /* offset into page */ 1101541Srgrimes vm_map_entry_t out_entry; 1111541Srgrimes vm_prot_t out_prot; 11232702Sdyson boolean_t wired; 11312904Sbde vm_pindex_t pindex; 1141541Srgrimes u_int len; 11524666Sdyson vm_page_t m; 1161541Srgrimes 11716889Sdyson object = NULL; 11816889Sdyson 1191541Srgrimes uva = (vm_offset_t) uio->uio_offset; 1201541Srgrimes 1211541Srgrimes /* 1221541Srgrimes * Get the page number of this segment. 
1231541Srgrimes */ 1241541Srgrimes pageno = trunc_page(uva); 1251541Srgrimes page_offset = uva - pageno; 1261541Srgrimes 1271541Srgrimes /* 1281541Srgrimes * How many bytes to copy 1291541Srgrimes */ 1301541Srgrimes len = min(PAGE_SIZE - page_offset, uio->uio_resid); 1311541Srgrimes 13213627Speter if (uva >= VM_MAXUSER_ADDRESS) { 13324666Sdyson vm_offset_t tkva; 13424666Sdyson 13536168Stegge if (writing || 13636168Stegge uva >= VM_MAXUSER_ADDRESS + UPAGES * PAGE_SIZE || 13736168Stegge (ptrace_read_u_check(p, 13836168Stegge uva - (vm_offset_t) VM_MAXUSER_ADDRESS, 13936168Stegge (size_t) len) && 14036168Stegge !procfs_kmemaccess(curp))) { 14113627Speter error = 0; 14213627Speter break; 14313627Speter } 14413627Speter 14513627Speter /* we are reading the "U area", force it into core */ 14613627Speter PHOLD(p); 14713627Speter 14813627Speter /* sanity check */ 14913627Speter if (!(p->p_flag & P_INMEM)) { 15013627Speter /* aiee! */ 15116889Sdyson PRELE(p); 15213627Speter error = EFAULT; 15313627Speter break; 15413627Speter } 15513627Speter 15613627Speter /* populate the ptrace/procfs area */ 15713627Speter p->p_addr->u_kproc.kp_proc = *p; 15813627Speter fill_eproc (p, &p->p_addr->u_kproc.kp_eproc); 15913627Speter 16013627Speter /* locate the in-core address */ 16137649Sbde tkva = (uintptr_t)p->p_addr + uva - VM_MAXUSER_ADDRESS; 16213627Speter 16313627Speter /* transfer it */ 16424666Sdyson error = uiomove((caddr_t)tkva, len, uio); 16513627Speter 16613627Speter /* let the pages go */ 16713627Speter PRELE(p); 16813627Speter 16913627Speter continue; 17013627Speter } 17113627Speter 1721541Srgrimes /* 17324666Sdyson * Fault the page on behalf of the process 1741541Srgrimes */ 17553045Salc error = vm_fault(map, pageno, reqprot, VM_FAULT_NORMAL); 17624666Sdyson if (error) { 17724666Sdyson error = EFAULT; 17824666Sdyson break; 1791541Srgrimes } 1801541Srgrimes 1811541Srgrimes /* 1821541Srgrimes * Now we need to get the page. 
out_entry, out_prot, wired, 1831541Srgrimes * and single_use aren't used. One would think the vm code 1841541Srgrimes * would be a *bit* nicer... We use tmap because 1851541Srgrimes * vm_map_lookup() can change the map argument. 1861541Srgrimes */ 1871541Srgrimes tmap = map; 18824666Sdyson error = vm_map_lookup(&tmap, pageno, reqprot, 18916889Sdyson &out_entry, &object, &pindex, &out_prot, 19032702Sdyson &wired); 19116889Sdyson 19216889Sdyson if (error) { 19324666Sdyson error = EFAULT; 19424666Sdyson 19516889Sdyson /* 19616889Sdyson * Make sure that there is no residue in 'object' from 19716889Sdyson * an error return on vm_map_lookup. 19816889Sdyson */ 19916889Sdyson object = NULL; 20024666Sdyson 20116889Sdyson break; 20216889Sdyson } 20316889Sdyson 20424666Sdyson m = vm_page_lookup(object, pindex); 20525055Sdyson 20625055Sdyson /* Allow fallback to backing objects if we are reading */ 20725055Sdyson 20825055Sdyson while (m == NULL && !writing && object->backing_object) { 20925055Sdyson 21025055Sdyson pindex += OFF_TO_IDX(object->backing_object_offset); 21125055Sdyson object = object->backing_object; 21225055Sdyson 21325055Sdyson m = vm_page_lookup(object, pindex); 21425055Sdyson } 21525055Sdyson 21624666Sdyson if (m == NULL) { 21724666Sdyson error = EFAULT; 21824666Sdyson 21924666Sdyson /* 22024666Sdyson * Make sure that there is no residue in 'object' from 22124666Sdyson * an error return on vm_map_lookup. 22224666Sdyson */ 22324666Sdyson object = NULL; 22424666Sdyson 22525055Sdyson vm_map_lookup_done(tmap, out_entry); 22625055Sdyson 22724666Sdyson break; 22824666Sdyson } 22924666Sdyson 2301541Srgrimes /* 23124666Sdyson * Wire the page into memory 23224666Sdyson */ 23324666Sdyson vm_page_wire(m); 23424666Sdyson 23524666Sdyson /* 2361541Srgrimes * We're done with tmap now. 23716889Sdyson * But reference the object first, so that we won't loose 23816889Sdyson * it. 
2391541Srgrimes */ 24016889Sdyson vm_object_reference(object); 24116889Sdyson vm_map_lookup_done(tmap, out_entry); 2428876Srgrimes 24324666Sdyson pmap_kenter(kva, VM_PAGE_TO_PHYS(m)); 24424666Sdyson 2451541Srgrimes /* 24624666Sdyson * Now do the i/o move. 2471541Srgrimes */ 24824666Sdyson error = uiomove((caddr_t)(kva + page_offset), len, uio); 2491541Srgrimes 25024666Sdyson pmap_kremove(kva); 2511541Srgrimes 25216889Sdyson /* 25324666Sdyson * release the page and the object 25416889Sdyson */ 25540700Sdg vm_page_unwire(m, 1); 25624666Sdyson vm_object_deallocate(object); 2571541Srgrimes 25816889Sdyson object = NULL; 25916889Sdyson 2601541Srgrimes } while (error == 0 && uio->uio_resid > 0); 2611541Srgrimes 26216889Sdyson if (object) 26316889Sdyson vm_object_deallocate(object); 26416889Sdyson 26524666Sdyson kmem_free(kernel_map, kva, PAGE_SIZE); 26616308Sdyson vmspace_free(vm); 2671541Srgrimes return (error); 2681541Srgrimes} 2691541Srgrimes 2701541Srgrimes/* 2711541Srgrimes * Copy data in and out of the target process. 2721541Srgrimes * We do this by mapping the process's page into 2731541Srgrimes * the kernel and then doing a uiomove direct 2741541Srgrimes * from the kernel address space. 2751541Srgrimes */ 2761541Srgrimesint 2771541Srgrimesprocfs_domem(curp, p, pfs, uio) 2781541Srgrimes struct proc *curp; 2791541Srgrimes struct proc *p; 2801541Srgrimes struct pfsnode *pfs; 2811541Srgrimes struct uio *uio; 2821541Srgrimes{ 2831541Srgrimes 2841541Srgrimes if (uio->uio_resid == 0) 2851541Srgrimes return (0); 2861541Srgrimes 28728086Ssef /* 28828086Ssef * XXX 28928086Ssef * We need to check for KMEM_GROUP because ps is sgid kmem; 29028086Ssef * not allowing it here causes ps to not work properly. Arguably, 29128086Ssef * this is a bug with what ps does. We only need to do this 29228086Ssef * for Pmem nodes, and only if it's reading. 
This is still not 29328086Ssef * good, as it may still be possible to grab illicit data if 29428086Ssef * a process somehow gets to be KMEM_GROUP. Note that this also 29528086Ssef * means that KMEM_GROUP can't change without editing procfs.h! 29628086Ssef * All in all, quite yucky. 29728086Ssef */ 29828086Ssef 29928086Ssef if (!CHECKIO(curp, p) && 30036168Stegge !(uio->uio_rw == UIO_READ && 30136168Stegge procfs_kmemaccess(curp))) 30228086Ssef return EPERM; 30328086Ssef 30436168Stegge return (procfs_rwmem(curp, p, uio)); 3051541Srgrimes} 3061541Srgrimes 3071541Srgrimes/* 3081541Srgrimes * Given process (p), find the vnode from which 30935256Sdes * its text segment is being executed. 3101541Srgrimes * 3111541Srgrimes * It would be nice to grab this information from 3121541Srgrimes * the VM system, however, there is no sure-fire 3131541Srgrimes * way of doing that. Instead, fork(), exec() and 3141541Srgrimes * wait() all maintain the p_textvp field in the 3151541Srgrimes * process proc structure which contains a held 3161541Srgrimes * reference to the exec'ed vnode. 31752990Ssef * 31852990Ssef * XXX - Currently, this is not not used, as the 31952990Ssef * /proc/pid/file object exposes an information leak 32052990Ssef * that shouldn't happen. Using a mount option would 32152990Ssef * make it configurable on a per-system (or, at least, 32252990Ssef * per-mount) basis; however, that's not really best. 32352990Ssef * The best way to do it, I think, would be as an 32452990Ssef * ioctl; this would restrict it to the uid running 32552990Ssef * program, or root, which seems a reasonable compromise. 32652990Ssef * However, the number of applications for this is 32752990Ssef * minimal, if it can't be seen in the filesytem space, 32852990Ssef * and doint it as an ioctl makes it somewhat less 32952990Ssef * useful due to the, well, inelegance. 
33052990Ssef * 3311541Srgrimes */ 3321541Srgrimesstruct vnode * 3331541Srgrimesprocfs_findtextvp(p) 3341541Srgrimes struct proc *p; 3351541Srgrimes{ 33622521Sdyson 3371541Srgrimes return (p->p_textvp); 3381541Srgrimes} 33936168Stegge 34036168Steggeint procfs_kmemaccess(curp) 34136168Stegge struct proc *curp; 34236168Stegge{ 34336168Stegge int i; 34436168Stegge struct ucred *cred; 34536168Stegge 34636168Stegge cred = curp->p_cred->pc_ucred; 34746116Sphk if (suser(curp)) 34836168Stegge return 1; 34946116Sphk 35046116Sphk /* XXX: Why isn't this done with file-perms ??? */ 35136168Stegge for (i = 0; i < cred->cr_ngroups; i++) 35246116Sphk if (cred->cr_groups[i] == KMEM_GROUP) 35336168Stegge return 1; 35436168Stegge 35536168Stegge return 0; 35636168Stegge} 357