subr_uio.c revision 111937
11541Srgrimes/* 21541Srgrimes * Copyright (c) 1982, 1986, 1991, 1993 31541Srgrimes * The Regents of the University of California. All rights reserved. 41541Srgrimes * (c) UNIX System Laboratories, Inc. 51541Srgrimes * All or some portions of this file are derived from material licensed 61541Srgrimes * to the University of California by American Telephone and Telegraph 71541Srgrimes * Co. or Unix System Laboratories, Inc. and are reproduced herein with 81541Srgrimes * the permission of UNIX System Laboratories, Inc. 91541Srgrimes * 101541Srgrimes * Redistribution and use in source and binary forms, with or without 111541Srgrimes * modification, are permitted provided that the following conditions 121541Srgrimes * are met: 131541Srgrimes * 1. Redistributions of source code must retain the above copyright 141541Srgrimes * notice, this list of conditions and the following disclaimer. 151541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 161541Srgrimes * notice, this list of conditions and the following disclaimer in the 171541Srgrimes * documentation and/or other materials provided with the distribution. 181541Srgrimes * 3. All advertising materials mentioning features or use of this software 191541Srgrimes * must display the following acknowledgement: 201541Srgrimes * This product includes software developed by the University of 211541Srgrimes * California, Berkeley and its contributors. 221541Srgrimes * 4. Neither the name of the University nor the names of its contributors 231541Srgrimes * may be used to endorse or promote products derived from this software 241541Srgrimes * without specific prior written permission. 251541Srgrimes * 261541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 271541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 281541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 291541Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 301541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 311541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 321541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 331541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 341541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 351541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 361541Srgrimes * SUCH DAMAGE. 371541Srgrimes * 381541Srgrimes * @(#)kern_subr.c 8.3 (Berkeley) 1/21/94 3950477Speter * $FreeBSD: head/sys/kern/kern_subr.c 111937 2003-03-06 03:41:02Z alc $ 401541Srgrimes */ 411541Srgrimes 4298849Sken#include "opt_zero.h" 4398849Sken 441541Srgrimes#include <sys/param.h> 451541Srgrimes#include <sys/systm.h> 4644218Sbde#include <sys/kernel.h> 4765557Sjasone#include <sys/ktr.h> 4876166Smarkm#include <sys/lock.h> 4976166Smarkm#include <sys/mutex.h> 501541Srgrimes#include <sys/proc.h> 511541Srgrimes#include <sys/malloc.h> 5243529Sbde#include <sys/resourcevar.h> 53104964Sjeff#include <sys/sched.h> 5478431Swollman#include <sys/sysctl.h> 5532702Sdyson#include <sys/vnode.h> 561541Srgrimes 5731853Sdyson#include <vm/vm.h> 5831853Sdyson#include <vm/vm_page.h> 5931853Sdyson#include <vm/vm_map.h> 6099848Sken#ifdef ZERO_COPY_SOCKETS 6199848Sken#include <vm/vm_param.h> 6299848Sken#include <vm/vm_object.h> 6399848Sken#endif 6431853Sdyson 65111737SdesSYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, NULL, UIO_MAXIOV, 6678431Swollman "Maximum number of elements in an I/O vector; sysconf(_SC_IOV_MAX)"); 6778431Swollman 6898849Sken#ifdef ZERO_COPY_SOCKETS 6998849Sken/* Declared in uipc_socket.c */ 7098849Skenextern int so_zero_copy_receive; 7198849Sken 7298849Skenstatic int 73111737Sdesvm_pgmoveco(vm_map_t mapa, vm_object_t srcobj, vm_offset_t kaddr, 74111737Sdes vm_offset_t uaddr) 7598849Sken{ 7698849Sken vm_map_t map = mapa; 7798849Sken vm_page_t kern_pg, user_pg; 7898849Sken vm_object_t uobject; 7998849Sken vm_map_entry_t entry; 8098849Sken vm_pindex_t upindex, kpindex; 8198849Sken vm_prot_t prot; 8298849Sken boolean_t wired; 8398849Sken 8498849Sken /* 8598849Sken * First lookup the kernel page. 8698849Sken */ 8798849Sken kern_pg = PHYS_TO_VM_PAGE(vtophys(kaddr)); 8898849Sken 8998849Sken if ((vm_map_lookup(&map, uaddr, 9098849Sken VM_PROT_READ, &entry, &uobject, 9198849Sken &upindex, &prot, &wired)) != KERN_SUCCESS) { 9298849Sken return(EFAULT); 9398849Sken } 9498849Sken if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) { 95107371Salc do 96107371Salc vm_page_lock_queues(); 97107371Salc while (vm_page_sleep_if_busy(user_pg, 1, "vm_pgmoveco")); 9898849Sken vm_page_busy(user_pg); 99107371Salc pmap_remove_all(user_pg); 10098849Sken vm_page_free(user_pg); 101108139Salc } else 102108139Salc vm_page_lock_queues(); 10398849Sken if (kern_pg->busy || ((kern_pg->queue - kern_pg->pc) == PQ_FREE) || 10498849Sken (kern_pg->hold_count != 0)|| (kern_pg->flags & PG_BUSY)) { 10598849Sken printf("vm_pgmoveco: pindex(%lu), busy(%d), PG_BUSY(%d), " 10698849Sken "hold(%d) paddr(0x%lx)\n", (u_long)kern_pg->pindex, 10798849Sken kern_pg->busy, (kern_pg->flags & PG_BUSY) ? 1 : 0, 10898849Sken kern_pg->hold_count, (u_long)kern_pg->phys_addr); 10998849Sken if ((kern_pg->queue - kern_pg->pc) == PQ_FREE) 11098849Sken panic("vm_pgmoveco: renaming free page"); 11198849Sken else 11298849Sken panic("vm_pgmoveco: renaming busy page"); 11398849Sken } 11498849Sken kpindex = kern_pg->pindex; 11598849Sken vm_page_busy(kern_pg); 11698849Sken vm_page_rename(kern_pg, uobject, upindex); 11798849Sken vm_page_flag_clear(kern_pg, PG_BUSY); 11898849Sken kern_pg->valid = VM_PAGE_BITS_ALL; 119108139Salc vm_page_unlock_queues(); 120111737Sdes 12198849Sken vm_map_lookup_done(map, entry); 12298849Sken return(KERN_SUCCESS); 12398849Sken} 12498849Sken#endif /* ZERO_COPY_SOCKETS */ 12598849Sken 1261549Srgrimesint 127111739Sdesuiomove(void *cp, int n, struct uio *uio) 1281541Srgrimes{ 12983366Sjulian struct thread *td = curthread; 130111737Sdes struct iovec *iov; 1311541Srgrimes u_int cnt; 13244681Sjulian int error = 0; 13344681Sjulian int save = 0; 1341541Srgrimes 13542408Seivind KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, 13642453Seivind ("uiomove: mode")); 13783366Sjulian KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, 13842453Seivind ("uiomove proc")); 13942408Seivind 14083366Sjulian if (td) { 14183420Sjhb mtx_lock_spin(&sched_lock); 14283366Sjulian save = td->td_flags & TDF_DEADLKTREAT; 14383366Sjulian td->td_flags |= TDF_DEADLKTREAT; 14483420Sjhb mtx_unlock_spin(&sched_lock); 14544681Sjulian } 14644681Sjulian 1471541Srgrimes while (n > 0 && uio->uio_resid) { 1481541Srgrimes iov = uio->uio_iov; 1491541Srgrimes cnt = iov->iov_len; 1501541Srgrimes if (cnt == 0) { 1511541Srgrimes uio->uio_iov++; 1521541Srgrimes uio->uio_iovcnt--; 1531541Srgrimes continue; 1541541Srgrimes } 1551541Srgrimes if (cnt > n) 1561541Srgrimes cnt = n; 1576324Sdg 1581541Srgrimes switch (uio->uio_segflg) { 1591541Srgrimes 1601541Srgrimes case UIO_USERSPACE: 16170861Sjake if (ticks - PCPU_GET(switchticks) >= hogticks) 16243529Sbde uio_yield(); 1631541Srgrimes if (uio->uio_rw == UIO_READ) 1641541Srgrimes error = copyout(cp, iov->iov_base, cnt); 1651541Srgrimes else 1661541Srgrimes error = copyin(iov->iov_base, cp, cnt); 1671541Srgrimes if (error) 16890413Stmm goto out; 1691541Srgrimes break; 1701541Srgrimes 1711541Srgrimes case UIO_SYSSPACE: 1721541Srgrimes if (uio->uio_rw == UIO_READ) 17398998Salfred bcopy(cp, iov->iov_base, cnt); 1741541Srgrimes else 17598998Salfred bcopy(iov->iov_base, cp, cnt); 1761541Srgrimes break; 1777611Sdg case UIO_NOCOPY: 1787611Sdg break; 1791541Srgrimes } 180104908Smike iov->iov_base = (char *)iov->iov_base + cnt; 1811541Srgrimes iov->iov_len -= cnt; 1821541Srgrimes uio->uio_resid -= cnt; 1831541Srgrimes uio->uio_offset += cnt; 184111739Sdes cp = (char *)cp + cnt; 1851541Srgrimes n -= cnt; 1861541Srgrimes } 18790413Stmmout: 18883366Sjulian if (td != curthread) printf("uiomove: IT CHANGED!"); 18983366Sjulian td = curthread; /* Might things have changed in copyin/copyout? */ 19083420Sjhb if (td) { 19183420Sjhb mtx_lock_spin(&sched_lock); 19283366Sjulian td->td_flags = (td->td_flags & ~TDF_DEADLKTREAT) | save; 19383420Sjhb mtx_unlock_spin(&sched_lock); 19483420Sjhb } 19544681Sjulian return (error); 1961541Srgrimes} 1971541Srgrimes 198111937Salc#ifdef ZERO_COPY_SOCKETS 19996080Salc/* 20096080Salc * Experimental support for zero-copy I/O 20196080Salc */ 20298849Skenstatic int 203111739Sdesuserspaceco(void *cp, u_int cnt, struct uio *uio, struct vm_object *obj, 204111737Sdes int disposable) 20598849Sken{ 20698849Sken struct iovec *iov; 20798849Sken int error; 20898849Sken 20998849Sken iov = uio->uio_iov; 21098849Sken if (uio->uio_rw == UIO_READ) { 21198849Sken if ((so_zero_copy_receive != 0) 21298849Sken && (obj != NULL) 21398849Sken && ((cnt & PAGE_MASK) == 0) 21498849Sken && ((((intptr_t) iov->iov_base) & PAGE_MASK) == 0) 21598849Sken && ((uio->uio_offset & PAGE_MASK) == 0) 21698849Sken && ((((intptr_t) cp) & PAGE_MASK) == 0) 21798849Sken && (obj->type == OBJT_DEFAULT) 21898849Sken && (disposable != 0)) { 21998849Sken /* SOCKET: use page-trading */ 22098849Sken /* 22198849Sken * We only want to call vm_pgmoveco() on 22298849Sken * disposeable pages, since it gives the 22398849Sken * kernel page to the userland process. 22498849Sken */ 22598849Sken error = vm_pgmoveco(&curproc->p_vmspace->vm_map, 226111737Sdes obj, (vm_offset_t)cp, 22798849Sken (vm_offset_t)iov->iov_base); 22898849Sken 22998849Sken /* 23098849Sken * If we get an error back, attempt 23198849Sken * to use copyout() instead. The 23298849Sken * disposable page should be freed 23398849Sken * automatically if we weren't able to move 23498849Sken * it into userland. 23598849Sken */ 23698849Sken if (error != 0) 23798849Sken error = copyout(cp, iov->iov_base, cnt); 23898849Sken } else { 23998849Sken error = copyout(cp, iov->iov_base, cnt); 24098849Sken } 24198849Sken } else { 24298849Sken error = copyin(iov->iov_base, cp, cnt); 24398849Sken } 24498849Sken return (error); 24598849Sken} 24698849Sken 24731853Sdysonint 248111739Sdesuiomoveco(void *cp, int n, struct uio *uio, struct vm_object *obj, 249111737Sdes int disposable) 25031853Sdyson{ 25131853Sdyson struct iovec *iov; 25231853Sdyson u_int cnt; 25331853Sdyson int error; 25431853Sdyson 25542408Seivind KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, 25642453Seivind ("uiomoveco: mode")); 25783366Sjulian KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, 25842453Seivind ("uiomoveco proc")); 25942408Seivind 26031853Sdyson while (n > 0 && uio->uio_resid) { 26131853Sdyson iov = uio->uio_iov; 26231853Sdyson cnt = iov->iov_len; 26331853Sdyson if (cnt == 0) { 26431853Sdyson uio->uio_iov++; 26531853Sdyson uio->uio_iovcnt--; 26631853Sdyson continue; 26731853Sdyson } 26831853Sdyson if (cnt > n) 26931853Sdyson cnt = n; 27031853Sdyson 27131853Sdyson switch (uio->uio_segflg) { 27231853Sdyson 27331853Sdyson case UIO_USERSPACE: 27470861Sjake if (ticks - PCPU_GET(switchticks) >= hogticks) 27543529Sbde uio_yield(); 27698849Sken 27798849Sken error = userspaceco(cp, cnt, uio, obj, disposable); 27898849Sken 27931853Sdyson if (error) 28031853Sdyson return (error); 28131853Sdyson break; 28231853Sdyson 28331853Sdyson case UIO_SYSSPACE: 28431853Sdyson if (uio->uio_rw == UIO_READ) 28598998Salfred bcopy(cp, iov->iov_base, cnt); 28631853Sdyson else 28798998Salfred bcopy(iov->iov_base, cp, cnt); 28831853Sdyson break; 28931853Sdyson case UIO_NOCOPY: 29031853Sdyson break; 29131853Sdyson } 292104908Smike iov->iov_base = (char *)iov->iov_base + cnt; 29331853Sdyson iov->iov_len -= cnt; 29431853Sdyson uio->uio_resid -= cnt; 29531853Sdyson uio->uio_offset += cnt; 296111739Sdes cp = (char *)cp + cnt; 29731853Sdyson n -= cnt; 29831853Sdyson } 29931853Sdyson return (0); 30031853Sdyson} 301111937Salc#endif /* ZERO_COPY_SOCKETS */ 30231853Sdyson 30396080Salc/* 3041541Srgrimes * Give next character to user as result of read. 3051541Srgrimes */ 3061549Srgrimesint 307111737Sdesureadc(int c, struct uio *uio) 3081541Srgrimes{ 309111737Sdes struct iovec *iov; 310111737Sdes char *iov_base; 3111541Srgrimes 3121541Srgrimesagain: 3131541Srgrimes if (uio->uio_iovcnt == 0 || uio->uio_resid == 0) 3141541Srgrimes panic("ureadc"); 3151541Srgrimes iov = uio->uio_iov; 3161541Srgrimes if (iov->iov_len == 0) { 3171541Srgrimes uio->uio_iovcnt--; 3181541Srgrimes uio->uio_iov++; 3191541Srgrimes goto again; 3201541Srgrimes } 3211541Srgrimes switch (uio->uio_segflg) { 3221541Srgrimes 3231541Srgrimes case UIO_USERSPACE: 3241541Srgrimes if (subyte(iov->iov_base, c) < 0) 3251541Srgrimes return (EFAULT); 3261541Srgrimes break; 3271541Srgrimes 3281541Srgrimes case UIO_SYSSPACE: 329104908Smike iov_base = iov->iov_base; 330104908Smike *iov_base = c; 331104908Smike iov->iov_base = iov_base; 3321541Srgrimes break; 3331541Srgrimes 3348177Sdg case UIO_NOCOPY: 3358177Sdg break; 3361541Srgrimes } 337104908Smike iov->iov_base = (char *)iov->iov_base + 1; 3381541Srgrimes iov->iov_len--; 3391541Srgrimes uio->uio_resid--; 3401541Srgrimes uio->uio_offset++; 3411541Srgrimes return (0); 3421541Srgrimes} 3431541Srgrimes 3441541Srgrimes/* 3451541Srgrimes * General routine to allocate a hash table. 3461541Srgrimes */ 3471541Srgrimesvoid * 348111737Sdeshashinit(int elements, struct malloc_type *type, u_long *hashmask) 3491541Srgrimes{ 3501541Srgrimes long hashsize; 35160938Sjake LIST_HEAD(generic, generic) *hashtbl; 3521541Srgrimes int i; 3531541Srgrimes 3541541Srgrimes if (elements <= 0) 3558364Sdg panic("hashinit: bad elements"); 3561541Srgrimes for (hashsize = 1; hashsize <= elements; hashsize <<= 1) 3571541Srgrimes continue; 3581541Srgrimes hashsize >>= 1; 359111119Simp hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK); 3601541Srgrimes for (i = 0; i < hashsize; i++) 3611541Srgrimes LIST_INIT(&hashtbl[i]); 3621541Srgrimes *hashmask = hashsize - 1; 3631541Srgrimes return (hashtbl); 3641541Srgrimes} 3657611Sdg 36699098Siedowsevoid 367111737Sdeshashdestroy(void *vhashtbl, struct malloc_type *type, u_long hashmask) 36899098Siedowse{ 36999098Siedowse LIST_HEAD(generic, generic) *hashtbl, *hp; 37099098Siedowse 37199098Siedowse hashtbl = vhashtbl; 37299098Siedowse for (hp = hashtbl; hp <= &hashtbl[hashmask]; hp++) 37399098Siedowse if (!LIST_EMPTY(hp)) 37499098Siedowse panic("hashdestroy: hash not empty"); 37599098Siedowse free(hashtbl, type); 37699098Siedowse} 37799098Siedowse 3787683Sdgstatic int primes[] = { 1, 13, 31, 61, 127, 251, 509, 761, 1021, 1531, 2039, 3797683Sdg 2557, 3067, 3583, 4093, 4603, 5119, 5623, 6143, 6653, 3807611Sdg 7159, 7673, 8191, 12281, 16381, 24571, 32749 }; 38126205Salex#define NPRIMES (sizeof(primes) / sizeof(primes[0])) 3827611Sdg 3837611Sdg/* 3847611Sdg * General routine to allocate a prime number sized hash table. 3857611Sdg */ 3867611Sdgvoid * 387111737Sdesphashinit(int elements, struct malloc_type *type, u_long *nentries) 3887611Sdg{ 3897611Sdg long hashsize; 39060938Sjake LIST_HEAD(generic, generic) *hashtbl; 3917611Sdg int i; 3927611Sdg 3937611Sdg if (elements <= 0) 3948364Sdg panic("phashinit: bad elements"); 3957611Sdg for (i = 1, hashsize = primes[1]; hashsize <= elements;) { 3967611Sdg i++; 3977611Sdg if (i == NPRIMES) 3987611Sdg break; 3997611Sdg hashsize = primes[i]; 4007611Sdg } 4017611Sdg hashsize = primes[i - 1]; 402111119Simp hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK); 4037611Sdg for (i = 0; i < hashsize; i++) 4047611Sdg LIST_INIT(&hashtbl[i]); 4057611Sdg *nentries = hashsize; 4067611Sdg return (hashtbl); 4077611Sdg} 40843529Sbde 40983959Sdillonvoid 410111737Sdesuio_yield(void) 41143529Sbde{ 41283366Sjulian struct thread *td; 41343529Sbde 41483366Sjulian td = curthread; 41572200Sbmilekic mtx_lock_spin(&sched_lock); 41688900Sjhb DROP_GIANT(); 417104964Sjeff sched_prio(td, td->td_ksegrp->kg_user_pri); /* XXXKSE */ 41883366Sjulian td->td_proc->p_stats->p_ru.ru_nivcsw++; 41943529Sbde mi_switch(); 42072200Sbmilekic mtx_unlock_spin(&sched_lock); 42168808Sjhb PICKUP_GIANT(); 42243529Sbde} 42372537Sjlemon 42472537Sjlemonint 42572537Sjlemoncopyinfrom(const void *src, void *dst, size_t len, int seg) 42672537Sjlemon{ 42772537Sjlemon int error = 0; 42872537Sjlemon 42972537Sjlemon switch (seg) { 43072537Sjlemon case UIO_USERSPACE: 43172537Sjlemon error = copyin(src, dst, len); 43272537Sjlemon break; 43372537Sjlemon case UIO_SYSSPACE: 43472537Sjlemon bcopy(src, dst, len); 43572537Sjlemon break; 43672537Sjlemon default: 43772537Sjlemon panic("copyinfrom: bad seg %d\n", seg); 43872537Sjlemon } 43972537Sjlemon return (error); 44072537Sjlemon} 44172537Sjlemon 44272537Sjlemonint 44372537Sjlemoncopyinstrfrom(const void *src, void *dst, size_t len, size_t *copied, int seg) 44472537Sjlemon{ 44572537Sjlemon int error = 0; 44672537Sjlemon 44772537Sjlemon switch (seg) { 44872537Sjlemon case UIO_USERSPACE: 44972537Sjlemon error = copyinstr(src, dst, len, copied); 45072537Sjlemon break; 45172537Sjlemon case UIO_SYSSPACE: 45272537Sjlemon error = copystr(src, dst, len, copied); 45372537Sjlemon break; 45472537Sjlemon default: 45572537Sjlemon panic("copyinstrfrom: bad seg %d\n", seg); 45672537Sjlemon } 45772537Sjlemon return (error); 45872537Sjlemon} 459