/*
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_subr.c	8.3 (Berkeley) 1/21/94
 */

#include <sys/cdefs.h>
39 40#include "opt_zero.h" 41 42#include <sys/param.h> 43#include <sys/systm.h> 44#include <sys/kernel.h> 45#include <sys/ktr.h> 46#include <sys/limits.h> 47#include <sys/lock.h> 48#include <sys/mutex.h> 49#include <sys/proc.h> 50#include <sys/malloc.h> 51#include <sys/resourcevar.h> 52#include <sys/sched.h> 53#include <sys/sysctl.h> 54#include <sys/vnode.h> 55 56#include <vm/vm.h> 57#include <vm/vm_page.h> 58#include <vm/vm_map.h> 59#ifdef ZERO_COPY_SOCKETS 60#include <vm/vm_param.h> 61#include <vm/vm_object.h> 62#endif 63 64SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, NULL, UIO_MAXIOV, 65 "Maximum number of elements in an I/O vector; sysconf(_SC_IOV_MAX)"); 66 67#ifdef ZERO_COPY_SOCKETS 68/* Declared in uipc_socket.c */ 69extern int so_zero_copy_receive; 70 71static int 72vm_pgmoveco(vm_map_t mapa, vm_object_t srcobj, vm_offset_t kaddr, 73 vm_offset_t uaddr) 74{ 75 vm_map_t map = mapa; 76 vm_page_t kern_pg, user_pg; 77 vm_object_t uobject; 78 vm_map_entry_t entry; 79 vm_pindex_t upindex, kpindex; 80 vm_prot_t prot; 81 boolean_t wired; 82 83 /* 84 * First lookup the kernel page. 85 */ 86 kern_pg = PHYS_TO_VM_PAGE(vtophys(kaddr)); 87 /* 88 * XXX The vm object containing kern_pg needs locking. 89 */ 90 if ((vm_map_lookup(&map, uaddr, 91 VM_PROT_WRITE, &entry, &uobject, 92 &upindex, &prot, &wired)) != KERN_SUCCESS) { 93 return(EFAULT); 94 } 95 VM_OBJECT_LOCK(uobject); 96 if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) { 97 do 98 vm_page_lock_queues(); 99 while (vm_page_sleep_if_busy(user_pg, 1, "vm_pgmoveco")); 100 vm_page_busy(user_pg); 101 pmap_remove_all(user_pg); 102 vm_page_free(user_pg); 103 } else 104 vm_page_lock_queues(); 105 if (kern_pg->busy || ((kern_pg->queue - kern_pg->pc) == PQ_FREE) || 106 (kern_pg->hold_count != 0)|| (kern_pg->flags & PG_BUSY)) { 107 printf("vm_pgmoveco: pindex(%lu), busy(%d), PG_BUSY(%d), " 108 "hold(%d) paddr(0x%lx)\n", (u_long)kern_pg->pindex, 109 kern_pg->busy, (kern_pg->flags & PG_BUSY) ? 
1 : 0, 110 kern_pg->hold_count, (u_long)kern_pg->phys_addr); 111 if ((kern_pg->queue - kern_pg->pc) == PQ_FREE) 112 panic("vm_pgmoveco: renaming free page"); 113 else 114 panic("vm_pgmoveco: renaming busy page"); 115 } 116 kpindex = kern_pg->pindex; 117 vm_page_busy(kern_pg); 118 vm_page_rename(kern_pg, uobject, upindex); 119 vm_page_flag_clear(kern_pg, PG_BUSY); 120 kern_pg->valid = VM_PAGE_BITS_ALL; 121 vm_page_unlock_queues(); 122 VM_OBJECT_UNLOCK(uobject); 123 vm_map_lookup_done(map, entry); 124 return(KERN_SUCCESS); 125} 126#endif /* ZERO_COPY_SOCKETS */ 127 128int 129uiomove(void *cp, int n, struct uio *uio) 130{ 131 struct thread *td = curthread; 132 struct iovec *iov; 133 u_int cnt; 134 int error = 0; 135 int save = 0; 136 137 KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, 138 ("uiomove: mode")); 139 KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, 140 ("uiomove proc")); 141 142 if (td) {
| 39 40#include "opt_zero.h" 41 42#include <sys/param.h> 43#include <sys/systm.h> 44#include <sys/kernel.h> 45#include <sys/ktr.h> 46#include <sys/limits.h> 47#include <sys/lock.h> 48#include <sys/mutex.h> 49#include <sys/proc.h> 50#include <sys/malloc.h> 51#include <sys/resourcevar.h> 52#include <sys/sched.h> 53#include <sys/sysctl.h> 54#include <sys/vnode.h> 55 56#include <vm/vm.h> 57#include <vm/vm_page.h> 58#include <vm/vm_map.h> 59#ifdef ZERO_COPY_SOCKETS 60#include <vm/vm_param.h> 61#include <vm/vm_object.h> 62#endif 63 64SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, NULL, UIO_MAXIOV, 65 "Maximum number of elements in an I/O vector; sysconf(_SC_IOV_MAX)"); 66 67#ifdef ZERO_COPY_SOCKETS 68/* Declared in uipc_socket.c */ 69extern int so_zero_copy_receive; 70 71static int 72vm_pgmoveco(vm_map_t mapa, vm_object_t srcobj, vm_offset_t kaddr, 73 vm_offset_t uaddr) 74{ 75 vm_map_t map = mapa; 76 vm_page_t kern_pg, user_pg; 77 vm_object_t uobject; 78 vm_map_entry_t entry; 79 vm_pindex_t upindex, kpindex; 80 vm_prot_t prot; 81 boolean_t wired; 82 83 /* 84 * First lookup the kernel page. 85 */ 86 kern_pg = PHYS_TO_VM_PAGE(vtophys(kaddr)); 87 /* 88 * XXX The vm object containing kern_pg needs locking. 89 */ 90 if ((vm_map_lookup(&map, uaddr, 91 VM_PROT_WRITE, &entry, &uobject, 92 &upindex, &prot, &wired)) != KERN_SUCCESS) { 93 return(EFAULT); 94 } 95 VM_OBJECT_LOCK(uobject); 96 if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) { 97 do 98 vm_page_lock_queues(); 99 while (vm_page_sleep_if_busy(user_pg, 1, "vm_pgmoveco")); 100 vm_page_busy(user_pg); 101 pmap_remove_all(user_pg); 102 vm_page_free(user_pg); 103 } else 104 vm_page_lock_queues(); 105 if (kern_pg->busy || ((kern_pg->queue - kern_pg->pc) == PQ_FREE) || 106 (kern_pg->hold_count != 0)|| (kern_pg->flags & PG_BUSY)) { 107 printf("vm_pgmoveco: pindex(%lu), busy(%d), PG_BUSY(%d), " 108 "hold(%d) paddr(0x%lx)\n", (u_long)kern_pg->pindex, 109 kern_pg->busy, (kern_pg->flags & PG_BUSY) ? 
1 : 0, 110 kern_pg->hold_count, (u_long)kern_pg->phys_addr); 111 if ((kern_pg->queue - kern_pg->pc) == PQ_FREE) 112 panic("vm_pgmoveco: renaming free page"); 113 else 114 panic("vm_pgmoveco: renaming busy page"); 115 } 116 kpindex = kern_pg->pindex; 117 vm_page_busy(kern_pg); 118 vm_page_rename(kern_pg, uobject, upindex); 119 vm_page_flag_clear(kern_pg, PG_BUSY); 120 kern_pg->valid = VM_PAGE_BITS_ALL; 121 vm_page_unlock_queues(); 122 VM_OBJECT_UNLOCK(uobject); 123 vm_map_lookup_done(map, entry); 124 return(KERN_SUCCESS); 125} 126#endif /* ZERO_COPY_SOCKETS */ 127 128int 129uiomove(void *cp, int n, struct uio *uio) 130{ 131 struct thread *td = curthread; 132 struct iovec *iov; 133 u_int cnt; 134 int error = 0; 135 int save = 0; 136 137 KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, 138 ("uiomove: mode")); 139 KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, 140 ("uiomove proc")); 141 142 if (td) {
|
195 return (error); 196} 197 198/* 199 * Wrapper for uiomove() that validates the arguments against a known-good 200 * kernel buffer. Currently, uiomove accepts a signed (n) argument, which 201 * is almost definitely a bad thing, so we catch that here as well. We 202 * return a runtime failure, but it might be desirable to generate a runtime 203 * assertion failure instead. 204 */ 205int 206uiomove_frombuf(void *buf, int buflen, struct uio *uio) 207{ 208 unsigned int offset, n; 209 210 if (uio->uio_offset < 0 || uio->uio_resid < 0 || 211 (offset = uio->uio_offset) != uio->uio_offset) 212 return (EINVAL); 213 if (buflen <= 0 || offset >= buflen) 214 return (0); 215 if ((n = buflen - offset) > INT_MAX) 216 return (EINVAL); 217 return (uiomove((char *)buf + offset, n, uio)); 218} 219 220#ifdef ZERO_COPY_SOCKETS 221/* 222 * Experimental support for zero-copy I/O 223 */ 224static int 225userspaceco(void *cp, u_int cnt, struct uio *uio, struct vm_object *obj, 226 int disposable) 227{ 228 struct iovec *iov; 229 int error; 230 231 iov = uio->uio_iov; 232 if (uio->uio_rw == UIO_READ) { 233 if ((so_zero_copy_receive != 0) 234 && (obj != NULL) 235 && ((cnt & PAGE_MASK) == 0) 236 && ((((intptr_t) iov->iov_base) & PAGE_MASK) == 0) 237 && ((uio->uio_offset & PAGE_MASK) == 0) 238 && ((((intptr_t) cp) & PAGE_MASK) == 0) 239 && (obj->type == OBJT_DEFAULT) 240 && (disposable != 0)) { 241 /* SOCKET: use page-trading */ 242 /* 243 * We only want to call vm_pgmoveco() on 244 * disposeable pages, since it gives the 245 * kernel page to the userland process. 246 */ 247 error = vm_pgmoveco(&curproc->p_vmspace->vm_map, 248 obj, (vm_offset_t)cp, 249 (vm_offset_t)iov->iov_base); 250 251 /* 252 * If we get an error back, attempt 253 * to use copyout() instead. The 254 * disposable page should be freed 255 * automatically if we weren't able to move 256 * it into userland. 
257 */ 258 if (error != 0) 259 error = copyout(cp, iov->iov_base, cnt); 260 } else { 261 error = copyout(cp, iov->iov_base, cnt); 262 } 263 } else { 264 error = copyin(iov->iov_base, cp, cnt); 265 } 266 return (error); 267} 268 269int 270uiomoveco(void *cp, int n, struct uio *uio, struct vm_object *obj, 271 int disposable) 272{ 273 struct iovec *iov; 274 u_int cnt; 275 int error; 276 277 KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, 278 ("uiomoveco: mode")); 279 KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, 280 ("uiomoveco proc")); 281 282 while (n > 0 && uio->uio_resid) { 283 iov = uio->uio_iov; 284 cnt = iov->iov_len; 285 if (cnt == 0) { 286 uio->uio_iov++; 287 uio->uio_iovcnt--; 288 continue; 289 } 290 if (cnt > n) 291 cnt = n; 292 293 switch (uio->uio_segflg) { 294 295 case UIO_USERSPACE: 296 if (ticks - PCPU_GET(switchticks) >= hogticks) 297 uio_yield(); 298 299 error = userspaceco(cp, cnt, uio, obj, disposable); 300 301 if (error) 302 return (error); 303 break; 304 305 case UIO_SYSSPACE: 306 if (uio->uio_rw == UIO_READ) 307 bcopy(cp, iov->iov_base, cnt); 308 else 309 bcopy(iov->iov_base, cp, cnt); 310 break; 311 case UIO_NOCOPY: 312 break; 313 } 314 iov->iov_base = (char *)iov->iov_base + cnt; 315 iov->iov_len -= cnt; 316 uio->uio_resid -= cnt; 317 uio->uio_offset += cnt; 318 cp = (char *)cp + cnt; 319 n -= cnt; 320 } 321 return (0); 322} 323#endif /* ZERO_COPY_SOCKETS */ 324 325/* 326 * Give next character to user as result of read. 
327 */ 328int 329ureadc(int c, struct uio *uio) 330{ 331 struct iovec *iov; 332 char *iov_base; 333 334again: 335 if (uio->uio_iovcnt == 0 || uio->uio_resid == 0) 336 panic("ureadc"); 337 iov = uio->uio_iov; 338 if (iov->iov_len == 0) { 339 uio->uio_iovcnt--; 340 uio->uio_iov++; 341 goto again; 342 } 343 switch (uio->uio_segflg) { 344 345 case UIO_USERSPACE: 346 if (subyte(iov->iov_base, c) < 0) 347 return (EFAULT); 348 break; 349 350 case UIO_SYSSPACE: 351 iov_base = iov->iov_base; 352 *iov_base = c; 353 iov->iov_base = iov_base; 354 break; 355 356 case UIO_NOCOPY: 357 break; 358 } 359 iov->iov_base = (char *)iov->iov_base + 1; 360 iov->iov_len--; 361 uio->uio_resid--; 362 uio->uio_offset++; 363 return (0); 364} 365 366/* 367 * General routine to allocate a hash table. 368 */ 369void * 370hashinit(int elements, struct malloc_type *type, u_long *hashmask) 371{ 372 long hashsize; 373 LIST_HEAD(generic, generic) *hashtbl; 374 int i; 375 376 if (elements <= 0) 377 panic("hashinit: bad elements"); 378 for (hashsize = 1; hashsize <= elements; hashsize <<= 1) 379 continue; 380 hashsize >>= 1; 381 hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK); 382 for (i = 0; i < hashsize; i++) 383 LIST_INIT(&hashtbl[i]); 384 *hashmask = hashsize - 1; 385 return (hashtbl); 386} 387 388void 389hashdestroy(void *vhashtbl, struct malloc_type *type, u_long hashmask) 390{ 391 LIST_HEAD(generic, generic) *hashtbl, *hp; 392 393 hashtbl = vhashtbl; 394 for (hp = hashtbl; hp <= &hashtbl[hashmask]; hp++) 395 if (!LIST_EMPTY(hp)) 396 panic("hashdestroy: hash not empty"); 397 free(hashtbl, type); 398} 399 400static int primes[] = { 1, 13, 31, 61, 127, 251, 509, 761, 1021, 1531, 2039, 401 2557, 3067, 3583, 4093, 4603, 5119, 5623, 6143, 6653, 402 7159, 7673, 8191, 12281, 16381, 24571, 32749 }; 403#define NPRIMES (sizeof(primes) / sizeof(primes[0])) 404 405/* 406 * General routine to allocate a prime number sized hash table. 
407 */ 408void * 409phashinit(int elements, struct malloc_type *type, u_long *nentries) 410{ 411 long hashsize; 412 LIST_HEAD(generic, generic) *hashtbl; 413 int i; 414 415 if (elements <= 0) 416 panic("phashinit: bad elements"); 417 for (i = 1, hashsize = primes[1]; hashsize <= elements;) { 418 i++; 419 if (i == NPRIMES) 420 break; 421 hashsize = primes[i]; 422 } 423 hashsize = primes[i - 1]; 424 hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK); 425 for (i = 0; i < hashsize; i++) 426 LIST_INIT(&hashtbl[i]); 427 *nentries = hashsize; 428 return (hashtbl); 429} 430 431void 432uio_yield(void) 433{ 434 struct thread *td; 435 436 td = curthread; 437 mtx_lock_spin(&sched_lock); 438 DROP_GIANT(); 439 sched_prio(td, td->td_ksegrp->kg_user_pri); /* XXXKSE */ 440 mi_switch(SW_INVOL); 441 mtx_unlock_spin(&sched_lock); 442 PICKUP_GIANT(); 443} 444 445int 446copyinfrom(const void * __restrict src, void * __restrict dst, size_t len, 447 int seg) 448{ 449 int error = 0; 450 451 switch (seg) { 452 case UIO_USERSPACE: 453 error = copyin(src, dst, len); 454 break; 455 case UIO_SYSSPACE: 456 bcopy(src, dst, len); 457 break; 458 default: 459 panic("copyinfrom: bad seg %d\n", seg); 460 } 461 return (error); 462} 463 464int 465copyinstrfrom(const void * __restrict src, void * __restrict dst, size_t len, 466 size_t * __restrict copied, int seg) 467{ 468 int error = 0; 469 470 switch (seg) { 471 case UIO_USERSPACE: 472 error = copyinstr(src, dst, len, copied); 473 break; 474 case UIO_SYSSPACE: 475 error = copystr(src, dst, len, copied); 476 break; 477 default: 478 panic("copyinstrfrom: bad seg %d\n", seg); 479 } 480 return (error); 481} 482 483int 484uiofromiov(struct iovec *iovp, u_int iovcnt, struct uio *uio) 485{ 486 struct iovec *iov; 487 u_int iovlen; 488 int error, i; 489 490 /* note: can't use iovlen until iovcnt is validated */ 491 iovlen = iovcnt * sizeof (struct iovec); 492 if (iovcnt > UIO_MAXIOV) { 493 error = EINVAL; 494 goto done; 495 } 496 MALLOC(iov, 
struct iovec *, iovlen, M_IOV, M_WAITOK); 497 uio->uio_iov = iov; 498 uio->uio_iovcnt = iovcnt; 499 uio->uio_segflg = UIO_USERSPACE; 500 uio->uio_offset = -1; 501 if ((error = copyin(iovp, iov, iovlen))) 502 goto done; 503 uio->uio_resid = 0; 504 for (i = 0; i < iovcnt; i++) { 505 if (iov->iov_len > INT_MAX - uio->uio_resid) { 506 error = EINVAL; 507 goto done; 508 } 509 uio->uio_resid += iov->iov_len; 510 iov++; 511 } 512 513done: 514 if (error && uio->uio_iov) { 515 FREE(uio->uio_iov, M_IOV); 516 uio->uio_iov = NULL; 517 } 518 return (error); 519 520}
| 190 return (error); 191} 192 193/* 194 * Wrapper for uiomove() that validates the arguments against a known-good 195 * kernel buffer. Currently, uiomove accepts a signed (n) argument, which 196 * is almost definitely a bad thing, so we catch that here as well. We 197 * return a runtime failure, but it might be desirable to generate a runtime 198 * assertion failure instead. 199 */ 200int 201uiomove_frombuf(void *buf, int buflen, struct uio *uio) 202{ 203 unsigned int offset, n; 204 205 if (uio->uio_offset < 0 || uio->uio_resid < 0 || 206 (offset = uio->uio_offset) != uio->uio_offset) 207 return (EINVAL); 208 if (buflen <= 0 || offset >= buflen) 209 return (0); 210 if ((n = buflen - offset) > INT_MAX) 211 return (EINVAL); 212 return (uiomove((char *)buf + offset, n, uio)); 213} 214 215#ifdef ZERO_COPY_SOCKETS 216/* 217 * Experimental support for zero-copy I/O 218 */ 219static int 220userspaceco(void *cp, u_int cnt, struct uio *uio, struct vm_object *obj, 221 int disposable) 222{ 223 struct iovec *iov; 224 int error; 225 226 iov = uio->uio_iov; 227 if (uio->uio_rw == UIO_READ) { 228 if ((so_zero_copy_receive != 0) 229 && (obj != NULL) 230 && ((cnt & PAGE_MASK) == 0) 231 && ((((intptr_t) iov->iov_base) & PAGE_MASK) == 0) 232 && ((uio->uio_offset & PAGE_MASK) == 0) 233 && ((((intptr_t) cp) & PAGE_MASK) == 0) 234 && (obj->type == OBJT_DEFAULT) 235 && (disposable != 0)) { 236 /* SOCKET: use page-trading */ 237 /* 238 * We only want to call vm_pgmoveco() on 239 * disposeable pages, since it gives the 240 * kernel page to the userland process. 241 */ 242 error = vm_pgmoveco(&curproc->p_vmspace->vm_map, 243 obj, (vm_offset_t)cp, 244 (vm_offset_t)iov->iov_base); 245 246 /* 247 * If we get an error back, attempt 248 * to use copyout() instead. The 249 * disposable page should be freed 250 * automatically if we weren't able to move 251 * it into userland. 
252 */ 253 if (error != 0) 254 error = copyout(cp, iov->iov_base, cnt); 255 } else { 256 error = copyout(cp, iov->iov_base, cnt); 257 } 258 } else { 259 error = copyin(iov->iov_base, cp, cnt); 260 } 261 return (error); 262} 263 264int 265uiomoveco(void *cp, int n, struct uio *uio, struct vm_object *obj, 266 int disposable) 267{ 268 struct iovec *iov; 269 u_int cnt; 270 int error; 271 272 KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, 273 ("uiomoveco: mode")); 274 KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, 275 ("uiomoveco proc")); 276 277 while (n > 0 && uio->uio_resid) { 278 iov = uio->uio_iov; 279 cnt = iov->iov_len; 280 if (cnt == 0) { 281 uio->uio_iov++; 282 uio->uio_iovcnt--; 283 continue; 284 } 285 if (cnt > n) 286 cnt = n; 287 288 switch (uio->uio_segflg) { 289 290 case UIO_USERSPACE: 291 if (ticks - PCPU_GET(switchticks) >= hogticks) 292 uio_yield(); 293 294 error = userspaceco(cp, cnt, uio, obj, disposable); 295 296 if (error) 297 return (error); 298 break; 299 300 case UIO_SYSSPACE: 301 if (uio->uio_rw == UIO_READ) 302 bcopy(cp, iov->iov_base, cnt); 303 else 304 bcopy(iov->iov_base, cp, cnt); 305 break; 306 case UIO_NOCOPY: 307 break; 308 } 309 iov->iov_base = (char *)iov->iov_base + cnt; 310 iov->iov_len -= cnt; 311 uio->uio_resid -= cnt; 312 uio->uio_offset += cnt; 313 cp = (char *)cp + cnt; 314 n -= cnt; 315 } 316 return (0); 317} 318#endif /* ZERO_COPY_SOCKETS */ 319 320/* 321 * Give next character to user as result of read. 
322 */ 323int 324ureadc(int c, struct uio *uio) 325{ 326 struct iovec *iov; 327 char *iov_base; 328 329again: 330 if (uio->uio_iovcnt == 0 || uio->uio_resid == 0) 331 panic("ureadc"); 332 iov = uio->uio_iov; 333 if (iov->iov_len == 0) { 334 uio->uio_iovcnt--; 335 uio->uio_iov++; 336 goto again; 337 } 338 switch (uio->uio_segflg) { 339 340 case UIO_USERSPACE: 341 if (subyte(iov->iov_base, c) < 0) 342 return (EFAULT); 343 break; 344 345 case UIO_SYSSPACE: 346 iov_base = iov->iov_base; 347 *iov_base = c; 348 iov->iov_base = iov_base; 349 break; 350 351 case UIO_NOCOPY: 352 break; 353 } 354 iov->iov_base = (char *)iov->iov_base + 1; 355 iov->iov_len--; 356 uio->uio_resid--; 357 uio->uio_offset++; 358 return (0); 359} 360 361/* 362 * General routine to allocate a hash table. 363 */ 364void * 365hashinit(int elements, struct malloc_type *type, u_long *hashmask) 366{ 367 long hashsize; 368 LIST_HEAD(generic, generic) *hashtbl; 369 int i; 370 371 if (elements <= 0) 372 panic("hashinit: bad elements"); 373 for (hashsize = 1; hashsize <= elements; hashsize <<= 1) 374 continue; 375 hashsize >>= 1; 376 hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK); 377 for (i = 0; i < hashsize; i++) 378 LIST_INIT(&hashtbl[i]); 379 *hashmask = hashsize - 1; 380 return (hashtbl); 381} 382 383void 384hashdestroy(void *vhashtbl, struct malloc_type *type, u_long hashmask) 385{ 386 LIST_HEAD(generic, generic) *hashtbl, *hp; 387 388 hashtbl = vhashtbl; 389 for (hp = hashtbl; hp <= &hashtbl[hashmask]; hp++) 390 if (!LIST_EMPTY(hp)) 391 panic("hashdestroy: hash not empty"); 392 free(hashtbl, type); 393} 394 395static int primes[] = { 1, 13, 31, 61, 127, 251, 509, 761, 1021, 1531, 2039, 396 2557, 3067, 3583, 4093, 4603, 5119, 5623, 6143, 6653, 397 7159, 7673, 8191, 12281, 16381, 24571, 32749 }; 398#define NPRIMES (sizeof(primes) / sizeof(primes[0])) 399 400/* 401 * General routine to allocate a prime number sized hash table. 
402 */ 403void * 404phashinit(int elements, struct malloc_type *type, u_long *nentries) 405{ 406 long hashsize; 407 LIST_HEAD(generic, generic) *hashtbl; 408 int i; 409 410 if (elements <= 0) 411 panic("phashinit: bad elements"); 412 for (i = 1, hashsize = primes[1]; hashsize <= elements;) { 413 i++; 414 if (i == NPRIMES) 415 break; 416 hashsize = primes[i]; 417 } 418 hashsize = primes[i - 1]; 419 hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK); 420 for (i = 0; i < hashsize; i++) 421 LIST_INIT(&hashtbl[i]); 422 *nentries = hashsize; 423 return (hashtbl); 424} 425 426void 427uio_yield(void) 428{ 429 struct thread *td; 430 431 td = curthread; 432 mtx_lock_spin(&sched_lock); 433 DROP_GIANT(); 434 sched_prio(td, td->td_ksegrp->kg_user_pri); /* XXXKSE */ 435 mi_switch(SW_INVOL); 436 mtx_unlock_spin(&sched_lock); 437 PICKUP_GIANT(); 438} 439 440int 441copyinfrom(const void * __restrict src, void * __restrict dst, size_t len, 442 int seg) 443{ 444 int error = 0; 445 446 switch (seg) { 447 case UIO_USERSPACE: 448 error = copyin(src, dst, len); 449 break; 450 case UIO_SYSSPACE: 451 bcopy(src, dst, len); 452 break; 453 default: 454 panic("copyinfrom: bad seg %d\n", seg); 455 } 456 return (error); 457} 458 459int 460copyinstrfrom(const void * __restrict src, void * __restrict dst, size_t len, 461 size_t * __restrict copied, int seg) 462{ 463 int error = 0; 464 465 switch (seg) { 466 case UIO_USERSPACE: 467 error = copyinstr(src, dst, len, copied); 468 break; 469 case UIO_SYSSPACE: 470 error = copystr(src, dst, len, copied); 471 break; 472 default: 473 panic("copyinstrfrom: bad seg %d\n", seg); 474 } 475 return (error); 476} 477 478int 479uiofromiov(struct iovec *iovp, u_int iovcnt, struct uio *uio) 480{ 481 struct iovec *iov; 482 u_int iovlen; 483 int error, i; 484 485 /* note: can't use iovlen until iovcnt is validated */ 486 iovlen = iovcnt * sizeof (struct iovec); 487 if (iovcnt > UIO_MAXIOV) { 488 error = EINVAL; 489 goto done; 490 } 491 MALLOC(iov, 
struct iovec *, iovlen, M_IOV, M_WAITOK); 492 uio->uio_iov = iov; 493 uio->uio_iovcnt = iovcnt; 494 uio->uio_segflg = UIO_USERSPACE; 495 uio->uio_offset = -1; 496 if ((error = copyin(iovp, iov, iovlen))) 497 goto done; 498 uio->uio_resid = 0; 499 for (i = 0; i < iovcnt; i++) { 500 if (iov->iov_len > INT_MAX - uio->uio_resid) { 501 error = EINVAL; 502 goto done; 503 } 504 uio->uio_resid += iov->iov_len; 505 iov++; 506 } 507 508done: 509 if (error && uio->uio_iov) { 510 FREE(uio->uio_iov, M_IOV); 511 uio->uio_iov = NULL; 512 } 513 return (error); 514 515}
|