kern_subr.c revision 109623
/*
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * @(#)kern_subr.c	8.3 (Berkeley) 1/21/94
 * $FreeBSD: head/sys/kern/kern_subr.c 109623 2003-01-21 08:56:16Z alfred $
 */

#include "opt_zero.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#ifdef ZERO_COPY_SOCKETS
#include <vm/vm_param.h>
#endif
#if defined(ZERO_COPY_SOCKETS) || defined(ENABLE_VFS_IOOPT)
#include <vm/vm_object.h>
#endif

/* Export the iovec limit so userland's sysconf(_SC_IOV_MAX) can see it. */
SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, NULL, UIO_MAXIOV,
	"Maximum number of elements in an I/O vector; sysconf(_SC_IOV_MAX)");

#if defined(ZERO_COPY_SOCKETS) || defined(ENABLE_VFS_IOOPT)
static int userspaceco(caddr_t cp, u_int cnt, struct uio *uio,
		       struct vm_object *obj, int disposable);
#endif

#ifdef ZERO_COPY_SOCKETS
/* Declared in uipc_socket.c */
extern int so_zero_copy_receive;

static int vm_pgmoveco(vm_map_t mapa, vm_object_t srcobj, vm_offset_t kaddr,
		       vm_offset_t uaddr);

/*
 * Hand the kernel page backing kaddr to userland by renaming it into the
 * VM object that backs the user mapping at uaddr ("page trading" instead
 * of a data copy).  Any page already resident at the user address is
 * unmapped and freed first.
 *
 * Returns KERN_SUCCESS on success, or EFAULT when the user address cannot
 * be looked up in the map.
 *
 * NOTE(review): the success value is a vm KERN_* code while the failure
 * value is an errno; KERN_SUCCESS == 0, so callers testing for nonzero
 * still behave, but the mixed namespaces deserve confirmation.
 */
static int
vm_pgmoveco(mapa, srcobj, kaddr, uaddr)
	vm_map_t mapa;
	vm_object_t srcobj;
	vm_offset_t kaddr, uaddr;
{
	vm_map_t map = mapa;
	vm_page_t kern_pg, user_pg;
	vm_object_t uobject;
	vm_map_entry_t entry;
	vm_pindex_t upindex, kpindex;
	vm_prot_t prot;
	boolean_t wired;

	/*
	 * First lookup the kernel page.
	 */
	kern_pg = PHYS_TO_VM_PAGE(vtophys(kaddr));

	/* Find the user object/pindex backing the destination address. */
	if ((vm_map_lookup(&map, uaddr,
		VM_PROT_READ, &entry, &uobject,
		&upindex, &prot, &wired)) != KERN_SUCCESS) {
		return(EFAULT);
	}
	if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) {
		/*
		 * A page is already present at the user address: wait for
		 * it to become unbusy (re-taking the queues lock each
		 * iteration, since sleep_if_busy drops it), then toss it.
		 */
		do
			vm_page_lock_queues();
		while (vm_page_sleep_if_busy(user_pg, 1, "vm_pgmoveco"));
		vm_page_busy(user_pg);
		pmap_remove_all(user_pg);
		vm_page_free(user_pg);
	} else
		vm_page_lock_queues();
	/* The kernel page must be idle, held by nobody, and not free. */
	if (kern_pg->busy || ((kern_pg->queue - kern_pg->pc) == PQ_FREE) ||
	    (kern_pg->hold_count != 0)|| (kern_pg->flags & PG_BUSY)) {
		printf("vm_pgmoveco: pindex(%lu), busy(%d), PG_BUSY(%d), "
		       "hold(%d) paddr(0x%lx)\n", (u_long)kern_pg->pindex,
		       kern_pg->busy, (kern_pg->flags & PG_BUSY) ? 1 : 0,
		       kern_pg->hold_count, (u_long)kern_pg->phys_addr);
		if ((kern_pg->queue - kern_pg->pc) == PQ_FREE)
			panic("vm_pgmoveco: renaming free page");
		else
			panic("vm_pgmoveco: renaming busy page");
	}
	kpindex = kern_pg->pindex;
	vm_page_busy(kern_pg);
	/* Rename the kernel page into the user object at the target index. */
	vm_page_rename(kern_pg, uobject, upindex);
	vm_page_flag_clear(kern_pg, PG_BUSY);
	kern_pg->valid = VM_PAGE_BITS_ALL;
	vm_page_unlock_queues();

	vm_map_lookup_done(map, entry);
	return(KERN_SUCCESS);
}
#endif /* ZERO_COPY_SOCKETS */

/*
 * uiomove: transfer up to n bytes between the kernel buffer cp and the
 * scatter/gather list described by uio, in the direction selected by
 * uio->uio_rw (UIO_READ: kernel -> uio, UIO_WRITE: uio -> kernel).
 * Advances iov_base/iov_len, uio_resid and uio_offset as it goes and
 * returns 0 or an errno from copyin()/copyout().
 */
int
uiomove(cp, n, uio)
	register caddr_t cp;
	register int n;
	register struct uio *uio;
{
	struct thread *td = curthread;
	register struct iovec *iov;
	u_int cnt;
	int error = 0;
	int save = 0;

	KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE,
	    ("uiomove: mode"));
	KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread,
	    ("uiomove proc"));

	if (td) {
		/*
		 * Mark the thread for deadlock treatment while it may fault
		 * on user pages, preserving the caller's previous setting.
		 * (assumes TDF_DEADLKTREAT guards against page-fault
		 * deadlocks -- TODO confirm against sys/proc.h of this era)
		 */
		mtx_lock_spin(&sched_lock);
		save = td->td_flags & TDF_DEADLKTREAT;
		td->td_flags |= TDF_DEADLKTREAT;
		mtx_unlock_spin(&sched_lock);
	}

	while (n > 0 && uio->uio_resid) {
		iov = uio->uio_iov;
		cnt = iov->iov_len;
		if (cnt == 0) {
			/* Exhausted iovec: advance to the next one. */
			uio->uio_iov++;
			uio->uio_iovcnt--;
			continue;
		}
		if (cnt > n)
			cnt = n;

		switch (uio->uio_segflg) {

		case UIO_USERSPACE:
			/* Yield if we have been hogging the CPU. */
			if (ticks - PCPU_GET(switchticks) >= hogticks)
				uio_yield();
			if (uio->uio_rw == UIO_READ)
				error = copyout(cp, iov->iov_base, cnt);
			else
				error = copyin(iov->iov_base, cp, cnt);
			if (error)
				goto out;
			break;

		case UIO_SYSSPACE:
			if (uio->uio_rw == UIO_READ)
				bcopy(cp, iov->iov_base, cnt);
			else
				bcopy(iov->iov_base, cp, cnt);
			break;
		case UIO_NOCOPY:
			/* Advance bookkeeping without moving any data. */
			break;
		}
		iov->iov_base = (char *)iov->iov_base + cnt;
		iov->iov_len -= cnt;
		uio->uio_resid -= cnt;
		uio->uio_offset += cnt;
		cp += cnt;
		n -= cnt;
	}
out:
	if (td != curthread) printf("uiomove: IT CHANGED!");
	td = curthread;	/* Might things have changed in copyin/copyout? */
	if (td) {
		/* Restore the caller's original deadlock-treatment flag. */
		mtx_lock_spin(&sched_lock);
		td->td_flags = (td->td_flags & ~TDF_DEADLKTREAT) | save;
		mtx_unlock_spin(&sched_lock);
	}
	return (error);
}

#if defined(ENABLE_VFS_IOOPT) || defined(ZERO_COPY_SOCKETS)
/*
 * Experimental support for zero-copy I/O
 *
 * Move one chunk (cp, cnt) to/from userspace for uiomoveco().  On reads,
 * tries page-trading (vm_pgmoveco) or page-remapping (vm_uiomove) when
 * everything is page-aligned and the relevant option is enabled, falling
 * back to plain copyout() otherwise; writes always use copyin().
 */
static int
userspaceco(cp, cnt, uio, obj, disposable)
	caddr_t cp;
	u_int cnt;
	struct uio *uio;
	struct vm_object *obj;
	int disposable;
{
	struct iovec *iov;
	int error;

	iov = uio->uio_iov;

#ifdef ZERO_COPY_SOCKETS

	if (uio->uio_rw == UIO_READ) {
		/*
		 * Page trading requires: receive-side zero copy enabled,
		 * a default backing object, full page-aligned source,
		 * destination, offset and length, and a disposable page.
		 */
		if ((so_zero_copy_receive != 0)
		 && (obj != NULL)
		 && ((cnt & PAGE_MASK) == 0)
		 && ((((intptr_t) iov->iov_base) & PAGE_MASK) == 0)
		 && ((uio->uio_offset & PAGE_MASK) == 0)
		 && ((((intptr_t) cp) & PAGE_MASK) == 0)
		 && (obj->type == OBJT_DEFAULT)
		 && (disposable != 0)) {
			/* SOCKET: use page-trading */
			/*
			 * We only want to call vm_pgmoveco() on
			 * disposeable pages, since it gives the
			 * kernel page to the userland process.
			 */
			error = vm_pgmoveco(&curproc->p_vmspace->vm_map,
					    obj, (vm_offset_t)cp,
					    (vm_offset_t)iov->iov_base);

			/*
			 * If we get an error back, attempt
			 * to use copyout() instead.  The
			 * disposable page should be freed
			 * automatically if we weren't able to move
			 * it into userland.
			 */
			if (error != 0)
				error = copyout(cp, iov->iov_base, cnt);
#ifdef ENABLE_VFS_IOOPT
		} else if ((vfs_ioopt != 0)
		 && ((cnt & PAGE_MASK) == 0)
		 && ((((intptr_t) iov->iov_base) & PAGE_MASK) == 0)
		 && ((uio->uio_offset & PAGE_MASK) == 0)
		 && ((((intptr_t) cp) & PAGE_MASK) == 0)) {
			error = vm_uiomove(&curproc->p_vmspace->vm_map, obj,
					   uio->uio_offset, cnt,
					   (vm_offset_t) iov->iov_base, NULL);
#endif /* ENABLE_VFS_IOOPT */
		} else {
			error = copyout(cp, iov->iov_base, cnt);
		}
	} else {
		error = copyin(iov->iov_base, cp, cnt);
	}
#else /* ZERO_COPY_SOCKETS */
	if (uio->uio_rw == UIO_READ) {
#ifdef ENABLE_VFS_IOOPT
		if ((vfs_ioopt != 0)
		 && ((cnt & PAGE_MASK) == 0)
		 && ((((intptr_t) iov->iov_base) & PAGE_MASK) == 0)
		 && ((uio->uio_offset & PAGE_MASK) == 0)
		 && ((((intptr_t) cp) & PAGE_MASK) == 0)) {
			error = vm_uiomove(&curproc->p_vmspace->vm_map, obj,
					   uio->uio_offset, cnt,
					   (vm_offset_t) iov->iov_base, NULL);
		} else
#endif /* ENABLE_VFS_IOOPT */
		{
			error = copyout(cp, iov->iov_base, cnt);
		}
	} else {
		error = copyin(iov->iov_base, cp, cnt);
	}
#endif /* ZERO_COPY_SOCKETS */

	return (error);
}

/*
 * Zero-copy-aware variant of uiomove(): same bookkeeping and direction
 * handling, but userspace chunks go through userspaceco() so that the
 * page-trading/remapping optimizations above can kick in.
 */
int
uiomoveco(cp, n, uio, obj, disposable)
	caddr_t cp;
	int n;
	struct uio *uio;
	struct vm_object *obj;
	int disposable;
{
	struct iovec *iov;
	u_int cnt;
	int error;

	KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE,
	    ("uiomoveco: mode"));
	KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread,
	    ("uiomoveco proc"));

	while (n > 0 && uio->uio_resid) {
		iov = uio->uio_iov;
		cnt = iov->iov_len;
		if (cnt == 0) {
			uio->uio_iov++;
			uio->uio_iovcnt--;
			continue;
		}
		if (cnt > n)
			cnt = n;

		switch (uio->uio_segflg) {

		case UIO_USERSPACE:
			if (ticks - PCPU_GET(switchticks) >= hogticks)
				uio_yield();

			error = userspaceco(cp, cnt, uio, obj, disposable);

			if (error)
				return (error);
			break;

		case UIO_SYSSPACE:
			if (uio->uio_rw == UIO_READ)
				bcopy(cp, iov->iov_base, cnt);
			else
				bcopy(iov->iov_base, cp, cnt);
			break;
		case UIO_NOCOPY:
			break;
		}
		iov->iov_base = (char *)iov->iov_base + cnt;
		iov->iov_len -= cnt;
		uio->uio_resid -= cnt;
		uio->uio_offset += cnt;
		cp += cnt;
		n -= cnt;
	}
	return (0);
}
#endif /* ENABLE_VFS_IOOPT || ZERO_COPY_SOCKETS */

#ifdef ENABLE_VFS_IOOPT

/*
 * Experimental support for zero-copy I/O
 *
 * Satisfy a read by remapping whole pages of obj into the user's address
 * space with vm_uiomove(), whole pages at a time; *nread reports how many
 * bytes were actually moved.  Bails out (returning what was done so far)
 * as soon as alignment or size conditions stop holding.
 */
int
uioread(n, uio, obj, nread)
	int n;
	struct uio *uio;
	struct vm_object *obj;
	int *nread;
{
	int npagesmoved;
	struct iovec *iov;
	u_int cnt, tcnt;
	int error;

	*nread = 0;
	/* Only active at the more aggressive vfs_ioopt setting. */
	if (vfs_ioopt < 2)
		return 0;

	error = 0;

	while (n > 0 && uio->uio_resid) {
		iov = uio->uio_iov;
		cnt = iov->iov_len;
		if (cnt == 0) {
			uio->uio_iov++;
			uio->uio_iovcnt--;
			continue;
		}
		if (cnt > n)
			cnt = n;

		if ((uio->uio_segflg == UIO_USERSPACE) &&
			((((intptr_t) iov->iov_base) & PAGE_MASK) == 0) &&
				 ((uio->uio_offset & PAGE_MASK) == 0) ) {

			if (cnt < PAGE_SIZE)
				break;

			/* Only move whole pages. */
			cnt &= ~PAGE_MASK;

			if (ticks - PCPU_GET(switchticks) >= hogticks)
				uio_yield();
			error = vm_uiomove(&curproc->p_vmspace->vm_map, obj,
				uio->uio_offset, cnt,
				(vm_offset_t) iov->iov_base, &npagesmoved);

			if (npagesmoved == 0)
				break;

			/* Account only for the pages actually moved. */
			tcnt = npagesmoved * PAGE_SIZE;
			cnt = tcnt;

			if (error)
				break;

			iov->iov_base = (char *)iov->iov_base + cnt;
			iov->iov_len -= cnt;
			uio->uio_resid -= cnt;
			uio->uio_offset += cnt;
			*nread += cnt;
			n -= cnt;
		} else {
			break;
		}
	}
	return error;
}
#endif /* ENABLE_VFS_IOOPT */

/*
 * Give next character to user as result of read.
 */
int
ureadc(c, uio)
	register int c;
	register struct uio *uio;
{
	register struct iovec *iov;
	register char *iov_base;

again:
	if (uio->uio_iovcnt == 0 || uio->uio_resid == 0)
		panic("ureadc");
	iov = uio->uio_iov;
	if (iov->iov_len == 0) {
		/* Skip exhausted iovecs. */
		uio->uio_iovcnt--;
		uio->uio_iov++;
		goto again;
	}
	switch (uio->uio_segflg) {

	case UIO_USERSPACE:
		if (subyte(iov->iov_base, c) < 0)
			return (EFAULT);
		break;

	case UIO_SYSSPACE:
		iov_base = iov->iov_base;
		*iov_base = c;
		iov->iov_base = iov_base;
		break;

	case UIO_NOCOPY:
		break;
	}
	iov->iov_base = (char *)iov->iov_base + 1;
	iov->iov_len--;
	uio->uio_resid--;
	uio->uio_offset++;
	return (0);
}

/*
 * General routine to allocate a hash table.
 */
void *
hashinit(elements, type, hashmask)
	int elements;
	struct malloc_type *type;
	u_long *hashmask;
{
	long hashsize;
	LIST_HEAD(generic, generic) *hashtbl;
	int i;

	if (elements <= 0)
		panic("hashinit: bad elements");
	/* hashsize becomes the largest power of two <= elements. */
	for (hashsize = 1; hashsize <= elements; hashsize <<= 1)
		continue;
	hashsize >>= 1;
	/* flag 0: blocking allocation -- TODO confirm vs. malloc(9) flags */
	hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, 0);
	for (i = 0; i < hashsize; i++)
		LIST_INIT(&hashtbl[i]);
	/* Power-of-two size, so size - 1 is a valid bit mask. */
	*hashmask = hashsize - 1;
	return (hashtbl);
}

/*
 * Free a table allocated by hashinit(); panics if any chain is non-empty,
 * since freeing a populated table would leak (or orphan) its elements.
 */
void
hashdestroy(vhashtbl, type, hashmask)
	void *vhashtbl;
	struct malloc_type *type;
	u_long hashmask;
{
	LIST_HEAD(generic, generic) *hashtbl, *hp;

	hashtbl = vhashtbl;
	for (hp = hashtbl; hp <= &hashtbl[hashmask]; hp++)
		if (!LIST_EMPTY(hp))
			panic("hashdestroy: hash not empty");
	free(hashtbl, type);
}

static int primes[] = { 1, 13, 31, 61, 127, 251, 509, 761, 1021, 1531, 2039,
			2557, 3067, 3583, 4093, 4603, 5119, 5623, 6143, 6653,
			7159, 7673, 8191, 12281, 16381, 24571, 32749 };
#define NPRIMES (sizeof(primes) / sizeof(primes[0]))

/*
 * General routine to allocate a prime number sized hash table.
 */
void *
phashinit(elements, type, nentries)
	int elements;
	struct malloc_type *type;
	u_long *nentries;
{
	long hashsize;
	LIST_HEAD(generic, generic) *hashtbl;
	int i;

	if (elements <= 0)
		panic("phashinit: bad elements");
	/*
	 * Scan primes[] upward; on exit primes[i - 1] is the largest table
	 * prime <= elements (primes[0] == 1 when elements < 13).
	 */
	for (i = 1, hashsize = primes[1]; hashsize <= elements;) {
		i++;
		if (i == NPRIMES)
			break;
		hashsize = primes[i];
	}
	hashsize = primes[i - 1];
	/* flag 0: blocking allocation -- TODO confirm vs. malloc(9) flags */
	hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, 0);
	for (i = 0; i < hashsize; i++)
		LIST_INIT(&hashtbl[i]);
	*nentries = hashsize;
	return (hashtbl);
}

/*
 * Voluntarily surrender the CPU: drop to user priority, count an
 * involuntary context switch, and let the scheduler pick a new thread.
 * Giant is released around the switch and reacquired afterwards.
 */
void
uio_yield()
{
	struct thread *td;

	td = curthread;
	mtx_lock_spin(&sched_lock);
	DROP_GIANT();
	sched_prio(td, td->td_ksegrp->kg_user_pri); /* XXXKSE */
	td->td_proc->p_stats->p_ru.ru_nivcsw++;
	mi_switch();
	mtx_unlock_spin(&sched_lock);
	PICKUP_GIANT();
}

/*
 * Copy len bytes from src to dst, where src lives in the address space
 * named by seg (copyin() for userspace, bcopy() for kernel space).
 * Returns 0 or an errno from copyin().
 */
int
copyinfrom(const void *src, void *dst, size_t len, int seg)
{
	int error = 0;

	switch (seg) {
	case UIO_USERSPACE:
		error = copyin(src, dst, len);
		break;
	case UIO_SYSSPACE:
		bcopy(src, dst, len);
		break;
	default:
		panic("copyinfrom: bad seg %d\n", seg);
	}
	return (error);
}

/*
 * Copy a NUL-terminated string of at most len bytes from src to dst,
 * selecting copyinstr() or copystr() by the source address space; the
 * number of bytes copied (including the NUL) is returned via *copied.
 */
int
copyinstrfrom(const void *src, void *dst, size_t len, size_t *copied, int seg)
{
	int error = 0;

	switch (seg) {
	case UIO_USERSPACE:
		error = copyinstr(src, dst, len, copied);
		break;
	case UIO_SYSSPACE:
		error = copystr(src, dst, len, copied);
		break;
	default:
		panic("copyinstrfrom: bad seg %d\n", seg);
	}
	return (error);
}