subr_hash.c revision 99098
1/* 2 * Copyright (c) 1982, 1986, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)kern_subr.c 8.3 (Berkeley) 1/21/94 39 * $FreeBSD: head/sys/kern/kern_subr.c 99098 2002-06-30 02:07:26Z iedowse $ 40 */ 41 42#include "opt_zero.h" 43 44#include <sys/param.h> 45#include <sys/systm.h> 46#include <sys/kernel.h> 47#include <sys/ktr.h> 48#include <sys/lock.h> 49#include <sys/mutex.h> 50#include <sys/proc.h> 51#include <sys/malloc.h> 52#include <sys/resourcevar.h> 53#include <sys/sysctl.h> 54#include <sys/vnode.h> 55 56#include <vm/vm.h> 57#include <vm/vm_page.h> 58#include <vm/vm_map.h> 59 60SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, NULL, UIO_MAXIOV, 61 "Maximum number of elements in an I/O vector; sysconf(_SC_IOV_MAX)"); 62 63#ifdef ZERO_COPY_SOCKETS 64#include <vm/vm.h> 65#include <vm/vm_param.h> 66#include <sys/lock.h> 67#include <vm/pmap.h> 68#include <vm/vm_map.h> 69#include <vm/vm_page.h> 70#include <vm/vm_object.h> 71#include <vm/vm_pager.h> 72#include <vm/vm_kern.h> 73#include <vm/vm_extern.h> 74#include <vm/swap_pager.h> 75#include <sys/mbuf.h> 76#include <machine/cpu.h> 77 78/* Declared in uipc_socket.c */ 79extern int so_zero_copy_receive; 80 81static int vm_pgmoveco(vm_map_t mapa, vm_object_t srcobj, vm_offset_t kaddr, 82 vm_offset_t uaddr); 83static int userspaceco(caddr_t cp, u_int cnt, struct uio *uio, 84 struct vm_object *obj, int disposable); 85 86static int 87vm_pgmoveco(mapa, srcobj, kaddr, uaddr) 88 vm_map_t mapa; 89 vm_object_t srcobj; 90 vm_offset_t kaddr, uaddr; 91{ 92 vm_map_t map = mapa; 93 vm_page_t kern_pg, user_pg; 94 vm_object_t uobject; 95 vm_map_entry_t entry; 96 vm_pindex_t upindex, kpindex; 97 vm_prot_t prot; 98 boolean_t wired; 99 100 /* 101 * First lookup the kernel page. 102 */ 103 kern_pg = PHYS_TO_VM_PAGE(vtophys(kaddr)); 104 105 if ((vm_map_lookup(&map, uaddr, 106 VM_PROT_READ, &entry, &uobject, 107 &upindex, &prot, &wired)) != KERN_SUCCESS) { 108 return(EFAULT); 109 } 110 if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) { 111 vm_page_sleep_busy(user_pg, 1, "vm_pgmoveco"); 112 pmap_remove(map->pmap, uaddr, uaddr+PAGE_SIZE); 113 vm_page_busy(user_pg); 114 vm_page_free(user_pg); 115 } 116 117 if (kern_pg->busy || ((kern_pg->queue - kern_pg->pc) == PQ_FREE) || 118 (kern_pg->hold_count != 0)|| (kern_pg->flags & PG_BUSY)) { 119 printf("vm_pgmoveco: pindex(%lu), busy(%d), PG_BUSY(%d), " 120 "hold(%d) paddr(0x%lx)\n", (u_long)kern_pg->pindex, 121 kern_pg->busy, (kern_pg->flags & PG_BUSY) ? 1 : 0, 122 kern_pg->hold_count, (u_long)kern_pg->phys_addr); 123 if ((kern_pg->queue - kern_pg->pc) == PQ_FREE) 124 panic("vm_pgmoveco: renaming free page"); 125 else 126 panic("vm_pgmoveco: renaming busy page"); 127 } 128 kpindex = kern_pg->pindex; 129 vm_page_busy(kern_pg); 130 vm_page_rename(kern_pg, uobject, upindex); 131 vm_page_flag_clear(kern_pg, PG_BUSY); 132 kern_pg->valid = VM_PAGE_BITS_ALL; 133 134 vm_map_lookup_done(map, entry); 135 return(KERN_SUCCESS); 136} 137#endif /* ZERO_COPY_SOCKETS */ 138 139int 140uiomove(cp, n, uio) 141 register caddr_t cp; 142 register int n; 143 register struct uio *uio; 144{ 145 struct thread *td = curthread; 146 register struct iovec *iov; 147 u_int cnt; 148 int error = 0; 149 int save = 0; 150 151 KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, 152 ("uiomove: mode")); 153 KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, 154 ("uiomove proc")); 155 156 if (td) { 157 mtx_lock_spin(&sched_lock); 158 save = td->td_flags & TDF_DEADLKTREAT; 159 td->td_flags |= TDF_DEADLKTREAT; 160 mtx_unlock_spin(&sched_lock); 161 } 162 163 while (n > 0 && uio->uio_resid) { 164 iov = uio->uio_iov; 165 cnt = iov->iov_len; 166 if (cnt == 0) { 167 uio->uio_iov++; 168 uio->uio_iovcnt--; 169 continue; 170 } 171 if (cnt > n) 172 cnt = n; 173 174 switch (uio->uio_segflg) { 175 176 case UIO_USERSPACE: 177 if (ticks - PCPU_GET(switchticks) >= hogticks) 178 uio_yield(); 179 if (uio->uio_rw == UIO_READ) 180 error = copyout(cp, iov->iov_base, cnt); 181 else 182 error = copyin(iov->iov_base, cp, cnt); 183 if (error) 184 goto out; 185 break; 186 187 case UIO_SYSSPACE: 188 if (uio->uio_rw == UIO_READ) 189 bcopy(cp, iov->iov_base, cnt); 190 else 191 bcopy(iov->iov_base, cp, cnt); 192 break; 193 case UIO_NOCOPY: 194 break; 195 } 196 iov->iov_base += cnt; 197 iov->iov_len -= cnt; 198 uio->uio_resid -= cnt; 199 uio->uio_offset += cnt; 200 cp += cnt; 201 n -= cnt; 202 } 203out: 204 if (td != curthread) printf("uiomove: IT CHANGED!"); 205 td = curthread; /* Might things have changed in copyin/copyout? */ 206 if (td) { 207 mtx_lock_spin(&sched_lock); 208 td->td_flags = (td->td_flags & ~TDF_DEADLKTREAT) | save; 209 mtx_unlock_spin(&sched_lock); 210 } 211 return (error); 212} 213 214#if defined(ENABLE_VFS_IOOPT) || defined(ZERO_COPY_SOCKETS) 215/* 216 * Experimental support for zero-copy I/O 217 */ 218static int 219userspaceco(cp, cnt, uio, obj, disposable) 220 caddr_t cp; 221 u_int cnt; 222 struct uio *uio; 223 struct vm_object *obj; 224 int disposable; 225{ 226 struct iovec *iov; 227 int error; 228 229 iov = uio->uio_iov; 230 231#ifdef ZERO_COPY_SOCKETS 232 233 if (uio->uio_rw == UIO_READ) { 234 if ((so_zero_copy_receive != 0) 235 && (obj != NULL) 236 && ((cnt & PAGE_MASK) == 0) 237 && ((((intptr_t) iov->iov_base) & PAGE_MASK) == 0) 238 && ((uio->uio_offset & PAGE_MASK) == 0) 239 && ((((intptr_t) cp) & PAGE_MASK) == 0) 240 && (obj->type == OBJT_DEFAULT) 241 && (disposable != 0)) { 242 /* SOCKET: use page-trading */ 243 /* 244 * We only want to call vm_pgmoveco() on 245 * disposeable pages, since it gives the 246 * kernel page to the userland process. 247 */ 248 error = vm_pgmoveco(&curproc->p_vmspace->vm_map, 249 obj, (vm_offset_t)cp, 250 (vm_offset_t)iov->iov_base); 251 252 /* 253 * If we get an error back, attempt 254 * to use copyout() instead. The 255 * disposable page should be freed 256 * automatically if we weren't able to move 257 * it into userland. 258 */ 259 if (error != 0) 260 error = copyout(cp, iov->iov_base, cnt); 261#ifdef ENABLE_VFS_IOOPT 262 } else if ((vfs_ioopt != 0) 263 && ((cnt & PAGE_MASK) == 0) 264 && ((((intptr_t) iov->iov_base) & PAGE_MASK) == 0) 265 && ((uio->uio_offset & PAGE_MASK) == 0) 266 && ((((intptr_t) cp) & PAGE_MASK) == 0)) { 267 error = vm_uiomove(&curproc->p_vmspace->vm_map, obj, 268 uio->uio_offset, cnt, 269 (vm_offset_t) iov->iov_base, NULL); 270#endif /* ENABLE_VFS_IOOPT */ 271 } else { 272 error = copyout(cp, iov->iov_base, cnt); 273 } 274 } else { 275 error = copyin(iov->iov_base, cp, cnt); 276 } 277#else /* ZERO_COPY_SOCKETS */ 278 if (uio->uio_rw == UIO_READ) { 279#ifdef ENABLE_VFS_IOOPT 280 if ((vfs_ioopt != 0) 281 && ((cnt & PAGE_MASK) == 0) 282 && ((((intptr_t) iov->iov_base) & PAGE_MASK) == 0) 283 && ((uio->uio_offset & PAGE_MASK) == 0) 284 && ((((intptr_t) cp) & PAGE_MASK) == 0)) { 285 error = vm_uiomove(&curproc->p_vmspace->vm_map, obj, 286 uio->uio_offset, cnt, 287 (vm_offset_t) iov->iov_base, NULL); 288 } else 289#endif /* ENABLE_VFS_IOOPT */ 290 { 291 error = copyout(cp, iov->iov_base, cnt); 292 } 293 } else { 294 error = copyin(iov->iov_base, cp, cnt); 295 } 296#endif /* ZERO_COPY_SOCKETS */ 297 298 return (error); 299} 300 301int 302uiomoveco(cp, n, uio, obj, disposable) 303 caddr_t cp; 304 int n; 305 struct uio *uio; 306 struct vm_object *obj; 307 int disposable; 308{ 309 struct iovec *iov; 310 u_int cnt; 311 int error; 312 313 KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, 314 ("uiomoveco: mode")); 315 KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, 316 ("uiomoveco proc")); 317 318 while (n > 0 && uio->uio_resid) { 319 iov = uio->uio_iov; 320 cnt = iov->iov_len; 321 if (cnt == 0) { 322 uio->uio_iov++; 323 uio->uio_iovcnt--; 324 continue; 325 } 326 if (cnt > n) 327 cnt = n; 328 329 switch (uio->uio_segflg) { 330 331 case UIO_USERSPACE: 332 if (ticks - PCPU_GET(switchticks) >= hogticks) 333 uio_yield(); 334 335 error = userspaceco(cp, cnt, uio, obj, disposable); 336 337 if (error) 338 return (error); 339 break; 340 341 case UIO_SYSSPACE: 342 if (uio->uio_rw == UIO_READ) 343 bcopy(cp, iov->iov_base, cnt); 344 else 345 bcopy(iov->iov_base, cp, cnt); 346 break; 347 case UIO_NOCOPY: 348 break; 349 } 350 iov->iov_base += cnt; 351 iov->iov_len -= cnt; 352 uio->uio_resid -= cnt; 353 uio->uio_offset += cnt; 354 cp += cnt; 355 n -= cnt; 356 } 357 return (0); 358} 359#endif /* ENABLE_VFS_IOOPT || ZERO_COPY_SOCKETS */ 360 361#ifdef ENABLE_VFS_IOOPT 362 363/* 364 * Experimental support for zero-copy I/O 365 */ 366int 367uioread(n, uio, obj, nread) 368 int n; 369 struct uio *uio; 370 struct vm_object *obj; 371 int *nread; 372{ 373 int npagesmoved; 374 struct iovec *iov; 375 u_int cnt, tcnt; 376 int error; 377 378 *nread = 0; 379 if (vfs_ioopt < 2) 380 return 0; 381 382 error = 0; 383 384 while (n > 0 && uio->uio_resid) { 385 iov = uio->uio_iov; 386 cnt = iov->iov_len; 387 if (cnt == 0) { 388 uio->uio_iov++; 389 uio->uio_iovcnt--; 390 continue; 391 } 392 if (cnt > n) 393 cnt = n; 394 395 if ((uio->uio_segflg == UIO_USERSPACE) && 396 ((((intptr_t) iov->iov_base) & PAGE_MASK) == 0) && 397 ((uio->uio_offset & PAGE_MASK) == 0) ) { 398 399 if (cnt < PAGE_SIZE) 400 break; 401 402 cnt &= ~PAGE_MASK; 403 404 if (ticks - PCPU_GET(switchticks) >= hogticks) 405 uio_yield(); 406 error = vm_uiomove(&curproc->p_vmspace->vm_map, obj, 407 uio->uio_offset, cnt, 408 (vm_offset_t) iov->iov_base, &npagesmoved); 409 410 if (npagesmoved == 0) 411 break; 412 413 tcnt = npagesmoved * PAGE_SIZE; 414 cnt = tcnt; 415 416 if (error) 417 break; 418 419 iov->iov_base += cnt; 420 iov->iov_len -= cnt; 421 uio->uio_resid -= cnt; 422 uio->uio_offset += cnt; 423 *nread += cnt; 424 n -= cnt; 425 } else { 426 break; 427 } 428 } 429 return error; 430} 431#endif /* ENABLE_VFS_IOOPT */ 432 433/* 434 * Give next character to user as result of read. 435 */ 436int 437ureadc(c, uio) 438 register int c; 439 register struct uio *uio; 440{ 441 register struct iovec *iov; 442 443again: 444 if (uio->uio_iovcnt == 0 || uio->uio_resid == 0) 445 panic("ureadc"); 446 iov = uio->uio_iov; 447 if (iov->iov_len == 0) { 448 uio->uio_iovcnt--; 449 uio->uio_iov++; 450 goto again; 451 } 452 switch (uio->uio_segflg) { 453 454 case UIO_USERSPACE: 455 if (subyte(iov->iov_base, c) < 0) 456 return (EFAULT); 457 break; 458 459 case UIO_SYSSPACE: 460 *iov->iov_base = c; 461 break; 462 463 case UIO_NOCOPY: 464 break; 465 } 466 iov->iov_base++; 467 iov->iov_len--; 468 uio->uio_resid--; 469 uio->uio_offset++; 470 return (0); 471} 472 473/* 474 * General routine to allocate a hash table. 475 */ 476void * 477hashinit(elements, type, hashmask) 478 int elements; 479 struct malloc_type *type; 480 u_long *hashmask; 481{ 482 long hashsize; 483 LIST_HEAD(generic, generic) *hashtbl; 484 int i; 485 486 if (elements <= 0) 487 panic("hashinit: bad elements"); 488 for (hashsize = 1; hashsize <= elements; hashsize <<= 1) 489 continue; 490 hashsize >>= 1; 491 hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK); 492 for (i = 0; i < hashsize; i++) 493 LIST_INIT(&hashtbl[i]); 494 *hashmask = hashsize - 1; 495 return (hashtbl); 496} 497 498void 499hashdestroy(vhashtbl, type, hashmask) 500 void *vhashtbl; 501 struct malloc_type *type; 502 u_long hashmask; 503{ 504 LIST_HEAD(generic, generic) *hashtbl, *hp; 505 506 hashtbl = vhashtbl; 507 for (hp = hashtbl; hp <= &hashtbl[hashmask]; hp++) 508 if (!LIST_EMPTY(hp)) 509 panic("hashdestroy: hash not empty"); 510 free(hashtbl, type); 511} 512 513static int primes[] = { 1, 13, 31, 61, 127, 251, 509, 761, 1021, 1531, 2039, 514 2557, 3067, 3583, 4093, 4603, 5119, 5623, 6143, 6653, 515 7159, 7673, 8191, 12281, 16381, 24571, 32749 }; 516#define NPRIMES (sizeof(primes) / sizeof(primes[0])) 517 518/* 519 * General routine to allocate a prime number sized hash table. 520 */ 521void * 522phashinit(elements, type, nentries) 523 int elements; 524 struct malloc_type *type; 525 u_long *nentries; 526{ 527 long hashsize; 528 LIST_HEAD(generic, generic) *hashtbl; 529 int i; 530 531 if (elements <= 0) 532 panic("phashinit: bad elements"); 533 for (i = 1, hashsize = primes[1]; hashsize <= elements;) { 534 i++; 535 if (i == NPRIMES) 536 break; 537 hashsize = primes[i]; 538 } 539 hashsize = primes[i - 1]; 540 hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK); 541 for (i = 0; i < hashsize; i++) 542 LIST_INIT(&hashtbl[i]); 543 *nentries = hashsize; 544 return (hashtbl); 545} 546 547void 548uio_yield() 549{ 550 struct thread *td; 551 552 td = curthread; 553 mtx_lock_spin(&sched_lock); 554 DROP_GIANT(); 555 td->td_priority = td->td_ksegrp->kg_user_pri; /* XXXKSE */ 556 td->td_proc->p_stats->p_ru.ru_nivcsw++; 557 mi_switch(); 558 mtx_unlock_spin(&sched_lock); 559 PICKUP_GIANT(); 560} 561 562int 563copyinfrom(const void *src, void *dst, size_t len, int seg) 564{ 565 int error = 0; 566 567 switch (seg) { 568 case UIO_USERSPACE: 569 error = copyin(src, dst, len); 570 break; 571 case UIO_SYSSPACE: 572 bcopy(src, dst, len); 573 break; 574 default: 575 panic("copyinfrom: bad seg %d\n", seg); 576 } 577 return (error); 578} 579 580int 581copyinstrfrom(const void *src, void *dst, size_t len, size_t *copied, int seg) 582{ 583 int error = 0; 584 585 switch (seg) { 586 case UIO_USERSPACE: 587 error = copyinstr(src, dst, len, copied); 588 break; 589 case UIO_SYSSPACE: 590 error = copystr(src, dst, len, copied); 591 break; 592 default: 593 panic("copyinstrfrom: bad seg %d\n", seg); 594 } 595 return (error); 596} 597