subr_hash.c revision 111977
/*
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_subr.c	8.3 (Berkeley) 1/21/94
 * $FreeBSD: head/sys/kern/kern_subr.c 111977 2003-03-08 06:58:18Z ken $
 */

#include "opt_zero.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#ifdef ZERO_COPY_SOCKETS
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#endif

SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, NULL, UIO_MAXIOV,
    "Maximum number of elements in an I/O vector; sysconf(_SC_IOV_MAX)");
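/*
 * Editor's note, not part of the original file: the sysctl above exports
 * UIO_MAXIOV read-only as kern.iov_max, which is what backs the POSIX
 * sysconf(_SC_IOV_MAX) query.  A minimal userland sketch:
 *
 *	#include <unistd.h>
 *
 *	long iov_max = sysconf(_SC_IOV_MAX);
 *
 * writev(2)/readv(2) calls with more than this many iovec entries fail
 * with EINVAL.
 */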
#ifdef ZERO_COPY_SOCKETS
/* Declared in uipc_socket.c */
extern int so_zero_copy_receive;

static int
vm_pgmoveco(vm_map_t mapa, vm_object_t srcobj, vm_offset_t kaddr,
    vm_offset_t uaddr)
{
	vm_map_t map = mapa;
	vm_page_t kern_pg, user_pg;
	vm_object_t uobject;
	vm_map_entry_t entry;
	vm_pindex_t upindex, kpindex;
	vm_prot_t prot;
	boolean_t wired;

	/*
	 * First lookup the kernel page.
	 */
	kern_pg = PHYS_TO_VM_PAGE(vtophys(kaddr));

	if ((vm_map_lookup(&map, uaddr,
	    VM_PROT_WRITE, &entry, &uobject,
	    &upindex, &prot, &wired)) != KERN_SUCCESS) {
		return(EFAULT);
	}
	if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) {
		do
			vm_page_lock_queues();
		while (vm_page_sleep_if_busy(user_pg, 1, "vm_pgmoveco"));
		vm_page_busy(user_pg);
		pmap_remove_all(user_pg);
		vm_page_free(user_pg);
	} else
		vm_page_lock_queues();
	if (kern_pg->busy || ((kern_pg->queue - kern_pg->pc) == PQ_FREE) ||
	    (kern_pg->hold_count != 0) || (kern_pg->flags & PG_BUSY)) {
		printf("vm_pgmoveco: pindex(%lu), busy(%d), PG_BUSY(%d), "
		    "hold(%d) paddr(0x%lx)\n", (u_long)kern_pg->pindex,
		    kern_pg->busy, (kern_pg->flags & PG_BUSY) ? 1 : 0,
		    kern_pg->hold_count, (u_long)kern_pg->phys_addr);
		if ((kern_pg->queue - kern_pg->pc) == PQ_FREE)
			panic("vm_pgmoveco: renaming free page");
		else
			panic("vm_pgmoveco: renaming busy page");
	}
	kpindex = kern_pg->pindex;
	vm_page_busy(kern_pg);
	vm_page_rename(kern_pg, uobject, upindex);
	vm_page_flag_clear(kern_pg, PG_BUSY);
	kern_pg->valid = VM_PAGE_BITS_ALL;
	vm_page_unlock_queues();

	vm_map_lookup_done(map, entry);
	return(KERN_SUCCESS);
}
#endif /* ZERO_COPY_SOCKETS */

int
uiomove(void *cp, int n, struct uio *uio)
{
	struct thread *td = curthread;
	struct iovec *iov;
	u_int cnt;
	int error = 0;
	int save = 0;

	KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE,
	    ("uiomove: mode"));
	KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread,
	    ("uiomove proc"));

	if (td) {
		mtx_lock_spin(&sched_lock);
		save = td->td_flags & TDF_DEADLKTREAT;
		td->td_flags |= TDF_DEADLKTREAT;
		mtx_unlock_spin(&sched_lock);
	}

	while (n > 0 && uio->uio_resid) {
		iov = uio->uio_iov;
		cnt = iov->iov_len;
		if (cnt == 0) {
			uio->uio_iov++;
			uio->uio_iovcnt--;
			continue;
		}
		if (cnt > n)
			cnt = n;

		switch (uio->uio_segflg) {

		case UIO_USERSPACE:
			if (ticks - PCPU_GET(switchticks) >= hogticks)
				uio_yield();
			if (uio->uio_rw == UIO_READ)
				error = copyout(cp, iov->iov_base, cnt);
			else
				error = copyin(iov->iov_base, cp, cnt);
			if (error)
				goto out;
			break;

		case UIO_SYSSPACE:
			if (uio->uio_rw == UIO_READ)
				bcopy(cp, iov->iov_base, cnt);
			else
				bcopy(iov->iov_base, cp, cnt);
			break;
		case UIO_NOCOPY:
			break;
		}
		iov->iov_base = (char *)iov->iov_base + cnt;
		iov->iov_len -= cnt;
		uio->uio_resid -= cnt;
		uio->uio_offset += cnt;
		cp = (char *)cp + cnt;
		n -= cnt;
	}
out:
	if (td != curthread) printf("uiomove: IT CHANGED!");
	td = curthread;	/* Might things have changed in copyin/copyout? */
	if (td) {
		mtx_lock_spin(&sched_lock);
		td->td_flags = (td->td_flags & ~TDF_DEADLKTREAT) | save;
		mtx_unlock_spin(&sched_lock);
	}
	return (error);
}
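/*
 * Usage sketch, an editor's addition that is not part of the original
 * file: a typical consumer of uiomove() is a character device read
 * routine, which copies from a driver-private buffer into the space
 * described by the uio.  The names foo_read, foo_buf and foo_len are
 * hypothetical:
 *
 *	static char foo_buf[1024];
 *	static int foo_len;
 *
 *	static int
 *	foo_read(dev_t dev, struct uio *uio, int ioflag)
 *	{
 *		int amt;
 *
 *		amt = MIN(uio->uio_resid, foo_len - uio->uio_offset);
 *		if (amt <= 0)
 *			return (0);
 *		return (uiomove(&foo_buf[uio->uio_offset], amt, uio));
 *	}
 *
 * On success uiomove() has advanced uio_offset and decremented
 * uio_resid, so the caller can simply loop or return.
 */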
#ifdef ZERO_COPY_SOCKETS
/*
 * Experimental support for zero-copy I/O
 */
static int
userspaceco(void *cp, u_int cnt, struct uio *uio, struct vm_object *obj,
    int disposable)
{
	struct iovec *iov;
	int error;

	iov = uio->uio_iov;
	if (uio->uio_rw == UIO_READ) {
		if ((so_zero_copy_receive != 0)
		 && (obj != NULL)
		 && ((cnt & PAGE_MASK) == 0)
		 && ((((intptr_t) iov->iov_base) & PAGE_MASK) == 0)
		 && ((uio->uio_offset & PAGE_MASK) == 0)
		 && ((((intptr_t) cp) & PAGE_MASK) == 0)
		 && (obj->type == OBJT_DEFAULT)
		 && (disposable != 0)) {
			/* SOCKET: use page-trading */
			/*
			 * We only want to call vm_pgmoveco() on
			 * disposable pages, since it gives the
			 * kernel page to the userland process.
			 */
			error = vm_pgmoveco(&curproc->p_vmspace->vm_map,
			    obj, (vm_offset_t)cp,
			    (vm_offset_t)iov->iov_base);

			/*
			 * If we get an error back, attempt
			 * to use copyout() instead.  The
			 * disposable page should be freed
			 * automatically if we weren't able to move
			 * it into userland.
			 */
			if (error != 0)
				error = copyout(cp, iov->iov_base, cnt);
		} else {
			error = copyout(cp, iov->iov_base, cnt);
		}
	} else {
		error = copyin(iov->iov_base, cp, cnt);
	}
	return (error);
}

int
uiomoveco(void *cp, int n, struct uio *uio, struct vm_object *obj,
    int disposable)
{
	struct iovec *iov;
	u_int cnt;
	int error;

	KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE,
	    ("uiomoveco: mode"));
	KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread,
	    ("uiomoveco proc"));

	while (n > 0 && uio->uio_resid) {
		iov = uio->uio_iov;
		cnt = iov->iov_len;
		if (cnt == 0) {
			uio->uio_iov++;
			uio->uio_iovcnt--;
			continue;
		}
		if (cnt > n)
			cnt = n;

		switch (uio->uio_segflg) {

		case UIO_USERSPACE:
			if (ticks - PCPU_GET(switchticks) >= hogticks)
				uio_yield();

			error = userspaceco(cp, cnt, uio, obj, disposable);

			if (error)
				return (error);
			break;

		case UIO_SYSSPACE:
			if (uio->uio_rw == UIO_READ)
				bcopy(cp, iov->iov_base, cnt);
			else
				bcopy(iov->iov_base, cp, cnt);
			break;
		case UIO_NOCOPY:
			break;
		}
		iov->iov_base = (char *)iov->iov_base + cnt;
		iov->iov_len -= cnt;
		uio->uio_resid -= cnt;
		uio->uio_offset += cnt;
		cp = (char *)cp + cnt;
		n -= cnt;
	}
	return (0);
}
#endif /* ZERO_COPY_SOCKETS */
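/*
 * Editor's note, not part of the original file: the zero-copy receive
 * path above only replaces copyout() when every condition checked in
 * userspaceco() holds: the count, the user buffer, the uio offset and
 * the kernel buffer must all be page aligned, the backing object must
 * be OBJT_DEFAULT, and the page must be disposable.  A caller can test
 * its buffer against the same alignment rule, e.g.:
 *
 *	int eligible = ((cnt & PAGE_MASK) == 0 &&
 *	    (((intptr_t)buf) & PAGE_MASK) == 0);
 *
 * Anything else falls back to an ordinary copyout().  The receive side
 * is additionally gated by the so_zero_copy_receive tunable declared
 * in uipc_socket.c.
 */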
/*
 * Give next character to user as result of read.
 */
int
ureadc(int c, struct uio *uio)
{
	struct iovec *iov;
	char *iov_base;

again:
	if (uio->uio_iovcnt == 0 || uio->uio_resid == 0)
		panic("ureadc");
	iov = uio->uio_iov;
	if (iov->iov_len == 0) {
		uio->uio_iovcnt--;
		uio->uio_iov++;
		goto again;
	}
	switch (uio->uio_segflg) {

	case UIO_USERSPACE:
		if (subyte(iov->iov_base, c) < 0)
			return (EFAULT);
		break;

	case UIO_SYSSPACE:
		iov_base = iov->iov_base;
		*iov_base = c;
		iov->iov_base = iov_base;
		break;

	case UIO_NOCOPY:
		break;
	}
	iov->iov_base = (char *)iov->iov_base + 1;
	iov->iov_len--;
	uio->uio_resid--;
	uio->uio_offset++;
	return (0);
}

/*
 * General routine to allocate a hash table.
 */
void *
hashinit(int elements, struct malloc_type *type, u_long *hashmask)
{
	long hashsize;
	LIST_HEAD(generic, generic) *hashtbl;
	int i;

	if (elements <= 0)
		panic("hashinit: bad elements");
	for (hashsize = 1; hashsize <= elements; hashsize <<= 1)
		continue;
	hashsize >>= 1;
	hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK);
	for (i = 0; i < hashsize; i++)
		LIST_INIT(&hashtbl[i]);
	*hashmask = hashsize - 1;
	return (hashtbl);
}

void
hashdestroy(void *vhashtbl, struct malloc_type *type, u_long hashmask)
{
	LIST_HEAD(generic, generic) *hashtbl, *hp;

	hashtbl = vhashtbl;
	for (hp = hashtbl; hp <= &hashtbl[hashmask]; hp++)
		if (!LIST_EMPTY(hp))
			panic("hashdestroy: hash not empty");
	free(hashtbl, type);
}

static int primes[] = { 1, 13, 31, 61, 127, 251, 509, 761, 1021, 1531, 2039,
			2557, 3067, 3583, 4093, 4603, 5119, 5623, 6143, 6653,
			7159, 7673, 8191, 12281, 16381, 24571, 32749 };
#define NPRIMES (sizeof(primes) / sizeof(primes[0]))

/*
 * General routine to allocate a prime number sized hash table.
 */
void *
phashinit(int elements, struct malloc_type *type, u_long *nentries)
{
	long hashsize;
	LIST_HEAD(generic, generic) *hashtbl;
	int i;

	if (elements <= 0)
		panic("phashinit: bad elements");
	for (i = 1, hashsize = primes[1]; hashsize <= elements;) {
		i++;
		if (i == NPRIMES)
			break;
		hashsize = primes[i];
	}
	hashsize = primes[i - 1];
	hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK);
	for (i = 0; i < hashsize; i++)
		LIST_INIT(&hashtbl[i]);
	*nentries = hashsize;
	return (hashtbl);
}

void
uio_yield(void)
{
	struct thread *td;

	td = curthread;
	mtx_lock_spin(&sched_lock);
	DROP_GIANT();
	sched_prio(td, td->td_ksegrp->kg_user_pri); /* XXXKSE */
	td->td_proc->p_stats->p_ru.ru_nivcsw++;
	mi_switch();
	mtx_unlock_spin(&sched_lock);
	PICKUP_GIANT();
}

int
copyinfrom(const void *src, void *dst, size_t len, int seg)
{
	int error = 0;

	switch (seg) {
	case UIO_USERSPACE:
		error = copyin(src, dst, len);
		break;
	case UIO_SYSSPACE:
		bcopy(src, dst, len);
		break;
	default:
		panic("copyinfrom: bad seg %d\n", seg);
	}
	return (error);
}

int
copyinstrfrom(const void *src, void *dst, size_t len, size_t *copied, int seg)
{
	int error = 0;

	switch (seg) {
	case UIO_USERSPACE:
		error = copyinstr(src, dst, len, copied);
		break;
	case UIO_SYSSPACE:
		error = copystr(src, dst, len, copied);
		break;
	default:
		panic("copyinstrfrom: bad seg %d\n", seg);
	}
	return (error);
}
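/*
 * Usage sketch, an editor's addition that is not part of the original
 * file: hashinit()/hashdestroy() build a chained hash table whose size
 * is the largest power of two not exceeding `elements`, so a bucket is
 * selected by masking.  The names struct foo, foo_hashtbl, foo_hashmask
 * and f_link are hypothetical; M_TEMP stands in for a real malloc type:
 *
 *	static LIST_HEAD(foohead, foo) *foo_hashtbl;
 *	static u_long foo_hashmask;
 *
 *	foo_hashtbl = hashinit(desired_elements, M_TEMP, &foo_hashmask);
 *	LIST_INSERT_HEAD(&foo_hashtbl[key & foo_hashmask], elem, f_link);
 *	...
 *	hashdestroy(foo_hashtbl, M_TEMP, foo_hashmask);
 *
 * phashinit() is analogous but returns a prime table size through
 * *nentries, so a bucket is chosen with (key % nentries) rather than a
 * mask; the prime modulus spreads poorly distributed keys more evenly.
 */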