1/*- 2 * Copyright (c) 1982, 1986 The Regents of the University of California. 3 * Copyright (c) 1989, 1990 William Jolitz 4 * Copyright (c) 1994 John Dyson 5 * Copyright (c) 2001 Jake Burkholder. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * the Systems Programming Group of the University of Utah Computer 10 * Science Department, and William Jolitz. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 37 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ 38 * from: FreeBSD: src/sys/i386/i386/vm_machdep.c,v 1.167 2001/07/12 39 */ 40 41#include <sys/cdefs.h> 42__FBSDID("$FreeBSD$"); 43 44#include "opt_pmap.h" 45 46#include <sys/param.h> 47#include <sys/systm.h> 48#include <sys/bio.h> 49#include <sys/buf.h> 50#include <sys/kernel.h> 51#include <sys/malloc.h> 52#include <sys/mbuf.h> 53#include <sys/mutex.h> 54#include <sys/proc.h> 55#include <sys/sysent.h> 56#include <sys/sf_buf.h> 57#include <sys/sched.h> 58#include <sys/sysctl.h> 59#include <sys/unistd.h> 60#include <sys/vmmeter.h> 61 62#include <dev/ofw/openfirm.h> 63 64#include <vm/vm.h> 65#include <vm/vm_extern.h> 66#include <vm/pmap.h> 67#include <vm/vm_kern.h> 68#include <vm/vm_map.h> 69#include <vm/vm_page.h> 70#include <vm/vm_pageout.h> 71#include <vm/vm_param.h> 72#include <vm/uma.h> 73#include <vm/uma_int.h> 74 75#include <machine/cache.h> 76#include <machine/cpu.h> 77#include <machine/fp.h> 78#include <machine/frame.h> 79#include <machine/fsr.h> 80#include <machine/md_var.h> 81#include <machine/ofw_machdep.h> 82#include <machine/ofw_mem.h> 83#include <machine/pcb.h> 84#include <machine/tlb.h> 85#include <machine/tstate.h> 86 87#ifndef NSFBUFS 88#define NSFBUFS (512 + maxusers * 16) 89#endif 90 91static void sf_buf_init(void *arg); 92SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL); 93 94/* 95 * Expanded sf_freelist head. Really an SLIST_HEAD() in disguise, with the 96 * sf_freelist head with the sf_lock mutex. 97 */ 98static struct { 99 SLIST_HEAD(, sf_buf) sf_head; 100 struct mtx sf_lock; 101} sf_freelist; 102 103static u_int sf_buf_alloc_want; 104 105PMAP_STATS_VAR(uma_nsmall_alloc); 106PMAP_STATS_VAR(uma_nsmall_alloc_oc); 107PMAP_STATS_VAR(uma_nsmall_free); 108 109void 110cpu_exit(struct thread *td) 111{ 112 struct proc *p; 113 114 p = td->td_proc; 115 p->p_md.md_sigtramp = NULL; 116 if (p->p_md.md_utrap != NULL) { 117 utrap_free(p->p_md.md_utrap); 118 p->p_md.md_utrap = NULL; 119 } 120} 121 122void 123cpu_thread_exit(struct thread *td) 124{ 125 126} 127 128void 129cpu_thread_clean(struct thread *td) 130{ 131 132} 133 134void 135cpu_thread_alloc(struct thread *td) 136{ 137 struct pcb *pcb; 138 139 pcb = (struct pcb *)((td->td_kstack + td->td_kstack_pages * PAGE_SIZE - 140 sizeof(struct pcb)) & ~0x3fUL); 141 pcb->pcb_nsaved = 0; 142 td->td_frame = (struct trapframe *)pcb - 1; 143 td->td_pcb = pcb; 144} 145 146void 147cpu_thread_free(struct thread *td) 148{ 149 150} 151 152void 153cpu_thread_swapin(struct thread *td) 154{ 155 156} 157 158void 159cpu_thread_swapout(struct thread *td) 160{ 161 162} 163 164void 165cpu_set_syscall_retval(struct thread *td, int error) 166{ 167 168 switch (error) { 169 case 0: 170 td->td_frame->tf_out[0] = td->td_retval[0]; 171 td->td_frame->tf_out[1] = td->td_retval[1]; 172 td->td_frame->tf_tstate &= ~TSTATE_XCC_C; 173 break; 174 175 case ERESTART: 176 /* 177 * Undo the tpc advancement we have done on syscall 178 * enter, we want to reexecute the system call. 179 */ 180 td->td_frame->tf_tpc = td->td_pcb->pcb_tpc; 181 td->td_frame->tf_tnpc -= 4; 182 break; 183 184 case EJUSTRETURN: 185 break; 186 187 default: 188 if (td->td_proc->p_sysent->sv_errsize) { 189 if (error >= td->td_proc->p_sysent->sv_errsize) 190 error = -1; /* XXX */ 191 else 192 error = td->td_proc->p_sysent->sv_errtbl[error]; 193 } 194 td->td_frame->tf_out[0] = error; 195 td->td_frame->tf_tstate |= TSTATE_XCC_C; 196 break; 197 } 198} 199 200void 201cpu_set_upcall(struct thread *td, struct thread *td0) 202{ 203 struct trapframe *tf; 204 struct frame *fr; 205 struct pcb *pcb; 206 207 bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe)); 208 209 pcb = td->td_pcb; 210 tf = td->td_frame; 211 fr = (struct frame *)tf - 1; 212 fr->fr_local[0] = (u_long)fork_return; 213 fr->fr_local[1] = (u_long)td; 214 fr->fr_local[2] = (u_long)tf; 215 pcb->pcb_pc = (u_long)fork_trampoline - 8; 216 pcb->pcb_sp = (u_long)fr - SPOFF; 217 218 /* Setup to release the spin count in fork_exit(). */ 219 td->td_md.md_spinlock_count = 1; 220 td->td_md.md_saved_pil = 0; 221} 222 223void 224cpu_set_upcall_kse(struct thread *td, void (*entry)(void *), void *arg, 225 stack_t *stack) 226{ 227 struct trapframe *tf; 228 uint64_t sp; 229 230 if (td == curthread) 231 flushw(); 232 tf = td->td_frame; 233 sp = (uint64_t)stack->ss_sp + stack->ss_size; 234 tf->tf_out[0] = (uint64_t)arg; 235 tf->tf_out[6] = sp - SPOFF - sizeof(struct frame); 236 tf->tf_tpc = (uint64_t)entry; 237 tf->tf_tnpc = tf->tf_tpc + 4; 238 239 td->td_retval[0] = tf->tf_out[0]; 240 td->td_retval[1] = tf->tf_out[1]; 241} 242 243int 244cpu_set_user_tls(struct thread *td, void *tls_base) 245{ 246 247 if (td == curthread) 248 flushw(); 249 td->td_frame->tf_global[7] = (uint64_t)tls_base; 250 return (0); 251} 252 253/* 254 * Finish a fork operation, with process p2 nearly set up. 255 * Copy and update the pcb, set up the stack so that the child 256 * ready to run and return to user mode. 257 */ 258void 259cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags) 260{ 261 struct trapframe *tf; 262 struct frame *fp; 263 struct pcb *pcb1; 264 struct pcb *pcb2; 265 vm_offset_t sp; 266 int error; 267 int i; 268 269 KASSERT(td1 == curthread || td1 == &thread0, 270 ("cpu_fork: p1 not curproc and not proc0")); 271 272 if ((flags & RFPROC) == 0) 273 return; 274 275 p2->p_md.md_sigtramp = td1->td_proc->p_md.md_sigtramp; 276 p2->p_md.md_utrap = utrap_hold(td1->td_proc->p_md.md_utrap); 277 278 /* The pcb must be aligned on a 64-byte boundary. */ 279 pcb1 = td1->td_pcb; 280 pcb2 = (struct pcb *)((td2->td_kstack + td2->td_kstack_pages * 281 PAGE_SIZE - sizeof(struct pcb)) & ~0x3fUL); 282 td2->td_pcb = pcb2; 283 284 /* 285 * Ensure that p1's pcb is up to date. 286 */ 287 critical_enter(); 288 if ((td1->td_frame->tf_fprs & FPRS_FEF) != 0) 289 savefpctx(pcb1->pcb_ufp); 290 critical_exit(); 291 /* Make sure the copied windows are spilled. */ 292 flushw(); 293 /* Copy the pcb (this will copy the windows saved in the pcb, too). */ 294 bcopy(pcb1, pcb2, sizeof(*pcb1)); 295 296 /* 297 * If we're creating a new user process and we're sharing the address 298 * space, the parent's top most frame must be saved in the pcb. The 299 * child will pop the frame when it returns to user mode, and may 300 * overwrite it with its own data causing much suffering for the 301 * parent. We check if its already in the pcb, and if not copy it 302 * in. Its unlikely that the copyin will fail, but if so there's not 303 * much we can do. The parent will likely crash soon anyway in that 304 * case. 305 */ 306 if ((flags & RFMEM) != 0 && td1 != &thread0) { 307 sp = td1->td_frame->tf_sp; 308 for (i = 0; i < pcb1->pcb_nsaved; i++) { 309 if (pcb1->pcb_rwsp[i] == sp) 310 break; 311 } 312 if (i == pcb1->pcb_nsaved) { 313 error = copyin((caddr_t)sp + SPOFF, &pcb1->pcb_rw[i], 314 sizeof(struct rwindow)); 315 if (error == 0) { 316 pcb1->pcb_rwsp[i] = sp; 317 pcb1->pcb_nsaved++; 318 } 319 } 320 } 321 322 /* 323 * Create a new fresh stack for the new process. 324 * Copy the trap frame for the return to user mode as if from a 325 * syscall. This copies most of the user mode register values. 326 */ 327 tf = (struct trapframe *)pcb2 - 1; 328 bcopy(td1->td_frame, tf, sizeof(*tf)); 329 330 tf->tf_out[0] = 0; /* Child returns zero */ 331 tf->tf_out[1] = 0; 332 tf->tf_tstate &= ~TSTATE_XCC_C; /* success */ 333 tf->tf_fprs = 0; 334 335 td2->td_frame = tf; 336 fp = (struct frame *)tf - 1; 337 fp->fr_local[0] = (u_long)fork_return; 338 fp->fr_local[1] = (u_long)td2; 339 fp->fr_local[2] = (u_long)tf; 340 /* Terminate stack traces at this frame. */ 341 fp->fr_pc = fp->fr_fp = 0; 342 pcb2->pcb_sp = (u_long)fp - SPOFF; 343 pcb2->pcb_pc = (u_long)fork_trampoline - 8; 344 345 /* Setup to release the spin count in fork_exit(). */ 346 td2->td_md.md_spinlock_count = 1; 347 td2->td_md.md_saved_pil = 0; 348 349 /* 350 * Now, cpu_switch() can schedule the new process. 351 */ 352} 353 354void 355cpu_reset(void) 356{ 357 static char bspec[64] = ""; 358 phandle_t chosen; 359 static struct { 360 cell_t name; 361 cell_t nargs; 362 cell_t nreturns; 363 cell_t bootspec; 364 } args = { 365 (cell_t)"boot", 366 1, 367 0, 368 (cell_t)bspec 369 }; 370 371 if ((chosen = OF_finddevice("/chosen")) != 0) { 372 if (OF_getprop(chosen, "bootpath", bspec, sizeof(bspec)) == -1) 373 bspec[0] = '\0'; 374 bspec[sizeof(bspec) - 1] = '\0'; 375 } 376 377 cpu_shutdown(&args); 378} 379 380/* 381 * Intercept the return address from a freshly forked process that has NOT 382 * been scheduled yet. 383 * 384 * This is needed to make kernel threads stay in kernel mode. 385 */ 386void 387cpu_set_fork_handler(struct thread *td, void (*func)(void *), void *arg) 388{ 389 struct frame *fp; 390 struct pcb *pcb; 391 392 pcb = td->td_pcb; 393 fp = (struct frame *)(pcb->pcb_sp + SPOFF); 394 fp->fr_local[0] = (u_long)func; 395 fp->fr_local[1] = (u_long)arg; 396} 397 398int 399is_physical_memory(vm_paddr_t addr) 400{ 401 struct ofw_mem_region *mr; 402 403 for (mr = sparc64_memreg; mr < sparc64_memreg + sparc64_nmemreg; mr++) 404 if (addr >= mr->mr_start && addr < mr->mr_start + mr->mr_size) 405 return (1); 406 return (0); 407} 408 409/* 410 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-)) 411 */ 412static void 413sf_buf_init(void *arg) 414{ 415 struct sf_buf *sf_bufs; 416 vm_offset_t sf_base; 417 int i; 418 419 nsfbufs = NSFBUFS; 420 TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs); 421 422 mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF); 423 SLIST_INIT(&sf_freelist.sf_head); 424 sf_base = kmem_alloc_nofault(kernel_map, nsfbufs * PAGE_SIZE); 425 sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP, 426 M_NOWAIT | M_ZERO); 427 for (i = 0; i < nsfbufs; i++) { 428 sf_bufs[i].kva = sf_base + i * PAGE_SIZE; 429 SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list); 430 } 431 sf_buf_alloc_want = 0; 432} 433 434/* 435 * Get an sf_buf from the freelist. Will block if none are available. 436 */ 437struct sf_buf * 438sf_buf_alloc(struct vm_page *m, int flags) 439{ 440 struct sf_buf *sf; 441 int error; 442 443 mtx_lock(&sf_freelist.sf_lock); 444 while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) { 445 if (flags & SFB_NOWAIT) 446 break; 447 sf_buf_alloc_want++; 448 mbstat.sf_allocwait++; 449 error = msleep(&sf_freelist, &sf_freelist.sf_lock, 450 (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0); 451 sf_buf_alloc_want--; 452 453 /* 454 * If we got a signal, don't risk going back to sleep. 455 */ 456 if (error) 457 break; 458 } 459 if (sf != NULL) { 460 SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list); 461 sf->m = m; 462 nsfbufsused++; 463 nsfbufspeak = imax(nsfbufspeak, nsfbufsused); 464 pmap_qenter(sf->kva, &sf->m, 1); 465 } 466 mtx_unlock(&sf_freelist.sf_lock); 467 return (sf); 468} 469 470/* 471 * Release resources back to the system. 472 */ 473void 474sf_buf_free(struct sf_buf *sf) 475{ 476 477 pmap_qremove(sf->kva, 1); 478 mtx_lock(&sf_freelist.sf_lock); 479 SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list); 480 nsfbufsused--; 481 if (sf_buf_alloc_want > 0) 482 wakeup(&sf_freelist); 483 mtx_unlock(&sf_freelist.sf_lock); 484} 485 486void 487swi_vm(void *v) 488{ 489 490 /* Nothing to do here - busdma bounce buffers are not implemented. */ 491} 492 493void * 494uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) 495{ 496 static vm_pindex_t color; 497 vm_paddr_t pa; 498 vm_page_t m; 499 int pflags; 500 void *va; 501 502 PMAP_STATS_INC(uma_nsmall_alloc); 503 504 *flags = UMA_SLAB_PRIV; 505 506 if ((wait & (M_NOWAIT|M_USE_RESERVE)) == M_NOWAIT) 507 pflags = VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED; 508 else 509 pflags = VM_ALLOC_SYSTEM | VM_ALLOC_WIRED; 510 511 if (wait & M_ZERO) 512 pflags |= VM_ALLOC_ZERO; 513 514 for (;;) { 515 m = vm_page_alloc(NULL, color++, pflags | VM_ALLOC_NOOBJ); 516 if (m == NULL) { 517 if (wait & M_NOWAIT) 518 return (NULL); 519 else 520 VM_WAIT; 521 } else 522 break; 523 } 524 525 pa = VM_PAGE_TO_PHYS(m); 526 if (dcache_color_ignore == 0 && m->md.color != DCACHE_COLOR(pa)) { 527 KASSERT(m->md.colors[0] == 0 && m->md.colors[1] == 0, 528 ("uma_small_alloc: free page %p still has mappings!", m)); 529 PMAP_STATS_INC(uma_nsmall_alloc_oc); 530 m->md.color = DCACHE_COLOR(pa); 531 dcache_page_inval(pa); 532 } 533 va = (void *)TLB_PHYS_TO_DIRECT(pa); 534 if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0) 535 cpu_block_zero(va, PAGE_SIZE); 536 return (va); 537} 538 539void 540uma_small_free(void *mem, int size, u_int8_t flags) 541{ 542 vm_page_t m; 543 544 PMAP_STATS_INC(uma_nsmall_free); 545 m = PHYS_TO_VM_PAGE(TLB_DIRECT_TO_PHYS((vm_offset_t)mem)); 546 m->wire_count--; 547 vm_page_free(m); 548 atomic_subtract_int(&cnt.v_wire_count, 1); 549} 550