/* vm_machdep.c revision 195486 */
/*-
 * Copyright (c) 1982, 1986 The Regents of the University of California.
 * Copyright (c) 1989, 1990 William Jolitz
 * Copyright (c) 1994 John Dyson
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
 *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/amd64/amd64/vm_machdep.c 195486 2009-07-09 09:34:11Z kib $");

#include "opt_isa.h"
#include "opt_cpu.h"
#include "opt_compat.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/pioctl.h>
#include <sys/proc.h>
#include <sys/sf_buf.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>

#include <machine/cpu.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/specialreg.h>
#include <machine/tss.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_param.h>

#include <amd64/isa/isa.h>

/* File-local helpers and state used by cpu_reset(). */
static void	cpu_reset_real(void);
#ifdef SMP
static void	cpu_reset_proxy(void);
/* Id of the CPU that called cpu_reset(); the proxy stops that CPU. */
static u_int	cpu_reset_proxyid;
/*
 * Handshake between cpu_reset() and cpu_reset_proxy():
 * 0 = proxy not started yet, 1 = proxy running and waiting,
 * 2 = cpu_reset() acknowledged, proxy may go ahead.
 */
static volatile u_int	cpu_reset_proxy_active;
#endif

9280675Sasmodai/*
9380675Sasmodai * Finish a fork operation, with process p2 nearly set up.
9480675Sasmodai * Copy and update the pcb, set up the stack so that the child
9580675Sasmodai * ready to run and return to user mode.
9680675Sasmodai */
9780675Sasmodaivoid
9880675Sasmodaicpu_fork(td1, p2, td2, flags)
9980675Sasmodai	register struct thread *td1;
10080675Sasmodai	register struct proc *p2;
10180675Sasmodai	struct thread *td2;
10280675Sasmodai	int flags;
10380675Sasmodai{
10480675Sasmodai	register struct proc *p1;
10580675Sasmodai	struct pcb *pcb2;
10680675Sasmodai	struct mdproc *mdp1, *mdp2;
10780675Sasmodai	struct proc_ldt *pldt;
10880675Sasmodai	pmap_t pmap2;
10980675Sasmodai
11080675Sasmodai	p1 = td1->td_proc;
11180675Sasmodai	if ((flags & RFPROC) == 0) {
11280675Sasmodai		if ((flags & RFMEM) == 0) {
11380675Sasmodai			/* unshare user LDT */
11480675Sasmodai			mdp1 = &p1->p_md;
11580675Sasmodai			mtx_lock(&dt_lock);
11680675Sasmodai			if ((pldt = mdp1->md_ldt) != NULL &&
11780675Sasmodai			    pldt->ldt_refcnt > 1 &&
11880675Sasmodai			    user_ldt_alloc(p1, 1) == NULL)
11980675Sasmodai				panic("could not copy LDT");
12080675Sasmodai			mtx_unlock(&dt_lock);
12180675Sasmodai		}
12280675Sasmodai		return;
12380675Sasmodai	}
12480675Sasmodai
12580675Sasmodai	/* Ensure that p1's pcb is up to date. */
12680675Sasmodai	fpuexit(td1);
12780675Sasmodai
12880675Sasmodai	/* Point the pcb to the top of the stack */
12980675Sasmodai	pcb2 = (struct pcb *)(td2->td_kstack +
13080675Sasmodai	    td2->td_kstack_pages * PAGE_SIZE) - 1;
13180675Sasmodai	td2->td_pcb = pcb2;
13280675Sasmodai
13380675Sasmodai	/* Copy p1's pcb */
13480675Sasmodai	bcopy(td1->td_pcb, pcb2, sizeof(*pcb2));
13580675Sasmodai
13680675Sasmodai	/* Point mdproc and then copy over td1's contents */
13780675Sasmodai	mdp2 = &p2->p_md;
138101401Swosch	bcopy(&p1->p_md, mdp2, sizeof(*mdp2));
13980675Sasmodai
140147593Shrs	/*
14187200Swosch	 * Create a new fresh stack for the new process.
142147593Shrs	 * Copy the trap frame for the return to user mode as if from a
14380675Sasmodai	 * syscall.  This copies most of the user mode register values.
144104772Smaxim	 */
145104772Smaxim	td2->td_frame = (struct trapframe *)td2->td_pcb - 1;
146104772Smaxim	bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe));
147104772Smaxim
148104781Sjhb	td2->td_frame->tf_rax = 0;		/* Child returns zero */
149104781Sjhb	td2->td_frame->tf_rflags &= ~PSL_C;	/* success */
150104781Sjhb	td2->td_frame->tf_rdx = 1;
151104781Sjhb
152104781Sjhb	/*
153104781Sjhb	 * If the parent process has the trap bit set (i.e. a debugger had
154119217Smurray	 * single stepped the process to the system call), we need to clear
155147593Shrs	 * the trap flag from the new frame unless the debugger had set PF_FORK
156147593Shrs	 * on the parent.  Otherwise, the child will receive a (likely
157119217Smurray	 * unexpected) SIGTRAP when it executes the first instruction after
158119217Smurray	 * returning  to userland.
159119217Smurray	 */
160119217Smurray	if ((p1->p_pfsflags & PF_FORK) == 0)
161132652Sosa		td2->td_frame->tf_rflags &= ~PSL_T;
162132652Sosa
163132652Sosa	/*
164132652Sosa	 * Set registers for trampoline to user mode.  Leave space for the
165132652Sosa	 * return address on stack.  These are the kernel mode register values.
166132652Sosa	 */
167132652Sosa	pmap2 = vmspace_pmap(p2->p_vmspace);
168132652Sosa	pcb2->pcb_cr3 = DMAP_TO_PHYS((vm_offset_t)pmap2->pm_pml4);
169132652Sosa	pcb2->pcb_r12 = (register_t)fork_return;	/* fork_trampoline argument */
170132652Sosa	pcb2->pcb_rbp = 0;
171140831Smaxim	pcb2->pcb_rsp = (register_t)td2->td_frame - sizeof(void *);
172140831Smaxim	pcb2->pcb_rbx = (register_t)td2;		/* fork_trampoline argument */
173132652Sosa	pcb2->pcb_rip = (register_t)fork_trampoline;
174132652Sosa	/*-
175132652Sosa	 * pcb2->pcb_dr*:	cloned above.
176132652Sosa	 * pcb2->pcb_savefpu:	cloned above.
177137120Smaxim	 * pcb2->pcb_flags:	cloned above.
178132652Sosa	 * pcb2->pcb_onfault:	cloned above (always NULL here?).
179132652Sosa	 * pcb2->pcb_[fg]sbase:	cloned above
180132652Sosa	 */
181132652Sosa
182132652Sosa	/* Setup to release spin count in fork_exit(). */
183132652Sosa	td2->td_md.md_spinlock_count = 1;
184132652Sosa	td2->td_md.md_saved_flags = PSL_KERNEL | PSL_I;
185158916Smaxim
186158916Smaxim	/* As an i386, do not copy io permission bitmap. */
187158916Smaxim	pcb2->pcb_tssp = NULL;
188158916Smaxim
189158916Smaxim	/* New segment registers. */
190158916Smaxim	pcb2->pcb_full_iret = 1;
191158916Smaxim
192158916Smaxim	/* Copy the LDT, if necessary. */
193158916Smaxim	mdp1 = &td1->td_proc->p_md;
194158916Smaxim	mdp2 = &p2->p_md;
195158916Smaxim	mtx_lock(&dt_lock);
196158916Smaxim	if (mdp1->md_ldt != NULL) {
197158916Smaxim		if (flags & RFMEM) {
198158916Smaxim			mdp1->md_ldt->ldt_refcnt++;
199158916Smaxim			mdp2->md_ldt = mdp1->md_ldt;
200158916Smaxim			bcopy(&mdp1->md_ldt_sd, &mdp2->md_ldt_sd, sizeof(struct
201160877Smaxim			    system_segment_descriptor));
202160877Smaxim		} else {
203160877Smaxim			mdp2->md_ldt = NULL;
204160877Smaxim			mdp2->md_ldt = user_ldt_alloc(p2, 0);
205160877Smaxim			if (mdp2->md_ldt == NULL)
206163859Smaxim				panic("could not copy LDT");
207163859Smaxim			amd64_set_ldt_data(td2, 0, max_ldt_segment,
208163859Smaxim			    (struct user_segment_descriptor *)
209160663Smaxim			    mdp1->md_ldt->ldt_base);
210152003Smaxim		}
211152003Smaxim	} else
212132652Sosa		mdp2->md_ldt = NULL;
21324424Swosch	mtx_unlock(&dt_lock);
21424424Swosch
21524424Swosch	/*
21624424Swosch	 * Now, cpu_switch() can schedule the new process.
21769277Sasmodai	 * pcb_rsp is loaded pointing to the cpu_switch() stack frame
21869277Sasmodai	 * containing the return address when exiting cpu_switch.
21924424Swosch	 * This will normally be to fork_trampoline(), which will have
22025031Swosch	 * %ebx loaded with the new proc's pointer.  fork_trampoline()
22125031Swosch	 * will set up a stack to call fork_return(p, frame); to complete
22225031Swosch	 * the return to user-mode.
22380675Sasmodai	 */
224104782Sjhb}
225144864Smaxim
22625031Swosch/*
227104782Sjhb * Intercept the return address from a freshly forked process that has NOT
228104782Sjhb * been scheduled yet.
229104782Sjhb *
230104797Sjhb * This is needed to make kernel threads stay in kernel mode.
231104797Sjhb */
23225031Swoschvoid
23325031Swoschcpu_set_fork_handler(td, func, arg)
23425031Swosch	struct thread *td;
23545349Swosch	void (*func)(void *);
23645349Swosch	void *arg;
237104782Sjhb{
238104782Sjhb	/*
239104782Sjhb	 * Note that the trap frame follows the args, so the function
240104782Sjhb	 * is really called like this:  func(arg, frame);
24142704Swosch	 */
24225031Swosch	td->td_pcb->pcb_r12 = (long) func;	/* function */
24324424Swosch	td->td_pcb->pcb_rbx = (long) arg;	/* first arg */
24459769Sgrog}
24525031Swosch
24625031Swoschvoid
24725031Swoschcpu_exit(struct thread *td)
24825031Swosch{
24959769Sgrog
25025031Swosch	/*
25125031Swosch	 * If this process has a custom LDT, release it.
25225031Swosch	 */
25325031Swosch	mtx_lock(&dt_lock);
25424424Swosch	if (td->td_proc->p_md.md_ldt != 0)
25525031Swosch		user_ldt_free(td);
25625031Swosch	else
25725031Swosch		mtx_unlock(&dt_lock);
25825031Swosch}
25925031Swosch
26059769Sgrogvoid
26159769Sgrogcpu_thread_exit(struct thread *td)
26242704Swosch{
26342704Swosch	struct pcb *pcb;
26442704Swosch
26570110Swosch	if (td == PCPU_GET(fpcurthread))
26642704Swosch		fpudrop();
26742704Swosch
26825031Swosch	pcb = td->td_pcb;
26925031Swosch
27024424Swosch	/* Disable any hardware breakpoints. */
27125031Swosch	if (pcb->pcb_flags & PCB_DBREGS) {
27225031Swosch		reset_dbregs();
27325031Swosch		pcb->pcb_flags &= ~PCB_DBREGS;
27425031Swosch	}
27525031Swosch}
27625031Swosch
27725031Swoschvoid
27825031Swoschcpu_thread_clean(struct thread *td)
27925031Swosch{
28024424Swosch	struct pcb *pcb;
28125031Swosch
28225031Swosch	pcb = td->td_pcb;
28325031Swosch
28425031Swosch	/*
28525031Swosch	 * Clean TSS/iomap
28625031Swosch	 */
28725031Swosch	if (pcb->pcb_tssp != NULL) {
28825031Swosch		kmem_free(kernel_map, (vm_offset_t)pcb->pcb_tssp,
28959156Swosch		    ctob(IOPAGES + 1));
29025031Swosch		pcb->pcb_tssp = NULL;
29125031Swosch	}
29225031Swosch}
29325031Swosch
/*
 * Machine-dependent hook for swapping a thread in.
 * Nothing needs to be done on this platform.
 */
void
cpu_thread_swapin(struct thread *td)
{
}

/*
 * Machine-dependent hook for swapping a thread out.
 * Nothing needs to be done on this platform.
 */
void
cpu_thread_swapout(struct thread *td)
{
}

30425031Swoschvoid
30589981Sjoecpu_thread_alloc(struct thread *td)
30689981Sjoe{
30725031Swosch
30889981Sjoe	td->td_pcb = (struct pcb *)(td->td_kstack +
30989981Sjoe	    td->td_kstack_pages * PAGE_SIZE) - 1;
31089981Sjoe	td->td_frame = (struct trapframe *)td->td_pcb - 1;
31170110Swosch}
31271231Sitojun
/*
 * Machine-dependent part of freeing a thread: drop whatever
 * cpu_thread_clean() knows how to release.
 */
void
cpu_thread_free(struct thread *td)
{

	cpu_thread_clean(td);
}

32070110Swosch/*
32171231Sitojun * Initialize machine state (pcb and trap frame) for a new thread about to
32270110Swosch * upcall. Put enough state in the new thread's PCB to get it to go back
32370110Swosch * userret(), where we can intercept it again to set the return (upcall)
32471231Sitojun * Address and stack, along with those from upcals that are from other sources
32570110Swosch * such as those generated in thread_userret() itself.
32657000Swosch */
32725031Swoschvoid
32845349Swoschcpu_set_upcall(struct thread *td, struct thread *td0)
32978270Snik{
33071231Sitojun	struct pcb *pcb2;
33125031Swosch
332147051Smaxim	/* Point the pcb to the top of the stack. */
333147051Smaxim	pcb2 = td->td_pcb;
334147051Smaxim
335147051Smaxim	/*
33657000Swosch	 * Copy the upcall pcb.  This loads kernel regs.
33738440Sjkh	 * Those not loaded individually below get their default
338147055Smaxim	 * values here.
33970110Swosch	 */
34069278Sasmodai	bcopy(td0->td_pcb, pcb2, sizeof(*pcb2));
34170110Swosch	pcb2->pcb_flags &= ~PCB_FPUINITDONE;
34225031Swosch	pcb2->pcb_full_iret = 1;
34325031Swosch
34469278Sasmodai	/*
34545349Swosch	 * Create a new fresh stack for the new thread.
34670110Swosch	 */
34769278Sasmodai	bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe));
34845349Swosch
34945349Swosch	/* If the current thread has the trap bit set (i.e. a debugger had
35069278Sasmodai	 * single stepped the process to the system call), we need to clear
35169278Sasmodai	 * the trap flag from the new frame. Otherwise, the new thread will
35280675Sasmodai	 * receive a (likely unexpected) SIGTRAP when it executes the first
35369278Sasmodai	 * instruction after returning to userland.
35470110Swosch	 */
35569278Sasmodai	td->td_frame->tf_rflags &= ~PSL_T;
35669278Sasmodai
35769278Sasmodai	/*
35857000Swosch	 * Set registers for trampoline to user mode.  Leave space for the
35945349Swosch	 * return address on stack.  These are the kernel mode register values.
36069277Sasmodai	 */
36145349Swosch	pcb2->pcb_r12 = (register_t)fork_return;	    /* trampoline arg */
36266542Sitojun	pcb2->pcb_rbp = 0;
36369277Sasmodai	pcb2->pcb_rsp = (register_t)td->td_frame - sizeof(void *);	/* trampoline arg */
36457000Swosch	pcb2->pcb_rbx = (register_t)td;			    /* trampoline arg */
36570110Swosch	pcb2->pcb_rip = (register_t)fork_trampoline;
36645349Swosch	/*
36757000Swosch	 * If we didn't copy the pcb, we'd need to do the following registers:
36869277Sasmodai	 * pcb2->pcb_cr3:	cloned above.
36970110Swosch	 * pcb2->pcb_dr*:	cloned above.
37070110Swosch	 * pcb2->pcb_savefpu:	cloned above.
37142589Swosch	 * pcb2->pcb_onfault:	cloned above (always NULL here?).
37270110Swosch	 * pcb2->pcb_[fg]sbase: cloned above
37370110Swosch	 */
37446321Swosch
37545349Swosch	/* Setup to release spin count in fork_exit(). */
37645349Swosch	td->td_md.md_spinlock_count = 1;
37757000Swosch	td->td_md.md_saved_flags = PSL_KERNEL | PSL_I;
37846318Swosch}
37970110Swosch
38056406Swosch/*
38155389Sbillf * Set that machine state for performing an upcall that has to
38255389Sbillf * be done in thread_userret() so that those upcalls generated
38357000Swosch * in thread_userret() itself can be done as well.
38455389Sbillf */
38555389Sbillfvoid
38655389Sbillfcpu_set_upcall_kse(struct thread *td, void (*entry)(void *), void *arg,
38770110Swosch	stack_t *stack)
38858448Swosch{
38958448Swosch
39065412Swosch	/*
39164612Salex	 * Do any extra cleaning that needs to be done.
39264612Salex	 * The thread may have optional components
39365411Swosch	 * that are not present in a fresh thread.
39465974Swosch	 * This may be a recycled thread so make it look
39569277Sasmodai	 * as though it's newly allocated.
39669277Sasmodai	 */
39770119Swosch	cpu_thread_clean(td);
39869277Sasmodai
39970111Swosch#ifdef COMPAT_IA32
40080675Sasmodai	if (td->td_proc->p_sysent->sv_flags & SV_ILP32) {
40175833Swosch		/*
40278867Sitojun	 	 * Set the trap frame to point at the beginning of the uts
40379603Sitojun		 * function.
40484087Swosch		 */
40583686Sobrien		td->td_frame->tf_rbp = 0;
406104797Sjhb		td->td_frame->tf_rsp =
40787201Swosch		   (((uintptr_t)stack->ss_sp + stack->ss_size - 4) & ~0x0f) - 4;
40892013Swosch		td->td_frame->tf_rip = (uintptr_t)entry;
409104772Smaxim
410101331Swosch		/*
411101331Swosch		 * Pass the address of the mailbox for this kse to the uts
412104797Sjhb		 * function as a parameter on the stack.
413104781Sjhb		 */
414104781Sjhb		suword32((void *)(td->td_frame->tf_rsp + sizeof(int32_t)),
415104781Sjhb		    (uint32_t)(uintptr_t)arg);
416104659Smurray
417106406Smaxim		return;
418111949Swosch	}
419111949Swosch#endif
420111949Swosch
421113054Smurray	/*
422114211Swosch	 * Set the trap frame to point at the beginning of the uts
423114572Swosch	 * function.
424119217Smurray	 */
425119217Smurray	td->td_frame->tf_rbp = 0;
426121648Smurray	td->td_frame->tf_rsp =
427121648Smurray	    ((register_t)stack->ss_sp + stack->ss_size) & ~0x0f;
428126724Swosch	td->td_frame->tf_rsp -= 8;
429121787Swosch	td->td_frame->tf_rip = (register_t)entry;
430124462Smaxim	td->td_frame->tf_ds = _udatasel;
431126235Swosch	td->td_frame->tf_es = _udatasel;
432128863Smaxim	td->td_frame->tf_fs = _ufssel;
433128863Smaxim	td->td_frame->tf_gs = _ugssel;
434129793Shrs	td->td_frame->tf_flags = TF_HASSEGS;
435144864Smaxim
436137120Smaxim	/*
437137326Shrs	 * Pass the address of the mailbox for this kse to the uts
438138844Smaxim	 * function as a parameter on the stack.
439140831Smaxim	 */
440144864Smaxim	td->td_frame->tf_rdi = (register_t)arg;
441147593Shrs}
442146091Smaxim
443146071Smaximint
444146433Smaximcpu_set_user_tls(struct thread *td, void *tls_base)
445152004Smaxim{
446151926Smaxim
447152003Smaxim	if ((u_int64_t)tls_base >= VM_MAXUSER_ADDRESS)
448152004Smaxim		return (EINVAL);
449153699Smaxim
450154120Smaxim#ifdef COMPAT_IA32
451158234Smaxim	if (td->td_proc->p_sysent->sv_flags & SV_ILP32) {
452158956Smaxim		td->td_pcb->pcb_gsbase = (register_t)tls_base;
453158956Smaxim		return (0);
454160877Smaxim	}
455160663Smaxim#endif
456163859Smaxim	td->td_pcb->pcb_fsbase = (register_t)tls_base;
45724424Swosch	td->td_pcb->pcb_full_iret = 1;
45824424Swosch	return (0);
45924424Swosch}
46024424Swosch
#ifdef SMP
/*
 * Reset on behalf of another CPU.  cpu_reset() installs this function as
 * cpustop_restartfunc before restarting CPU 0.  It announces itself
 * through cpu_reset_proxy_active, waits for the requesting CPU to
 * acknowledge, stops that CPU and then performs the actual reset.
 */
static void
cpu_reset_proxy(void)
{

	cpu_reset_proxy_active = 1;
	while (cpu_reset_proxy_active == 1)
		;	/* Wait for other cpu to see that we've started */
	stop_cpus(1 << cpu_reset_proxyid);
	/* cpu_reset_proxyid is a u_int, so print it as unsigned. */
	printf("cpu_reset_proxy: Stopped CPU %u\n", cpu_reset_proxyid);
	DELAY(1000000);
	cpu_reset_real();
}
#endif

/*
 * Reset the machine.
 *
 * On SMP kernels with smp_active set, every other CPU that is not
 * already stopped is stopped first.  If the caller is not CPU 0, CPU 0
 * is restarted with cpu_reset_proxy() as its restart function and this
 * CPU then spins forever, leaving the reset to the proxy.  Otherwise
 * (CPU 0, or no SMP) the reset is carried out here by cpu_reset_real().
 */
void
cpu_reset(void)
{
#ifdef SMP
	u_int cnt, map;

	if (smp_active) {
		map = PCPU_GET(other_cpus) & ~stopped_cpus;
		if (map != 0) {
			printf("cpu_reset: Stopping other CPUs\n");
			stop_cpus(map);
		}

		if (PCPU_GET(cpuid) != 0) {
			cpu_reset_proxyid = PCPU_GET(cpuid);
			cpustop_restartfunc = cpu_reset_proxy;
			cpu_reset_proxy_active = 0;
			printf("cpu_reset: Restarting BSP\n");

			/* Restart CPU #0. */
			atomic_store_rel_int(&started_cpus, 1 << 0);

			/* Bounded busy-wait so a dead BSP cannot hang us here. */
			cnt = 0;
			while (cpu_reset_proxy_active == 0 && cnt < 10000000)
				cnt++;	/* Wait for BSP to announce restart */
			if (cpu_reset_proxy_active == 0)
				printf("cpu_reset: Failed to restart BSP\n");
			enable_intr();
			/* Acknowledge: lets cpu_reset_proxy() leave its wait loop. */
			cpu_reset_proxy_active = 2;

			while (1)
				;
			/* NOTREACHED */
		}

		DELAY(1000000);
	}
#endif
	cpu_reset_real();
	/* NOTREACHED */
}

51750970Speterstatic void
518cpu_reset_real()
519{
520	struct region_descriptor null_idt;
521	int b;
522
523	disable_intr();
524
525	/*
526	 * Attempt to do a CPU reset via the keyboard controller,
527	 * do not turn off GateA20, as any machine that fails
528	 * to do the reset here would then end up in no man's land.
529	 */
530	outb(IO_KBD + 4, 0xFE);
531	DELAY(500000);	/* wait 0.5 sec to see if that did it */
532
533	/*
534	 * Attempt to force a reset via the Reset Control register at
535	 * I/O port 0xcf9.  Bit 2 forces a system reset when it
536	 * transitions from 0 to 1.  Bit 1 selects the type of reset
537	 * to attempt: 0 selects a "soft" reset, and 1 selects a
538	 * "hard" reset.  We try a "hard" reset.  The first write sets
539	 * bit 1 to select a "hard" reset and clears bit 2.  The
540	 * second write forces a 0 -> 1 transition in bit 2 to trigger
541	 * a reset.
542	 */
543	outb(0xcf9, 0x2);
544	outb(0xcf9, 0x6);
545	DELAY(500000);  /* wait 0.5 sec to see if that did it */
546
547	/*
548	 * Attempt to force a reset via the Fast A20 and Init register
549	 * at I/O port 0x92.  Bit 1 serves as an alternate A20 gate.
550	 * Bit 0 asserts INIT# when set to 1.  We are careful to only
551	 * preserve bit 1 while setting bit 0.  We also must clear bit
552	 * 0 before setting it if it isn't already clear.
553	 */
554	b = inb(0x92);
555	if (b != 0xff) {
556		if ((b & 0x1) != 0)
557			outb(0x92, b & 0xfe);
558		outb(0x92, b | 0x1);
559		DELAY(500000);  /* wait 0.5 sec to see if that did it */
560	}
561
562	printf("No known reset method worked, attempting CPU shutdown\n");
563	DELAY(1000000);	/* wait 1 sec for printf to complete */
564
565	/* Wipe the IDT. */
566	null_idt.rd_limit = 0;
567	null_idt.rd_base = 0;
568	lidt(&null_idt);
569
570	/* "good night, sweet prince .... <THUNK!>" */
571	breakpoint();
572
573	/* NOTREACHED */
574	while(1);
575}
576
/*
 * Allocate an sf_buf for the given vm_page.  On this machine, however, there
 * is no sf_buf object.  Instead, an opaque pointer to the given vm_page is
 * returned.
 *
 * The pri argument is accepted for interface compatibility and ignored.
 */
struct sf_buf *
sf_buf_alloc(struct vm_page *m, int pri)
{

	return ((struct sf_buf *)m);
}

/*
 * Free the sf_buf.  In fact, do nothing because there are no resources
 * associated with the sf_buf.
 */
void
sf_buf_free(struct sf_buf *sf)
{
}

598/*
599 * Software interrupt handler for queued VM system processing.
600 */
601void
602swi_vm(void *dummy)
603{
604	if (busdma_swi_pending != 0)
605		busdma_swi();
606}
607
608/*
609 * Tell whether this address is in some physical memory region.
610 * Currently used by the kernel coredump code in order to avoid
611 * dumping the ``ISA memory hole'' which could cause indefinite hangs,
612 * or other unpredictable behaviour.
613 */
614
615int
616is_physical_memory(vm_paddr_t addr)
617{
618
619#ifdef DEV_ISA
620	/* The ISA ``memory hole''. */
621	if (addr >= 0xa0000 && addr < 0x100000)
622		return 0;
623#endif
624
625	/*
626	 * stuff other tests for known memory-mapped devices (PCI?)
627	 * here
628	 */
629
630	return 1;
631}
632