cpu_switch.S revision 177535
/*-
 * Copyright (c) 2003 Peter Wemm.
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/amd64/amd64/cpu_switch.S 177535 2008-03-23 23:09:06Z peter $
 */

#include <machine/asmacros.h>
#include <machine/specialreg.h>

#include "assym.s"
#include "opt_sched.h"

/*****************************************************************************/
/* Scheduling                                                                */
/*****************************************************************************/

	.text

#ifdef SMP
#define LK	lock ;
#else
#define LK
#endif

#if defined(SCHED_ULE) && defined(SMP)
#define	SETLK	xchgq
#else
#define	SETLK	movq
#endif

/*
 * cpu_throw()
 *
 * This is the second half of cpu_switch(). It is used when the current
 * thread is either a dummy or slated to die, and we no longer care
 * about its state.  This is only a slight optimization and is probably
 * not worth it anymore.  Note that we need to clear the pm_active bits so
 * we do need the old proc if it still exists.
 * %rdi = oldtd
 * %rsi = newtd
 */
ENTRY(cpu_throw)
	testq	%rdi,%rdi
	jnz	1f
	movq	PCPU(IDLETHREAD),%rdi
1:
	movq	TD_PCB(%rdi),%r8		/* Old pcb */
	movl	PCPU(CPUID), %eax
	movq	PCB_FSBASE(%r8),%r9
	movq	PCB_GSBASE(%r8),%r10
	/* release bit from old pm_active */
	movq	TD_PROC(%rdi), %rdx		/* oldtd->td_proc */
	movq	P_VMSPACE(%rdx), %rdx		/* proc->p_vmspace */
	LK btrl	%eax, VM_PMAP+PM_ACTIVE(%rdx)	/* clear old */
	movq	TD_PCB(%rsi),%r8		/* newtd->td_pcb */
	movq	PCB_CR3(%r8),%rdx
	movq	%rdx,%cr3			/* new address space */
	jmp	swact
END(cpu_throw)

/*
 * cpu_switch(old, new, mtx)
 *
 * Save the current thread state, then select the next thread to run
 * and load its state.
 * %rdi = oldtd
 * %rsi = newtd
 * %rdx = mtx
 */
ENTRY(cpu_switch)
	/* Switch to new thread.  First, save context. */
	movq	TD_PCB(%rdi),%r8

	movq	(%rsp),%rax			/* Hardware registers */
	movq	%r15,PCB_R15(%r8)
	movq	%r14,PCB_R14(%r8)
	movq	%r13,PCB_R13(%r8)
	movq	%r12,PCB_R12(%r8)
	movq	%rbp,PCB_RBP(%r8)
	movq	%rsp,PCB_RSP(%r8)
	movq	%rbx,PCB_RBX(%r8)
	movq	%rax,PCB_RIP(%r8)
	movq	PCB_FSBASE(%r8),%r9
	movq	PCB_GSBASE(%r8),%r10

	testl	$PCB_32BIT,PCB_FLAGS(%r8)
	jnz	store_gs			/* static predict not taken */
done_store_gs:

	testl	$PCB_DBREGS,PCB_FLAGS(%r8)
	jnz	store_dr			/* static predict not taken */
done_store_dr:

	/* have we used fp, and need a save? */
	cmpq	%rdi,PCPU(FPCURTHREAD)
	jne	1f
	addq	$PCB_SAVEFPU,%r8
	clts
	fxsave	(%r8)
	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
	xorl	%eax,%eax
	movq	%rax,PCPU(FPCURTHREAD)
1:

	/* Save is done.  Now fire up new thread.  Leave old vmspace. */
	movq	TD_PCB(%rsi),%r8

	/* switch address space */
	movq	PCB_CR3(%r8),%rcx
	movq	%cr3,%rax
	cmpq	%rcx,%rax			/* Same address space? */
	jne	swinact
	SETLK	%rdx, TD_LOCK(%rdi)		/* Release the old thread */
	jmp	sw1
swinact:
	movq	%rcx,%cr3			/* new address space */
	movl	PCPU(CPUID), %eax
	/* Release bit from old pmap->pm_active */
	movq	TD_PROC(%rdi), %rcx		/* oldproc */
	movq	P_VMSPACE(%rcx), %rcx
	LK btrl	%eax, VM_PMAP+PM_ACTIVE(%rcx)	/* clear old */
	SETLK	%rdx, TD_LOCK(%rdi)		/* Release the old thread */
swact:
	/* Set bit in new pmap->pm_active */
	movq	TD_PROC(%rsi),%rdx		/* newproc */
	movq	P_VMSPACE(%rdx), %rdx
	LK btsl	%eax, VM_PMAP+PM_ACTIVE(%rdx)	/* set new */

sw1:
#if defined(SCHED_ULE) && defined(SMP)
	/* Wait for the new thread to become unblocked */
	movq	$blocked_lock, %rdx
1:
	movq	TD_LOCK(%rsi),%rcx
	cmpq	%rcx, %rdx
	pause
	je	1b
#endif
	/*
	 * At this point, we've switched address spaces and are ready
	 * to load up the rest of the next context.
	 */

	/* Skip loading user fsbase/gsbase for kthreads */
	testl	$TDP_KTHREAD,TD_PFLAGS(%rsi)
	jnz	do_kthread

	cmpq	PCB_FSBASE(%r8),%r9
	jz	1f
	/* Restore userland %fs */
	movl	$MSR_FSBASE,%ecx
	movl	PCB_FSBASE(%r8),%eax
	movl	PCB_FSBASE+4(%r8),%edx
	wrmsr
1:

	cmpq	PCB_GSBASE(%r8),%r10
	jz	2f
	/* Restore userland %gs */
	movl	$MSR_KGSBASE,%ecx
	movl	PCB_GSBASE(%r8),%eax
	movl	PCB_GSBASE+4(%r8),%edx
	wrmsr
2:
do_tss:

	/* Update the TSS_RSP0 pointer for the next interrupt */
	movq	PCPU(TSSP), %rax
	movq	%r8, PCPU(RSP0)
	movq	%r8, PCPU(CURPCB)
	addq	$COMMON_TSS_RSP0, %rax
	movq	%rsi, PCPU(CURTHREAD)		/* into next thread */
	movq	%r8, (%rax)

	/* Test if debug registers should be restored. */
	testl	$PCB_DBREGS,PCB_FLAGS(%r8)
	jnz	load_dr				/* static predict not taken */
done_load_dr:

	testl	$PCB_32BIT,PCB_FLAGS(%r8)
	jnz	load_gs				/* static predict not taken */
done_load_gs:

	/* Restore context. */
	movq	PCB_R15(%r8),%r15
	movq	PCB_R14(%r8),%r14
	movq	PCB_R13(%r8),%r13
	movq	PCB_R12(%r8),%r12
	movq	PCB_RBP(%r8),%rbp
	movq	PCB_RSP(%r8),%rsp
	movq	PCB_RBX(%r8),%rbx
	movq	PCB_RIP(%r8),%rax
	movq	%rax,(%rsp)
	ret

	/*
	 * We order these strangely for several reasons.
	 * 1: I wanted to use static branch prediction hints
	 * 2: Most athlon64/opteron cpus don't have them.  They define
	 *    a forward branch as 'predict not taken'.  Intel cores have
	 *    the 'rep' prefix to invert this.
	 *    So, to make it work on both forms of cpu we do the detour.
	 *    We use jumps rather than call in order to avoid the stack.
	 */

do_kthread:
	/*
	 * Copy old fs/gsbase to new kthread pcb for future switches
	 * This maintains curpcb->pcb_[fg]sbase as caches of the MSR
	 */
	movq	%r9,PCB_FSBASE(%r8)
	movq	%r10,PCB_GSBASE(%r8)
	jmp	do_tss

store_gs:
	movl	%gs,PCB_GS(%r8)
	movq	PCB_GS32P(%r8),%rax
	movq	(%rax),%rax
	movq	%rax,PCB_GS32SD(%r8)
	jmp	done_store_gs

load_gs:
	/* Restore userland %gs while preserving kernel gsbase */
	movq	PCB_GS32P(%r8),%rax
	movq	PCB_GS32SD(%r8),%rcx
	movq	%rcx,(%rax)
	movl	$MSR_GSBASE,%ecx
	rdmsr
	movl	PCB_GS(%r8),%gs
	wrmsr
	jmp	done_load_gs

store_dr:
	movq	%dr7,%rax			/* yes, do the save */
	movq	%dr0,%r15
	movq	%dr1,%r14
	movq	%dr2,%r13
	movq	%dr3,%r12
	movq	%dr6,%r11
	andq	$0x0000fc00, %rax		/* disable all watchpoints */
	movq	%r15,PCB_DR0(%r8)
	movq	%r14,PCB_DR1(%r8)
	movq	%r13,PCB_DR2(%r8)
	movq	%r12,PCB_DR3(%r8)
	movq	%r11,PCB_DR6(%r8)
	movq	%rax,PCB_DR7(%r8)
	movq	%rax,%dr7
	jmp	done_store_dr

load_dr:
	movq	%dr7,%rax
	movq	PCB_DR0(%r8),%r15
	movq	PCB_DR1(%r8),%r14
	movq	PCB_DR2(%r8),%r13
	movq	PCB_DR3(%r8),%r12
	movq	PCB_DR6(%r8),%r11
	movq	PCB_DR7(%r8),%rcx
	movq	%r15,%dr0
	movq	%r14,%dr1
	/* Preserve reserved bits in %dr7 */
	andq	$0x0000fc00,%rax
	andq	$~0x0000fc00,%rcx
	movq	%r13,%dr2
	movq	%r12,%dr3
	orq	%rcx,%rax
	movq	%r11,%dr6
	movq	%rax,%dr7
	jmp	done_load_dr

END(cpu_switch)

/*
 * savectx(pcb)
 * Update pcb, saving current processor state.
 */
ENTRY(savectx)
	/* Fetch PCB. */
	movq	%rdi,%rcx

	/* Save caller's return address. */
	movq	(%rsp),%rax
	movq	%rax,PCB_RIP(%rcx)

	movq	%cr3,%rax
	movq	%rax,PCB_CR3(%rcx)

	movq	%rbx,PCB_RBX(%rcx)
	movq	%rsp,PCB_RSP(%rcx)
	movq	%rbp,PCB_RBP(%rcx)
	movq	%r12,PCB_R12(%rcx)
	movq	%r13,PCB_R13(%rcx)
	movq	%r14,PCB_R14(%rcx)
	movq	%r15,PCB_R15(%rcx)

	/*
	 * If fpcurthread == NULL, then the fpu h/w state is irrelevant and the
	 * state had better already be in the pcb.  This is true for forks
	 * but not for dumps (the old book-keeping with FP flags in the pcb
	 * always lost for dumps because the dump pcb has 0 flags).
	 *
	 * If fpcurthread != NULL, then we have to save the fpu h/w state to
	 * fpcurthread's pcb and copy it to the requested pcb, or save to the
	 * requested pcb and reload.  Copying is easier because we would
	 * have to handle h/w bugs for reloading.  We used to lose the
	 * parent's fpu state for forks by forgetting to reload.
	 */
	pushfq
	cli
	movq	PCPU(FPCURTHREAD),%rax
	testq	%rax,%rax
	je	1f

	movq	TD_PCB(%rax),%rdi
	leaq	PCB_SAVEFPU(%rdi),%rdi
	clts
	fxsave	(%rdi)
	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax

	movq	$PCB_SAVEFPU_SIZE,%rdx		/* arg 3 */
	leaq	PCB_SAVEFPU(%rcx),%rsi		/* arg 2 */
	/* arg 1 (%rdi) already loaded */
	call	bcopy
1:
	popfq

	ret
END(savectx)