/*
 * Intel SMP support routines.
 *
 * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
 * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
 *
 * This code is released under the GNU General Public License version 2 or
 * later.
 */

#include <linux/init.h>

#include <linux/mm.h>
#include <linux/delay.h>
#include <linux/spinlock.h>
#include <linux/kernel_stat.h>
#include <linux/mc146818rtc.h>
#include <linux/cache.h>
#include <linux/interrupt.h>
#include <linux/cpu.h>
#include <linux/module.h>

#include <asm/mtrr.h>
#include <asm/tlbflush.h>
#include <mach_apic.h>

DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0, };

/*
 * The following functions deal with sending IPIs between CPUs.
 *
 * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
 */

static inline int __prepare_ICR(unsigned int shortcut, int vector)
{
	unsigned int icr = shortcut | APIC_DEST_LOGICAL;

	switch (vector) {
	default:
		icr |= APIC_DM_FIXED | vector;
		break;
	case NMI_VECTOR:
		icr |= APIC_DM_NMI;
		break;
	}
	return icr;
}

static inline int __prepare_ICR2(unsigned int mask)
{
	return SET_APIC_DEST_FIELD(mask);
}

void __send_IPI_shortcut(unsigned int shortcut, int vector)
{
	unsigned int cfg;

	/*
	 * Wait for idle.
	 */
	apic_wait_icr_idle();

	/*
	 * No need to touch the target chip field
	 */
	cfg = __prepare_ICR(shortcut, vector);

	/*
	 * Send the IPI. The write to APIC_ICR fires this off.
	 */
	apic_write_around(APIC_ICR, cfg);
}

void fastcall send_IPI_self(int vector)
{
	__send_IPI_shortcut(APIC_DEST_SELF, vector);
}

/*
 * This is used to send an IPI with no shorthand notation (the destination is
 * specified in bits 56 to 63 of the ICR).
 */
static inline void __send_IPI_dest_field(unsigned long mask, int vector)
{
	unsigned long cfg;

	/*
	 * Wait for idle.
	 */
	if (unlikely(vector == NMI_VECTOR))
		safe_apic_wait_icr_idle();
	else
		apic_wait_icr_idle();

	/*
	 * prepare target chip field
	 */
	cfg = __prepare_ICR2(mask);
	apic_write_around(APIC_ICR2, cfg);

	/*
	 * program the ICR
	 */
	cfg = __prepare_ICR(0, vector);

	/*
	 * Send the IPI. The write to APIC_ICR fires this off.
	 */
	apic_write_around(APIC_ICR, cfg);
}

/*
 * This is only used on smaller machines.
 */
void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
{
	unsigned long mask = cpus_addr(cpumask)[0];
	unsigned long flags;

	local_irq_save(flags);
	WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
	__send_IPI_dest_field(mask, vector);
	local_irq_restore(flags);
}

void send_IPI_mask_sequence(cpumask_t mask, int vector)
{
	unsigned long flags;
	unsigned int query_cpu;

	/*
	 * Hack. The clustered APIC addressing mode doesn't allow us to send
	 * to an arbitrary mask, so I do a unicast to each CPU instead. This
	 * should be modified to do 1 message per cluster ID - mbligh
	 */

	local_irq_save(flags);
	for (query_cpu = 0; query_cpu < NR_CPUS; ++query_cpu) {
		if (cpu_isset(query_cpu, mask)) {
			__send_IPI_dest_field(cpu_to_logical_apicid(query_cpu),
					      vector);
		}
	}
	local_irq_restore(flags);
}

#include <mach_ipi.h> /* must come after the send_IPI functions above for inlining */

/*
 * Smarter SMP flushing macros.
 * c/o Linus Torvalds.
 *
 * These mean you can really definitely utterly forget about
 * writing to user space from interrupts. (It's not allowed anyway.)
 *
 * Optimizations Manfred Spraul <manfred@colorfullife.com>
 */

static cpumask_t flush_cpumask;
static struct mm_struct *flush_mm;
static unsigned long flush_va;
static DEFINE_SPINLOCK(tlbstate_lock);

/*
 * We cannot call mmdrop() because we are in interrupt context;
 * instead update mm->cpu_vm_mask.
 *
 * We need to reload %cr3 since the page tables may be going
 * away from under us..
 */
static inline void leave_mm(unsigned long cpu)
{
	if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
		BUG();
	cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
	load_cr3(swapper_pg_dir);
}

/*
 * The flush IPI assumes that a thread switch happens in this order:
 * [cpu0: the cpu that switches]
 * 1) switch_mm() either 1a) or 1b)
 * 1a) thread switch to a different mm
 * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
 *	Stop IPI delivery for the old mm. This is not synchronized with
 *	the other cpus, but smp_invalidate_interrupt ignores flush IPIs
 *	for the wrong mm, so in the worst case we perform a superfluous
 *	tlb flush.
 * 1a2) set cpu_tlbstate to TLBSTATE_OK
 *	Now smp_invalidate_interrupt won't call leave_mm if cpu0
 *	was in lazy tlb mode.
 * 1a3) update cpu_tlbstate[].active_mm
 *	Now cpu0 accepts tlb flushes for the new mm.
 * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
 *	Now the other cpus will send tlb flush IPIs.
 * 1a5) change cr3.
 * 1b) thread switch without mm change
 *	cpu_tlbstate[].active_mm is correct, cpu0 already handles
 *	flush IPIs.
 * 1b1) set cpu_tlbstate to TLBSTATE_OK
 * 1b2) test_and_set the cpu bit in cpu_vm_mask.
 *	Atomically set the bit [other cpus will start sending flush IPIs],
 *	and test the bit.
 * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
 * 2) switch %%esp, i.e. current
 *
 * The interrupt must handle 2 special cases:
 * - cr3 is changed before %%esp, i.e. it cannot use current->{active_,}mm.
 * - the cpu performs speculative tlb reads, i.e. even if the cpu only
 *   runs in kernel space, the cpu could load tlb entries for user space
 *   pages.
 *
 * The good news is that cpu_tlbstate is local to each cpu, so there are
 * no write/read ordering problems.
 */
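
/*
 * For illustration only: a hedged sketch (not the actual switch_mm(),
 * which lives in the mmu_context headers) of how the 1b) ordering above
 * plays out on the switching CPU:
 *
 *	per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_OK;
 *	if (!cpu_test_and_set(cpu, next->cpu_vm_mask))
 *		load_cr3(next->pgd);	(leave_mm() ran: reload %cr3)
 *
 * The bit is set atomically before we know whether a flush IPI was
 * missed, so the worst case is one superfluous %cr3 reload.
 */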

/*
 * TLB flush IPI:
 *
 * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
 * 2) Leave the mm if we are in lazy tlb mode.
 */

fastcall void smp_invalidate_interrupt(struct pt_regs *regs)
{
	unsigned long cpu;

	cpu = get_cpu();

	if (!cpu_isset(cpu, flush_cpumask))
		goto out;
	/*
	 * This was a BUG() but until someone can quote me the
	 * line from the Intel manual that guarantees an IPI to
	 * multiple CPUs is retried _only_ on the erroring CPUs
	 * it's staying as a return
	 *
	 * BUG();
	 */

	if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
		if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
			if (flush_va == TLB_FLUSH_ALL)
				local_flush_tlb();
			else
				__flush_tlb_one(flush_va);
		} else
			leave_mm(cpu);
	}
	ack_APIC_irq();
	smp_mb__before_clear_bit();
	cpu_clear(cpu, flush_cpumask);
	smp_mb__after_clear_bit();
out:
	put_cpu_no_resched();
}

void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
			     unsigned long va)
{
	cpumask_t cpumask = *cpumaskp;

	/*
	 * A couple of (to be removed) sanity checks:
	 *
	 * - current CPU must not be in mask
	 * - mask must exist :)
	 */
	BUG_ON(cpus_empty(cpumask));
	BUG_ON(cpu_isset(smp_processor_id(), cpumask));
	BUG_ON(!mm);

#ifdef CONFIG_HOTPLUG_CPU
	/* If a CPU which we ran on has gone down, OK. */
	cpus_and(cpumask, cpumask, cpu_online_map);
	if (unlikely(cpus_empty(cpumask)))
		return;
#endif

	/*
	 * I'm not happy about this global shared spinlock in the
	 * MM hot path, but we'll see how contended it is.
	 * AK: x86-64 has a faster method that could be ported.
	 */
	spin_lock(&tlbstate_lock);

	flush_mm = mm;
	flush_va = va;
	cpus_or(flush_cpumask, cpumask, flush_cpumask);
	/*
	 * We have to send the IPI only to
	 * CPUs affected.
	 */
	send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);

	while (!cpus_empty(flush_cpumask))
		/* nothing. lockup detection does not belong here */
		cpu_relax();

	flush_mm = NULL;
	flush_va = 0;
	spin_unlock(&tlbstate_lock);
}

void flush_tlb_current_task(void)
{
	struct mm_struct *mm = current->mm;
	cpumask_t cpu_mask;

	preempt_disable();
	cpu_mask = mm->cpu_vm_mask;
	cpu_clear(smp_processor_id(), cpu_mask);

	local_flush_tlb();
	if (!cpus_empty(cpu_mask))
		flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
	preempt_enable();
}

void flush_tlb_mm(struct mm_struct *mm)
{
	cpumask_t cpu_mask;

	preempt_disable();
	cpu_mask = mm->cpu_vm_mask;
	cpu_clear(smp_processor_id(), cpu_mask);

	if (current->active_mm == mm) {
		if (current->mm)
			local_flush_tlb();
		else
			leave_mm(smp_processor_id());
	}
	if (!cpus_empty(cpu_mask))
		flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);

	preempt_enable();
}

void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
{
	struct mm_struct *mm = vma->vm_mm;
	cpumask_t cpu_mask;

	preempt_disable();
	cpu_mask = mm->cpu_vm_mask;
	cpu_clear(smp_processor_id(), cpu_mask);

	if (current->active_mm == mm) {
		if (current->mm)
			__flush_tlb_one(va);
		else
			leave_mm(smp_processor_id());
	}

	if (!cpus_empty(cpu_mask))
		flush_tlb_others(cpu_mask, mm, va);

	preempt_enable();
}
EXPORT_SYMBOL(flush_tlb_page);

static void do_flush_tlb_all(void *info)
{
	unsigned long cpu = smp_processor_id();

	__flush_tlb_all();
	if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY)
		leave_mm(cpu);
}

void flush_tlb_all(void)
{
	on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
}

/*
 * This function sends a 'reschedule' IPI to another CPU.
 * It goes straight through and wastes no time serializing
 * anything. Worst case is that we lose a reschedule ...
 */
static void native_smp_send_reschedule(int cpu)
{
	WARN_ON(cpu_is_offline(cpu));
	send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
}

/*
 * Structure and data for smp_call_function(). This is designed to minimise
 * static memory requirements. It also looks cleaner.
 */
static DEFINE_SPINLOCK(call_lock);

struct call_data_struct {
	void (*func) (void *info);
	void *info;
	atomic_t started;
	atomic_t finished;
	int wait;
};

void lock_ipi_call_lock(void)
{
	spin_lock_irq(&call_lock);
}

void unlock_ipi_call_lock(void)
{
	spin_unlock_irq(&call_lock);
}

static struct call_data_struct *call_data;

static void __smp_call_function(void (*func) (void *info), void *info,
				int nonatomic, int wait)
{
	struct call_data_struct data;
	int cpus = num_online_cpus() - 1;

	if (!cpus)
		return;

	data.func = func;
	data.info = info;
	atomic_set(&data.started, 0);
	data.wait = wait;
	if (wait)
		atomic_set(&data.finished, 0);

	call_data = &data;
	mb();

	/* Send a message to all other CPUs and wait for them to respond */
	send_IPI_allbutself(CALL_FUNCTION_VECTOR);

	/* Wait for response */
	while (atomic_read(&data.started) != cpus)
		cpu_relax();

	if (wait)
		while (atomic_read(&data.finished) != cpus)
			cpu_relax();
}

/**
 * smp_call_function_mask(): Run a function on a set of other CPUs.
 * @mask: The set of cpus to run on. Must not include the current cpu.
 * @func: The function to run. This must be fast and non-blocking.
 * @info: An arbitrary pointer to pass to the function.
 * @wait: If true, wait (atomically) until function has completed on other CPUs.
 *
 * Returns 0 on success, else a negative status code.
 *
 * If @wait is true, then returns once @func has returned; otherwise
 * it returns just before the target cpu calls @func.
 *
 * You must not call this function with disabled interrupts or from a
 * hardware interrupt handler or from a bottom half handler.
 */
static int
native_smp_call_function_mask(cpumask_t mask,
			      void (*func)(void *), void *info,
			      int wait)
{
	struct call_data_struct data;
	cpumask_t allbutself;
	int cpus;

	/* Can deadlock when called with interrupts disabled */
	WARN_ON(irqs_disabled());

	/* Holding any lock stops cpus from going down. */
	spin_lock(&call_lock);

	allbutself = cpu_online_map;
	cpu_clear(smp_processor_id(), allbutself);

	cpus_and(mask, mask, allbutself);
	cpus = cpus_weight(mask);

	if (!cpus) {
		spin_unlock(&call_lock);
		return 0;
	}

	data.func = func;
	data.info = info;
	atomic_set(&data.started, 0);
	data.wait = wait;
	if (wait)
		atomic_set(&data.finished, 0);

	call_data = &data;
	mb();

	/* Send a message to other CPUs */
	if (cpus_equal(mask, allbutself))
		send_IPI_allbutself(CALL_FUNCTION_VECTOR);
	else
		send_IPI_mask(mask, CALL_FUNCTION_VECTOR);

	/* Wait for response */
	while (atomic_read(&data.started) != cpus)
		cpu_relax();

	if (wait)
		while (atomic_read(&data.finished) != cpus)
			cpu_relax();
	spin_unlock(&call_lock);

	return 0;
}

static void stop_this_cpu(void *dummy)
{
	local_irq_disable();
	/*
	 * Remove this CPU:
	 */
	cpu_clear(smp_processor_id(), cpu_online_map);
	disable_local_APIC();
	if (cpu_data[smp_processor_id()].hlt_works_ok)
		for (;;) halt();
	for (;;);
}

/*
 * This function calls the 'stop' function on all other CPUs in the system.
 */

static void native_smp_send_stop(void)
{
	/* Don't deadlock on the call lock in panic */
	int nolock = !spin_trylock(&call_lock);
	unsigned long flags;

	local_irq_save(flags);
	__smp_call_function(stop_this_cpu, NULL, 0, 0);
	if (!nolock)
		spin_unlock(&call_lock);
	disable_local_APIC();
	local_irq_restore(flags);
}
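
/*
 * Illustrative only, a hedged sketch rather than code used in this
 * file: a typical caller of native_smp_call_function_mask() above
 * (reached through the smp_ops wrappers in the headers) looks roughly
 * like
 *
 *	static void drain_local_state(void *unused)
 *	{
 *		... fast, non-blocking work, runs in interrupt context ...
 *	}
 *
 *	smp_call_function_mask(mask, drain_local_state, NULL, 1);
 *
 * called with interrupts enabled and never from a hardware interrupt
 * or bottom half handler, as the kerneldoc above spells out.
 */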

/*
 * Reschedule call back. Nothing to do,
 * all the work is done automatically when
 * we return from the interrupt.
 */
fastcall void smp_reschedule_interrupt(struct pt_regs *regs)
{
	ack_APIC_irq();
}

fastcall void smp_call_function_interrupt(struct pt_regs *regs)
{
	void (*func) (void *info) = call_data->func;
	void *info = call_data->info;
	int wait = call_data->wait;

	ack_APIC_irq();
	/*
	 * Notify initiating CPU that I've grabbed the data and am
	 * about to execute the function
	 */
	mb();
	atomic_inc(&call_data->started);
	/*
	 * At this point the info structure may be out of scope unless wait==1
	 */
	irq_enter();
	(*func)(info);
	irq_exit();

	if (wait) {
		mb();
		atomic_inc(&call_data->finished);
	}
}

static int convert_apicid_to_cpu(int apic_id)
{
	int i;

	for (i = 0; i < NR_CPUS; i++) {
		if (x86_cpu_to_apicid[i] == apic_id)
			return i;
	}
	return -1;
}

int safe_smp_processor_id(void)
{
	int apicid, cpuid;

	if (!boot_cpu_has(X86_FEATURE_APIC))
		return 0;

	apicid = hard_smp_processor_id();
	if (apicid == BAD_APICID)
		return 0;

	cpuid = convert_apicid_to_cpu(apicid);

	return cpuid >= 0 ? cpuid : 0;
}

struct smp_ops smp_ops = {
	.smp_prepare_boot_cpu = native_smp_prepare_boot_cpu,
	.smp_prepare_cpus = native_smp_prepare_cpus,
	.cpu_up = native_cpu_up,
	.smp_cpus_done = native_smp_cpus_done,

	.smp_send_stop = native_smp_send_stop,
	.smp_send_reschedule = native_smp_send_reschedule,
	.smp_call_function_mask = native_smp_call_function_mask,
};
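
/*
 * The native routines above are reached by generic code through this
 * ops table. A minimal sketch of the dispatch pattern (illustrative
 * only; the real inline wrappers live in the asm headers):
 *
 *	static inline void smp_send_reschedule(int cpu)
 *	{
 *		smp_ops.smp_send_reschedule(cpu);
 *	}
 *
 * Paravirtualized guests can point these hooks at their own
 * implementations instead of the native_* functions.
 */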