sys/i386/include/mptable.h: diff of revision 99766 (old, deleted lines) against revision 99862 (new, added lines)
1/*
2 * Copyright (c) 1996, by Steve Passe
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright

--- 8 unchanged lines hidden ---

17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 *
25 * $FreeBSD: head/sys/i386/include/mptable.h 99766 2002-07-11 08:31:10Z peter $
25 * $FreeBSD: head/sys/i386/include/mptable.h 99862 2002-07-12 07:56:11Z peter $
26 */
27
28#include "opt_cpu.h"
29#include "opt_kstack_pages.h"
30
31#ifdef SMP
32#include <machine/smptests.h>
33#else

--- 249 unchanged lines hidden ---

283/* Hotwire a 0->4MB V==P mapping */
284extern pt_entry_t *KPTphys;
285
286/* SMP page table page */
287extern pt_entry_t *SMPpt;
288
289struct pcb stoppcbs[MAXCPU];
290
291#ifdef APIC_IO
292/* Variables needed for SMP tlb shootdown. */
293vm_offset_t smp_tlb_addr1;
294vm_offset_t smp_tlb_addr2;
295volatile int smp_tlb_wait;
296static struct mtx smp_tlb_mtx;
297#endif
298
291/*
292 * Local data and functions.
293 */
294
295/* Set to 1 once we're ready to let the APs out of the pen. */
296static volatile int aps_ready = 0;
297
298static int mp_capable;

--- 32 unchanged lines hidden ---

331
332static void
333init_locks(void)
334{
335
336#ifdef USE_COMLOCK
337 mtx_init(&com_mtx, "com", NULL, MTX_SPIN);
338#endif /* USE_COMLOCK */
299/*
300 * Local data and functions.
301 */
302
303/* Set to 1 once we're ready to let the APs out of the pen. */
304static volatile int aps_ready = 0;
305
306static int mp_capable;

--- 32 unchanged lines hidden ---

339
340static void
341init_locks(void)
342{
343
344#ifdef USE_COMLOCK
345 mtx_init(&com_mtx, "com", NULL, MTX_SPIN);
346#endif /* USE_COMLOCK */
347#ifdef APIC_IO
348 mtx_init(&smp_tlb_mtx, "tlb", NULL, MTX_SPIN);
349#endif
339}
340
341/*
342 * Calculate usable address in base memory for AP trampoline code.
343 */
344u_int
345mp_bootaddress(u_int basemem)
346{

--- 253 unchanged lines hidden ---

600
601 /* install a 'Spurious INTerrupt' vector */
602 setidt(XSPURIOUSINT_OFFSET, Xspuriousint,
603 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
604
605 /* install an inter-CPU IPI for TLB invalidation */
606 setidt(XINVLTLB_OFFSET, Xinvltlb,
607 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
350}
351
352/*
353 * Calculate usable address in base memory for AP trampoline code.
354 */
355u_int
356mp_bootaddress(u_int basemem)
357{

--- 253 unchanged lines hidden ---

611
612 /* install a 'Spurious INTerrupt' vector */
613 setidt(XSPURIOUSINT_OFFSET, Xspuriousint,
614 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
615
616 /* install an inter-CPU IPI for TLB invalidation */
617 setidt(XINVLTLB_OFFSET, Xinvltlb,
618 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
619 setidt(XINVLPG_OFFSET, Xinvlpg,
620 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
621 setidt(XINVLRNG_OFFSET, Xinvlrng,
622 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
608
609 /* install an inter-CPU IPI for forwarding hardclock() */
610 setidt(XHARDCLOCK_OFFSET, Xhardclock,
611 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
612
613 /* install an inter-CPU IPI for forwarding statclock() */
614 setidt(XSTATCLOCK_OFFSET, Xstatclock,
615 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

--- 1569 unchanged lines hidden ---

2185 set_apic_timer(5000000);/* == 5 seconds */
2186 while (read_apic_timer())
2187 if (mp_ncpus > cpus)
2188 return 1; /* return SUCCESS */
2189
2190 return 0; /* return FAILURE */
2191}
2192
623
624 /* install an inter-CPU IPI for forwarding hardclock() */
625 setidt(XHARDCLOCK_OFFSET, Xhardclock,
626 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
627
628 /* install an inter-CPU IPI for forwarding statclock() */
629 setidt(XSTATCLOCK_OFFSET, Xstatclock,
630 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

--- 1569 unchanged lines hidden ---

2200 set_apic_timer(5000000);/* == 5 seconds */
2201 while (read_apic_timer())
2202 if (mp_ncpus > cpus)
2203 return 1; /* return SUCCESS */
2204
2205 return 0; /* return FAILURE */
2206}
2207
2193#if defined(APIC_IO) && defined(COUNT_XINVLTLB_HITS)
2194u_int xhits[MAXCPU];
2195SYSCTL_OPAQUE(_debug, OID_AUTO, xhits, CTLFLAG_RW, &xhits, sizeof(xhits),
2196 "IU", "");
2208#if defined(APIC_IO)
2209
2210#ifdef COUNT_XINVLTLB_HITS
2211u_int xhits_gbl[MAXCPU];
2212u_int xhits_pg[MAXCPU];
2213u_int xhits_rng[MAXCPU];
2214SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
2215SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
2216 sizeof(xhits_gbl), "IU", "");
2217SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
2218 sizeof(xhits_pg), "IU", "");
2219SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
2220 sizeof(xhits_rng), "IU", "");
2221
2222u_int ipi_global;
2223u_int ipi_page;
2224u_int ipi_range;
2225u_int ipi_range_size;
2226SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
2227SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
2228SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
2229SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
2230 0, "");
2231
2232u_int ipi_masked_global;
2233u_int ipi_masked_page;
2234u_int ipi_masked_range;
2235u_int ipi_masked_range_size;
2236SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
2237 &ipi_masked_global, 0, "");
2238SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
2239 &ipi_masked_page, 0, "");
2240SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
2241 &ipi_masked_range, 0, "");
2242SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
2243 &ipi_masked_range_size, 0, "");
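With a kernel built with both APIC_IO and COUNT_XINVLTLB_HITS, the counters above surface under the debug.xhits sysctl tree; the OID names below are inferred from the SYSCTL_NODE/SYSCTL_INT declarations and do not exist on kernels built without those options. A minimal userland reader might look like this sketch:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
        u_int global, page, range;
        size_t len;

        /* debug.xhits.ipi_global counts full-TLB shootdown IPIs issued */
        len = sizeof(global);
        if (sysctlbyname("debug.xhits.ipi_global", &global, &len, NULL, 0) == -1) {
                perror("debug.xhits.ipi_global");
                return (1);
        }

        /* single-page and page-range shootdowns */
        len = sizeof(page);
        sysctlbyname("debug.xhits.ipi_page", &page, &len, NULL, 0);
        len = sizeof(range);
        sysctlbyname("debug.xhits.ipi_range", &range, &len, NULL, 0);

        printf("ipi_global %u ipi_page %u ipi_range %u\n", global, page, range);
        return (0);
}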
2197#endif
2198
2199/*
2200 * Flush the TLB on all other CPU's
2244#endif
2245
2246/*
2247 * Flush the TLB on all other CPU's
2248 */
2249static void
2250smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
2251{
2252 u_int ncpu;
2253 register_t eflags;
2254
2255 ncpu = mp_ncpus - 1; /* does not shootdown self */
2256 if (ncpu < 1)
2257 return; /* no other cpus */
2258 eflags = read_eflags();
2259 if ((eflags & PSL_I) == 0)
2260 panic("absolutely cannot call smp_ipi_shootdown with interrupts already disabled");
2261 mtx_lock_spin(&smp_tlb_mtx);
2262 smp_tlb_addr1 = addr1;
2263 smp_tlb_addr2 = addr2;
2264 atomic_store_rel_int(&smp_tlb_wait, 0);
2265 ipi_all_but_self(vector);
2266 while (smp_tlb_wait < ncpu)
2267 ia32_pause();
2268 mtx_unlock_spin(&smp_tlb_mtx);
2269}
2270
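The handshake above is one-sided: the initiator publishes smp_tlb_addr1/smp_tlb_addr2 under smp_tlb_mtx, zeroes smp_tlb_wait, sends the IPI, and spins until every other CPU has checked in. The other half lives in the low-level handlers behind Xinvltlb, Xinvlpg and Xinvlrng, which are outside this file; conceptually each receiving CPU does something like the following sketch (illustration only, not the real handler code):

/* Illustrative only: what a receiving CPU's range-invalidate handler boils down to. */
static void
example_invlrng_handler(void)
{
        vm_offset_t addr;

        /* invalidate the requested range on this CPU */
        for (addr = smp_tlb_addr1; addr < smp_tlb_addr2; addr += PAGE_SIZE)
                invlpg(addr);

        /* check in so the initiator's smp_tlb_wait < ncpu spin can finish */
        atomic_add_int(&smp_tlb_wait, 1);
}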
2271/*
2272 * This is about as magic as it gets. fortune(1) has got similar code
2273 * for reversing bits in a word. Who thinks up this stuff??
2201 *
2274 *
2202 * XXX: Needs to handshake and wait for completion before proceding.
2275 * Yes, it does appear to be consistently faster than:
2276 * while (i = ffs(m)) {
2277 * m >>= i;
2278 * bits++;
2279 * }
2280 * and
2281 * while (lsb = (m & -m)) { // This is magic too
2282 * m &= ~lsb; // or: m ^= lsb
2283 * bits++;
2284 * }
2285 * Both of these latter forms do some very strange things on gcc-3.1 with
2286 * -mcpu=pentiumpro and/or -march=pentiumpro and/or -O or -O2.
2287 * There is probably an SSE or MMX popcnt instruction.
2288 *
2289 * I wonder if this should be in libkern?
2290 *
2291 * XXX Stop the presses! Another one:
2292 * static __inline u_int32_t
2293 * popcnt1(u_int32_t v)
2294 * {
2295 * v -= ((v >> 1) & 0x55555555);
2296 * v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
2297 * v = (v + (v >> 4)) & 0x0F0F0F0F;
2298 * return (v * 0x01010101) >> 24;
2299 * }
2300 * The downside is that it has a multiply. With a pentium3 with
2301 * -mcpu=pentiumpro and -march=pentiumpro then gcc-3.1 will use
2302 * an imull, and in that case it is faster. In most other cases
2303 * it appears slightly slower.
2203 */
2304 */
2305static __inline u_int32_t
2306popcnt(u_int32_t m)
2307{
2308
2309 m = (m & 0x55555555) + ((m & 0xaaaaaaaa) >> 1);
2310 m = (m & 0x33333333) + ((m & 0xcccccccc) >> 2);
2311 m = (m & 0x0f0f0f0f) + ((m & 0xf0f0f0f0) >> 4);
2312 m = (m & 0x00ff00ff) + ((m & 0xff00ff00) >> 8);
2313 m = (m & 0x0000ffff) + ((m & 0xffff0000) >> 16);
2314 return m;
2315}
2316
2317static void
2318smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
2319{
2320 int ncpu, othercpus;
2321 register_t eflags;
2322
2323 othercpus = mp_ncpus - 1;
2324 if (mask == (u_int)-1) {
2325 ncpu = othercpus;
2326 if (ncpu < 1)
2327 return;
2328 } else {
2329 /* XXX there should be a pcpu self mask */
2330 mask &= ~(1 << PCPU_GET(cpuid));
2331 if (mask == 0)
2332 return;
2333 ncpu = popcnt(mask);
2334 if (ncpu > othercpus) {
2335 /* XXX this should be a panic offence */
2336 printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
2337 ncpu, othercpus);
2338 ncpu = othercpus;
2339 }
2340 /* XXX should be a panic, implied by mask == 0 above */
2341 if (ncpu < 1)
2342 return;
2343 }
2344 eflags = read_eflags();
2345 if ((eflags & PSL_I) == 0)
2346 panic("absolutely cannot call smp_targeted_ipi_shootdown with interrupts already disabled");
2347 mtx_lock_spin(&smp_tlb_mtx);
2348 smp_tlb_addr1 = addr1;
2349 smp_tlb_addr2 = addr2;
2350 atomic_store_rel_int(&smp_tlb_wait, 0);
2351 if (mask == (u_int)-1)
2352 ipi_all_but_self(vector);
2353 else
2354 ipi_selected(mask, vector);
2355 while (smp_tlb_wait < ncpu)
2356 ia32_pause();
2357 mtx_unlock_spin(&smp_tlb_mtx);
2358}
2359#endif
2360
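The long comment above popcnt() argues that the unrolled SWAR form beats the ffs() and lowest-set-bit loops, and quotes a multiply-based popcnt1() variant. Both are easy to sanity-check outside the kernel; this stand-alone harness (hypothetical, not part of this file) compares them against a naive one-bit-at-a-time count:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static uint32_t
popcnt(uint32_t m)
{
        /* same five-step SWAR reduction as the kernel version above */
        m = (m & 0x55555555) + ((m & 0xaaaaaaaa) >> 1);
        m = (m & 0x33333333) + ((m & 0xcccccccc) >> 2);
        m = (m & 0x0f0f0f0f) + ((m & 0xf0f0f0f0) >> 4);
        m = (m & 0x00ff00ff) + ((m & 0xff00ff00) >> 8);
        m = (m & 0x0000ffff) + ((m & 0xffff0000) >> 16);
        return (m);
}

static uint32_t
popcnt1(uint32_t v)
{
        /* multiply-based variant quoted in the comment */
        v -= (v >> 1) & 0x55555555;
        v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
        v = (v + (v >> 4)) & 0x0f0f0f0f;
        return ((v * 0x01010101) >> 24);
}

static uint32_t
popcnt_naive(uint32_t m)
{
        uint32_t bits;

        for (bits = 0; m != 0; m >>= 1)
                bits += m & 1;
        return (bits);
}

int
main(void)
{
        uint32_t m;
        int i;

        for (i = 0; i < 1000000; i++) {
                m = (uint32_t)random() ^ ((uint32_t)random() << 17);
                if (popcnt(m) != popcnt_naive(m) || popcnt1(m) != popcnt_naive(m)) {
                        printf("mismatch at 0x%08x\n", m);
                        return (1);
                }
        }
        printf("all three counts agree\n");
        return (0);
}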
2204void
2205smp_invltlb(void)
2206{
2207#if defined(APIC_IO)
2361void
2362smp_invltlb(void)
2363{
2364#if defined(APIC_IO)
2208 if (smp_started)
2209 ipi_all_but_self(IPI_INVLTLB);
2365 if (smp_started) {
2366 smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
2367#ifdef COUNT_XINVLTLB_HITS
2368 ipi_global++;
2369#endif
2370 }
2210#endif /* APIC_IO */
2211}
2212
2213void
2371#endif /* APIC_IO */
2372}
2373
2374void
2214invlpg(u_int addr)
2375smp_invlpg(vm_offset_t addr)
2215{
2376{
2216 __asm __volatile("invlpg (%0)"::"r"(addr):"memory");
2377#if defined(APIC_IO)
2378 if (smp_started) {
2379 smp_tlb_shootdown(IPI_INVLPG, addr, 0);
2380#ifdef COUNT_XINVLTLB_HITS
2381 ipi_page++;
2382#endif
2383 }
2384#endif /* APIC_IO */
2385}
2217
2386
2218 /* send a message to the other CPUs */
2219 smp_invltlb();
2387void
2388smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
2389{
2390#if defined(APIC_IO)
2391 if (smp_started) {
2392 smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
2393#ifdef COUNT_XINVLTLB_HITS
2394 ipi_range++;
2395 ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
2396#endif
2397 }
2398#endif /* APIC_IO */
2220}
2221
2222void
2399}
2400
2401void
2223invltlb(void)
2402smp_masked_invltlb(u_int mask)
2224{
2403{
2225 u_long temp;
2404#if defined(APIC_IO)
2405 if (smp_started) {
2406 smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
2407#ifdef COUNT_XINVLTLB_HITS
2408 ipi_masked_global++;
2409#endif
2410 }
2411#endif /* APIC_IO */
2412}
2226
2413
2227 /*
2228 * This should be implemented as load_cr3(rcr3()) when load_cr3() is
2229 * inlined.
2230 */
2231 __asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory");
2414void
2415smp_masked_invlpg(u_int mask, vm_offset_t addr)
2416{
2417#if defined(APIC_IO)
2418 if (smp_started) {
2419 smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
2420#ifdef COUNT_XINVLTLB_HITS
2421 ipi_masked_page++;
2422#endif
2423 }
2424#endif /* APIC_IO */
2425}
2232
2426
2233 /* send a message to the other CPUs */
2234 smp_invltlb();
2427void
2428smp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2)
2429{
2430#if defined(APIC_IO)
2431 if (smp_started) {
2432 smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
2433#ifdef COUNT_XINVLTLB_HITS
2434 ipi_masked_range++;
2435 ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
2436#endif
2437 }
2438#endif /* APIC_IO */
2235}
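One thing the wrappers above do not do is touch the current CPU: smp_tlb_shootdown() explicitly skips self, so the calling layer is still expected to invalidate its own TLB. A hypothetical caller (illustration only; example_invalidate_range() and its use of a single-page invlpg() inline are assumptions, not code from this file) might combine the two like this:

/* Illustration: flush a VA range locally, then on a chosen set of other CPUs. */
static void
example_invalidate_range(u_int cpumask, vm_offset_t sva, vm_offset_t eva)
{
        vm_offset_t va;

        for (va = sva; va < eva; va += PAGE_SIZE)
                invlpg(va);                     /* this CPU's TLB */
        smp_masked_invlpg_range(cpumask, sva, eva); /* other CPUs in cpumask */
}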
2236
2237
2238/*
2239 * This is called once the rest of the system is up and running and we're
2240 * ready to let the AP's out of the pen.
2241 */
2242extern void enable_sse(void);
2243
2244void
2245ap_init(void)
2246{
2247 u_int apic_id;
2248
2249 /* spin until all the AP's are ready */
2250 while (!aps_ready)
2251 /* spin */ ;
2252
2253 /* BSP may have changed PTD while we were waiting */
2439}
2440
2441
2442/*
2443 * This is called once the rest of the system is up and running and we're
2444 * ready to let the AP's out of the pen.
2445 */
2446extern void enable_sse(void);
2447
2448void
2449ap_init(void)
2450{
2451 u_int apic_id;
2452
2453 /* spin until all the AP's are ready */
2454 while (!aps_ready)
2455 /* spin */ ;
2456
2457 /* BSP may have changed PTD while we were waiting */
2254 cpu_invltlb();
2458 invltlb();
2255
2256#if defined(I586_CPU) && !defined(NO_F00F_HACK)
2257 lidt(&r_idt);
2258#endif
2259
2260 /* set up CPU registers and state */
2261 cpu_setregs();
2262

--- 22 unchanged lines hidden ---

2285
2286 CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid));
2287
2288 smp_cpus++;
2289
2290 /* Build our map of 'other' CPUs. */
2291 PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
2292
2459
2460#if defined(I586_CPU) && !defined(NO_F00F_HACK)
2461 lidt(&r_idt);
2462#endif
2463
2464 /* set up CPU registers and state */
2465 cpu_setregs();
2466

--- 22 unchanged lines hidden ---

2489
2490 CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid));
2491
2492 smp_cpus++;
2493
2494 /* Build our map of 'other' CPUs. */
2495 PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
2496
2497 if (bootverbose)
2498 apic_dump("ap_init()");
2499
2293 printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
2294
2295 if (smp_cpus == mp_ncpus) {
2296 smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */
2297 smp_active = 1; /* historic */
2298 }
2299
2300 mtx_unlock_spin(&ap_boot_mtx);

--- 19 unchanged lines hidden ---

2320 * WARNING! unpend() will call statclock_process() directly and skip this
2321 * routine.
2322 */
2323void
2324forwarded_statclock(struct trapframe frame)
2325{
2326
2327 mtx_lock_spin(&sched_lock);
2500 printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
2501
2502 if (smp_cpus == mp_ncpus) {
2503 smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */
2504 smp_active = 1; /* historic */
2505 }
2506
2507 mtx_unlock_spin(&ap_boot_mtx);

--- 19 unchanged lines hidden ---

2527 * WARNING! unpend() will call statclock_process() directly and skip this
2528 * routine.
2529 */
2530void
2531forwarded_statclock(struct trapframe frame)
2532{
2533
2534 mtx_lock_spin(&sched_lock);
2328 statclock_process(curthread->td_kse, TRAPF_PC(&frame), TRAPF_USERMODE(&frame));
2535 statclock_process(curthread->td_kse, TRAPF_PC(&frame),
2536 TRAPF_USERMODE(&frame));
2329 mtx_unlock_spin(&sched_lock);
2330}
2331
2332void
2333forward_statclock(void)
2334{
2335 int map;
2336

--- 111 unchanged lines hidden ---
2537 mtx_unlock_spin(&sched_lock);
2538}
2539
2540void
2541forward_statclock(void)
2542{
2543 int map;
2544

--- 111 unchanged lines hidden ---