1/* 2 * Written by: Patricia Gaughen, IBM Corporation 3 * 4 * Copyright (C) 2002, IBM Corp. 5 * Copyright (C) 2009, Red Hat, Inc., Ingo Molnar 6 * 7 * All rights reserved. 8 * 9 * This program is free software; you can redistribute it and/or modify 10 * it under the terms of the GNU General Public License as published by 11 * the Free Software Foundation; either version 2 of the License, or 12 * (at your option) any later version. 13 * 14 * This program is distributed in the hope that it will be useful, but 15 * WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or 17 * NON INFRINGEMENT. See the GNU General Public License for more 18 * details. 19 * 20 * You should have received a copy of the GNU General Public License 21 * along with this program; if not, write to the Free Software 22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 23 * 24 * Send feedback to <gone@us.ibm.com> 25 */ 26#include <linux/nodemask.h> 27#include <linux/topology.h> 28#include <linux/bootmem.h> 29#include <linux/threads.h> 30#include <linux/cpumask.h> 31#include <linux/kernel.h> 32#include <linux/mmzone.h> 33#include <linux/module.h> 34#include <linux/string.h> 35#include <linux/init.h> 36#include <linux/numa.h> 37#include <linux/smp.h> 38#include <linux/io.h> 39#include <linux/mm.h> 40 41#include <asm/processor.h> 42#include <asm/fixmap.h> 43#include <asm/mpspec.h> 44#include <asm/numaq.h> 45#include <asm/setup.h> 46#include <asm/apic.h> 47#include <asm/e820.h> 48#include <asm/ipi.h> 49 50#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) 51 52int found_numaq; 53 54/* 55 * Have to match translation table entries to main table entries by counter 56 * hence the mpc_record variable .... can't see a less disgusting way of 57 * doing this .... 58 */ 59struct mpc_trans { 60 unsigned char mpc_type; 61 unsigned char trans_len; 62 unsigned char trans_type; 63 unsigned char trans_quad; 64 unsigned char trans_global; 65 unsigned char trans_local; 66 unsigned short trans_reserved; 67}; 68 69static int mpc_record; 70 71static struct mpc_trans *translation_table[MAX_MPC_ENTRY]; 72 73int mp_bus_id_to_node[MAX_MP_BUSSES]; 74int mp_bus_id_to_local[MAX_MP_BUSSES]; 75int quad_local_to_mp_bus_id[NR_CPUS/4][4]; 76 77 78static inline void numaq_register_node(int node, struct sys_cfg_data *scd) 79{ 80 struct eachquadmem *eq = scd->eq + node; 81 82 node_set_online(node); 83 84 /* Convert to pages */ 85 node_start_pfn[node] = 86 MB_TO_PAGES(eq->hi_shrd_mem_start - eq->priv_mem_size); 87 88 node_end_pfn[node] = 89 MB_TO_PAGES(eq->hi_shrd_mem_start + eq->hi_shrd_mem_size); 90 91 e820_register_active_regions(node, node_start_pfn[node], 92 node_end_pfn[node]); 93 94 memory_present(node, node_start_pfn[node], node_end_pfn[node]); 95 96 node_remap_size[node] = node_memmap_size_bytes(node, 97 node_start_pfn[node], 98 node_end_pfn[node]); 99} 100 101/* 102 * Function: smp_dump_qct() 103 * 104 * Description: gets memory layout from the quad config table. This 105 * function also updates node_online_map with the nodes (quads) present. 106 */ 107static void __init smp_dump_qct(void) 108{ 109 struct sys_cfg_data *scd; 110 int node; 111 112 scd = (void *)__va(SYS_CFG_DATA_PRIV_ADDR); 113 114 nodes_clear(node_online_map); 115 for_each_node(node) { 116 if (scd->quads_present31_0 & (1 << node)) 117 numaq_register_node(node, scd); 118 } 119} 120 121void __cpuinit numaq_tsc_disable(void) 122{ 123 if (!found_numaq) 124 return; 125 126 if (num_online_nodes() > 1) { 127 printk(KERN_DEBUG "NUMAQ: disabling TSC\n"); 128 setup_clear_cpu_cap(X86_FEATURE_TSC); 129 } 130} 131 132static void __init numaq_tsc_init(void) 133{ 134 numaq_tsc_disable(); 135} 136 137static inline int generate_logical_apicid(int quad, int phys_apicid) 138{ 139 return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1); 140} 141 142/* x86_quirks member */ 143static int mpc_apic_id(struct mpc_cpu *m) 144{ 145 int quad = translation_table[mpc_record]->trans_quad; 146 int logical_apicid = generate_logical_apicid(quad, m->apicid); 147 148 printk(KERN_DEBUG 149 "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n", 150 m->apicid, (m->cpufeature & CPU_FAMILY_MASK) >> 8, 151 (m->cpufeature & CPU_MODEL_MASK) >> 4, 152 m->apicver, quad, logical_apicid); 153 154 return logical_apicid; 155} 156 157/* x86_quirks member */ 158static void mpc_oem_bus_info(struct mpc_bus *m, char *name) 159{ 160 int quad = translation_table[mpc_record]->trans_quad; 161 int local = translation_table[mpc_record]->trans_local; 162 163 mp_bus_id_to_node[m->busid] = quad; 164 mp_bus_id_to_local[m->busid] = local; 165 166 printk(KERN_INFO "Bus #%d is %s (node %d)\n", m->busid, name, quad); 167} 168 169/* x86_quirks member */ 170static void mpc_oem_pci_bus(struct mpc_bus *m) 171{ 172 int quad = translation_table[mpc_record]->trans_quad; 173 int local = translation_table[mpc_record]->trans_local; 174 175 quad_local_to_mp_bus_id[quad][local] = m->busid; 176} 177 178/* 179 * Called from mpparse code. 180 * mode = 0: prescan 181 * mode = 1: one mpc entry scanned 182 */ 183static void numaq_mpc_record(unsigned int mode) 184{ 185 if (!mode) 186 mpc_record = 0; 187 else 188 mpc_record++; 189} 190 191static void __init MP_translation_info(struct mpc_trans *m) 192{ 193 printk(KERN_INFO 194 "Translation: record %d, type %d, quad %d, global %d, local %d\n", 195 mpc_record, m->trans_type, m->trans_quad, m->trans_global, 196 m->trans_local); 197 198 if (mpc_record >= MAX_MPC_ENTRY) 199 printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n"); 200 else 201 translation_table[mpc_record] = m; /* stash this for later */ 202 203 if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad)) 204 node_set_online(m->trans_quad); 205} 206 207static int __init mpf_checksum(unsigned char *mp, int len) 208{ 209 int sum = 0; 210 211 while (len--) 212 sum += *mp++; 213 214 return sum & 0xFF; 215} 216 217/* 218 * Read/parse the MPC oem tables 219 */ 220static void __init smp_read_mpc_oem(struct mpc_table *mpc) 221{ 222 struct mpc_oemtable *oemtable = (void *)(long)mpc->oemptr; 223 int count = sizeof(*oemtable); /* the header size */ 224 unsigned char *oemptr = ((unsigned char *)oemtable) + count; 225 226 mpc_record = 0; 227 printk(KERN_INFO 228 "Found an OEM MPC table at %8p - parsing it...\n", oemtable); 229 230 if (memcmp(oemtable->signature, MPC_OEM_SIGNATURE, 4)) { 231 printk(KERN_WARNING 232 "SMP mpc oemtable: bad signature [%c%c%c%c]!\n", 233 oemtable->signature[0], oemtable->signature[1], 234 oemtable->signature[2], oemtable->signature[3]); 235 return; 236 } 237 238 if (mpf_checksum((unsigned char *)oemtable, oemtable->length)) { 239 printk(KERN_WARNING "SMP oem mptable: checksum error!\n"); 240 return; 241 } 242 243 while (count < oemtable->length) { 244 switch (*oemptr) { 245 case MP_TRANSLATION: 246 { 247 struct mpc_trans *m = (void *)oemptr; 248 249 MP_translation_info(m); 250 oemptr += sizeof(*m); 251 count += sizeof(*m); 252 ++mpc_record; 253 break; 254 } 255 default: 256 printk(KERN_WARNING 257 "Unrecognised OEM table entry type! - %d\n", 258 (int)*oemptr); 259 return; 260 } 261 } 262} 263 264static __init void early_check_numaq(void) 265{ 266 /* 267 * get boot-time SMP configuration: 268 */ 269 if (smp_found_config) 270 early_get_smp_config(); 271 272 if (found_numaq) { 273 x86_init.mpparse.mpc_record = numaq_mpc_record; 274 x86_init.mpparse.setup_ioapic_ids = x86_init_noop; 275 x86_init.mpparse.mpc_apic_id = mpc_apic_id; 276 x86_init.mpparse.smp_read_mpc_oem = smp_read_mpc_oem; 277 x86_init.mpparse.mpc_oem_pci_bus = mpc_oem_pci_bus; 278 x86_init.mpparse.mpc_oem_bus_info = mpc_oem_bus_info; 279 x86_init.timers.tsc_pre_init = numaq_tsc_init; 280 x86_init.pci.init = pci_numaq_init; 281 } 282} 283 284int __init get_memcfg_numaq(void) 285{ 286 early_check_numaq(); 287 if (!found_numaq) 288 return 0; 289 smp_dump_qct(); 290 291 return 1; 292} 293 294#define NUMAQ_APIC_DFR_VALUE (APIC_DFR_CLUSTER) 295 296static inline unsigned int numaq_get_apic_id(unsigned long x) 297{ 298 return (x >> 24) & 0x0F; 299} 300 301static inline void numaq_send_IPI_mask(const struct cpumask *mask, int vector) 302{ 303 default_send_IPI_mask_sequence_logical(mask, vector); 304} 305 306static inline void numaq_send_IPI_allbutself(int vector) 307{ 308 default_send_IPI_mask_allbutself_logical(cpu_online_mask, vector); 309} 310 311static inline void numaq_send_IPI_all(int vector) 312{ 313 numaq_send_IPI_mask(cpu_online_mask, vector); 314} 315 316#define NUMAQ_TRAMPOLINE_PHYS_LOW (0x8) 317#define NUMAQ_TRAMPOLINE_PHYS_HIGH (0xa) 318 319/* 320 * Because we use NMIs rather than the INIT-STARTUP sequence to 321 * bootstrap the CPUs, the APIC may be in a weird state. Kick it: 322 */ 323static inline void numaq_smp_callin_clear_local_apic(void) 324{ 325 clear_local_APIC(); 326} 327 328static inline const struct cpumask *numaq_target_cpus(void) 329{ 330 return cpu_all_mask; 331} 332 333static unsigned long numaq_check_apicid_used(physid_mask_t *map, int apicid) 334{ 335 return physid_isset(apicid, *map); 336} 337 338static inline unsigned long numaq_check_apicid_present(int bit) 339{ 340 return physid_isset(bit, phys_cpu_present_map); 341} 342 343static inline int numaq_apic_id_registered(void) 344{ 345 return 1; 346} 347 348static inline void numaq_init_apic_ldr(void) 349{ 350 /* Already done in NUMA-Q firmware */ 351} 352 353static inline void numaq_setup_apic_routing(void) 354{ 355 printk(KERN_INFO 356 "Enabling APIC mode: NUMA-Q. Using %d I/O APICs\n", 357 nr_ioapics); 358} 359 360/* 361 * Skip adding the timer int on secondary nodes, which causes 362 * a small but painful rift in the time-space continuum. 363 */ 364static inline int numaq_multi_timer_check(int apic, int irq) 365{ 366 return apic != 0 && irq == 0; 367} 368 369static inline void numaq_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) 370{ 371 /* We don't have a good way to do this yet - hack */ 372 return physids_promote(0xFUL, retmap); 373} 374 375static inline int numaq_cpu_to_logical_apicid(int cpu) 376{ 377 if (cpu >= nr_cpu_ids) 378 return BAD_APICID; 379 return cpu_2_logical_apicid[cpu]; 380} 381 382/* 383 * Supporting over 60 cpus on NUMA-Q requires a locality-dependent 384 * cpu to APIC ID relation to properly interact with the intelligent 385 * mode of the cluster controller. 386 */ 387static inline int numaq_cpu_present_to_apicid(int mps_cpu) 388{ 389 if (mps_cpu < 60) 390 return ((mps_cpu >> 2) << 4) | (1 << (mps_cpu & 0x3)); 391 else 392 return BAD_APICID; 393} 394 395static inline int numaq_apicid_to_node(int logical_apicid) 396{ 397 return logical_apicid >> 4; 398} 399 400static void numaq_apicid_to_cpu_present(int logical_apicid, physid_mask_t *retmap) 401{ 402 int node = numaq_apicid_to_node(logical_apicid); 403 int cpu = __ffs(logical_apicid & 0xf); 404 405 physid_set_mask_of_physid(cpu + 4*node, retmap); 406} 407 408/* Where the IO area was mapped on multiquad, always 0 otherwise */ 409void *xquad_portio; 410 411static inline int numaq_check_phys_apicid_present(int phys_apicid) 412{ 413 return 1; 414} 415 416/* 417 * We use physical apicids here, not logical, so just return the default 418 * physical broadcast to stop people from breaking us 419 */ 420static unsigned int numaq_cpu_mask_to_apicid(const struct cpumask *cpumask) 421{ 422 return 0x0F; 423} 424 425static inline unsigned int 426numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask, 427 const struct cpumask *andmask) 428{ 429 return 0x0F; 430} 431 432/* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */ 433static inline int numaq_phys_pkg_id(int cpuid_apic, int index_msb) 434{ 435 return cpuid_apic >> index_msb; 436} 437 438static int 439numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) 440{ 441 if (strncmp(oem, "IBM NUMA", 8)) 442 printk(KERN_ERR "Warning! Not a NUMA-Q system!\n"); 443 else 444 found_numaq = 1; 445 446 return found_numaq; 447} 448 449static int probe_numaq(void) 450{ 451 /* already know from get_memcfg_numaq() */ 452 return found_numaq; 453} 454 455static void numaq_vector_allocation_domain(int cpu, struct cpumask *retmask) 456{ 457 /* Careful. Some cpus do not strictly honor the set of cpus 458 * specified in the interrupt destination when using lowest 459 * priority interrupt delivery mode. 460 * 461 * In particular there was a hyperthreading cpu observed to 462 * deliver interrupts to the wrong hyperthread when only one 463 * hyperthread was specified in the interrupt desitination. 464 */ 465 cpumask_clear(retmask); 466 cpumask_bits(retmask)[0] = APIC_ALL_CPUS; 467} 468 469static void numaq_setup_portio_remap(void) 470{ 471 int num_quads = num_online_nodes(); 472 473 if (num_quads <= 1) 474 return; 475 476 printk(KERN_INFO 477 "Remapping cross-quad port I/O for %d quads\n", num_quads); 478 479 xquad_portio = ioremap(XQUAD_PORTIO_BASE, num_quads*XQUAD_PORTIO_QUAD); 480 481 printk(KERN_INFO 482 "xquad_portio vaddr 0x%08lx, len %08lx\n", 483 (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD); 484} 485 486/* Use __refdata to keep false positive warning calm. */ 487struct apic __refdata apic_numaq = { 488 489 .name = "NUMAQ", 490 .probe = probe_numaq, 491 .acpi_madt_oem_check = NULL, 492 .apic_id_registered = numaq_apic_id_registered, 493 494 .irq_delivery_mode = dest_LowestPrio, 495 /* physical delivery on LOCAL quad: */ 496 .irq_dest_mode = 0, 497 498 .target_cpus = numaq_target_cpus, 499 .disable_esr = 1, 500 .dest_logical = APIC_DEST_LOGICAL, 501 .check_apicid_used = numaq_check_apicid_used, 502 .check_apicid_present = numaq_check_apicid_present, 503 504 .vector_allocation_domain = numaq_vector_allocation_domain, 505 .init_apic_ldr = numaq_init_apic_ldr, 506 507 .ioapic_phys_id_map = numaq_ioapic_phys_id_map, 508 .setup_apic_routing = numaq_setup_apic_routing, 509 .multi_timer_check = numaq_multi_timer_check, 510 .apicid_to_node = numaq_apicid_to_node, 511 .cpu_to_logical_apicid = numaq_cpu_to_logical_apicid, 512 .cpu_present_to_apicid = numaq_cpu_present_to_apicid, 513 .apicid_to_cpu_present = numaq_apicid_to_cpu_present, 514 .setup_portio_remap = numaq_setup_portio_remap, 515 .check_phys_apicid_present = numaq_check_phys_apicid_present, 516 .enable_apic_mode = NULL, 517 .phys_pkg_id = numaq_phys_pkg_id, 518 .mps_oem_check = numaq_mps_oem_check, 519 520 .get_apic_id = numaq_get_apic_id, 521 .set_apic_id = NULL, 522 .apic_id_mask = 0x0F << 24, 523 524 .cpu_mask_to_apicid = numaq_cpu_mask_to_apicid, 525 .cpu_mask_to_apicid_and = numaq_cpu_mask_to_apicid_and, 526 527 .send_IPI_mask = numaq_send_IPI_mask, 528 .send_IPI_mask_allbutself = NULL, 529 .send_IPI_allbutself = numaq_send_IPI_allbutself, 530 .send_IPI_all = numaq_send_IPI_all, 531 .send_IPI_self = default_send_IPI_self, 532 533 .wakeup_secondary_cpu = wakeup_secondary_cpu_via_nmi, 534 .trampoline_phys_low = NUMAQ_TRAMPOLINE_PHYS_LOW, 535 .trampoline_phys_high = NUMAQ_TRAMPOLINE_PHYS_HIGH, 536 537 /* We don't do anything here because we use NMI's to boot instead */ 538 .wait_for_init_deassert = NULL, 539 540 .smp_callin_clear_local_apic = numaq_smp_callin_clear_local_apic, 541 .inquire_remote_apic = NULL, 542 543 .read = native_apic_mem_read, 544 .write = native_apic_mem_write, 545 .icr_read = native_apic_icr_read, 546 .icr_write = native_apic_icr_write, 547 .wait_icr_idle = native_apic_wait_icr_idle, 548 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, 549}; 550