1/* $NetBSD: acpi_srat.c,v 1.2 2009/12/04 10:42:39 njoly Exp $ */ 2 3/* 4 * Copyright (c) 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Christoph Egger. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32#include <sys/cdefs.h> 33__KERNEL_RCSID(0, "$NetBSD: acpi_srat.c,v 1.2 2009/12/04 10:42:39 njoly Exp $"); 34 35#include <sys/param.h> 36#include <sys/kmem.h> 37#include <sys/systm.h> 38 39#include <dev/acpi/acpivar.h> 40#include <dev/acpi/acpi_srat.h> 41 42static ACPI_TABLE_SRAT *srat; 43 44struct acpisrat_node { 45 acpisrat_nodeid_t nodeid; 46 uint32_t ncpus; /* Number of cpus in this node */ 47 struct acpisrat_cpu **cpu; /* Array of cpus */ 48 uint32_t nmems; /* Number of memory ranges in this node */ 49 struct acpisrat_mem **mem; /* Array of memory ranges */ 50}; 51 52static uint32_t nnodes; /* Number of NUMA nodes */ 53static struct acpisrat_node *node_array; /* Array of NUMA nodes */ 54static uint32_t ncpus; /* Number of CPUs */ 55static struct acpisrat_cpu *cpu_array; /* Array of cpus */ 56static uint32_t nmems; /* Number of Memory ranges */ 57static struct acpisrat_mem *mem_array; 58 59 60struct cpulist { 61 struct acpisrat_cpu cpu; 62 TAILQ_ENTRY(cpulist) entry; 63}; 64 65static TAILQ_HEAD(, cpulist) cpulisthead; 66 67#define CPU_INIT TAILQ_INIT(&cpulisthead); 68#define CPU_FOREACH(cpu) TAILQ_FOREACH(cpu, &cpulisthead, entry) 69#define CPU_ADD(cpu) TAILQ_INSERT_TAIL(&cpulisthead, cpu, entry) 70#define CPU_REM(cpu) TAILQ_REMOVE(&cpulisthead, cpu, entry) 71#define CPU_FIRST TAILQ_FIRST(&cpulisthead) 72 73 74struct memlist { 75 struct acpisrat_mem mem; 76 TAILQ_ENTRY(memlist) entry; 77}; 78 79static TAILQ_HEAD(, memlist) memlisthead; 80 81#define MEM_INIT TAILQ_INIT(&memlisthead) 82#define MEM_FOREACH(mem) TAILQ_FOREACH(mem, &memlisthead, entry) 83#define MEM_ADD(mem) TAILQ_INSERT_TAIL(&memlisthead, mem, entry) 84#define MEM_ADD_BEFORE(mem, b) TAILQ_INSERT_BEFORE(b, mem, entry) 85#define MEM_REM(mem) TAILQ_REMOVE(&memlisthead, mem, entry) 86#define MEM_FIRST TAILQ_FIRST(&memlisthead) 87 88 89static struct cpulist * 90cpu_alloc(void) 91{ 92 return kmem_zalloc(sizeof(struct cpulist), KM_NOSLEEP); 93} 94 95static void 96cpu_free(struct cpulist *c) 97{ 98 kmem_free(c, sizeof(struct cpulist)); 99} 100 101#if 0 102static struct cpulist * 103cpu_get(acpisrat_nodeid_t nodeid) 104{ 105 struct cpulist *tmp; 106 107 CPU_FOREACH(tmp) { 108 if (tmp->cpu.nodeid == nodeid) 109 return tmp; 110 } 111 112 return NULL; 113} 114#endif 115 116static struct memlist * 117mem_alloc(void) 118{ 119 return kmem_zalloc(sizeof(struct memlist), KM_NOSLEEP); 120} 121 122static void 123mem_free(struct memlist *m) 124{ 125 kmem_free(m, sizeof(struct memlist)); 126} 127 128static struct memlist * 129mem_get(acpisrat_nodeid_t nodeid) 130{ 131 struct memlist *tmp; 132 133 MEM_FOREACH(tmp) { 134 if (tmp->mem.nodeid == nodeid) 135 return tmp; 136 } 137 138 return NULL; 139} 140 141 142bool 143acpisrat_exist(void) 144{ 145 ACPI_TABLE_HEADER *table; 146 ACPI_STATUS rv; 147 148 rv = AcpiGetTable(ACPI_SIG_SRAT, 1, (ACPI_TABLE_HEADER **)&table); 149 if (ACPI_FAILURE(rv)) 150 return false; 151 152 /* Check if header is valid */ 153 if (table == NULL) 154 return false; 155 156 if (table->Length == 0xffffffff) 157 return false; 158 159 srat = (ACPI_TABLE_SRAT *)table; 160 161 return true; 162} 163 164static int 165acpisrat_parse(void) 166{ 167 ACPI_SUBTABLE_HEADER *subtable; 168 ACPI_SRAT_CPU_AFFINITY *srat_cpu; 169 ACPI_SRAT_MEM_AFFINITY *srat_mem; 170 ACPI_SRAT_X2APIC_CPU_AFFINITY *srat_x2apic; 171 172 acpisrat_nodeid_t nodeid; 173 struct cpulist *cpuentry = NULL; 174 struct memlist *mementry; 175 uint32_t srat_pos; 176 bool ignore_cpu_affinity = false; 177 178 KASSERT(srat != NULL); 179 180 /* Content starts right after the header */ 181 srat_pos = sizeof(ACPI_TABLE_SRAT); 182 183 while (srat_pos < srat->Header.Length) { 184 subtable = (ACPI_SUBTABLE_HEADER *)((char *)srat + srat_pos); 185 srat_pos += subtable->Length; 186 187 switch (subtable->Type) { 188 case ACPI_SRAT_TYPE_CPU_AFFINITY: 189 if (ignore_cpu_affinity) 190 continue; 191 192 srat_cpu = (ACPI_SRAT_CPU_AFFINITY *)subtable; 193 nodeid = (srat_cpu->ProximityDomainHi[2] << 24) | 194 (srat_cpu->ProximityDomainHi[1] << 16) | 195 (srat_cpu->ProximityDomainHi[0] << 8) | 196 (srat_cpu->ProximityDomainLo); 197 198 cpuentry = cpu_alloc(); 199 if (cpuentry == NULL) 200 return ENOMEM; 201 CPU_ADD(cpuentry); 202 203 cpuentry->cpu.nodeid = nodeid; 204 cpuentry->cpu.apicid = srat_cpu->ApicId; 205 cpuentry->cpu.sapiceid = srat_cpu->LocalSapicEid; 206 cpuentry->cpu.flags = srat_cpu->Flags; 207 cpuentry->cpu.clockdomain = srat_cpu->ClockDomain; 208 break; 209 210 case ACPI_SRAT_TYPE_MEMORY_AFFINITY: 211 srat_mem = (ACPI_SRAT_MEM_AFFINITY *)subtable; 212 nodeid = srat_mem->ProximityDomain; 213 214 mementry = mem_alloc(); 215 if (mementry == NULL) 216 return ENOMEM; 217 MEM_ADD(mementry); 218 219 mementry->mem.nodeid = nodeid; 220 mementry->mem.baseaddress = srat_mem->BaseAddress; 221 mementry->mem.length = srat_mem->Length; 222 mementry->mem.flags = srat_mem->Flags; 223 break; 224 225 case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY: 226 srat_x2apic = (ACPI_SRAT_X2APIC_CPU_AFFINITY *)subtable; 227 nodeid = srat_x2apic->ProximityDomain; 228 229 /* This table entry overrides 230 * ACPI_SRAT_TYPE_CPU_AFFINITY. 231 */ 232 if (!ignore_cpu_affinity) { 233 struct cpulist *citer; 234 while ((citer = CPU_FIRST) != NULL) { 235 CPU_REM(citer); 236 cpu_free(citer); 237 } 238 ignore_cpu_affinity = true; 239 } 240 241 cpuentry = cpu_alloc(); 242 if (cpuentry == NULL) 243 return ENOMEM; 244 CPU_ADD(cpuentry); 245 246 cpuentry->cpu.nodeid = nodeid; 247 cpuentry->cpu.apicid = srat_x2apic->ApicId; 248 cpuentry->cpu.clockdomain = srat_x2apic->ClockDomain; 249 cpuentry->cpu.flags = srat_x2apic->Flags; 250 break; 251 252 case ACPI_SRAT_TYPE_RESERVED: 253 printf("ACPI SRAT subtable reserved, length: 0x%x\n", 254 subtable->Length); 255 break; 256 } 257 } 258 259 return 0; 260} 261 262static int 263acpisrat_quirks(void) 264{ 265 struct cpulist *citer; 266 struct memlist *mem, *miter; 267 268 /* Some sanity checks. */ 269 270 /* Deal with holes in the memory nodes. 271 * BIOS doesn't enlist memory nodes which 272 * don't have any memory modules plugged in. 273 * This behaviour has been observed on AMD machines. 274 * 275 * Do that by searching for CPUs in NUMA nodes 276 * which don't exist in the memory and then insert 277 * a zero memory range for the missing node. 278 */ 279 CPU_FOREACH(citer) { 280 mem = mem_get(citer->cpu.nodeid); 281 if (mem != NULL) 282 continue; 283 mem = mem_alloc(); 284 if (mem == NULL) 285 return ENOMEM; 286 mem->mem.nodeid = citer->cpu.nodeid; 287 /* all other fields are already zero filled */ 288 289 MEM_FOREACH(miter) { 290 if (miter->mem.nodeid < citer->cpu.nodeid) 291 continue; 292 MEM_ADD_BEFORE(mem, miter); 293 break; 294 } 295 } 296 297 return 0; 298} 299 300int 301acpisrat_init(void) 302{ 303 if (!acpisrat_exist()) 304 return EEXIST; 305 return acpisrat_refresh(); 306} 307 308int 309acpisrat_refresh(void) 310{ 311 int rc, i, j, k; 312 struct cpulist *citer; 313 struct memlist *miter; 314 uint32_t cnodes = 0, mnodes = 0; 315 316 CPU_INIT; 317 MEM_INIT; 318 319 rc = acpisrat_parse(); 320 if (rc) 321 return rc; 322 323 rc = acpisrat_quirks(); 324 if (rc) 325 return rc; 326 327 /* cleanup resources */ 328 rc = acpisrat_exit(); 329 if (rc) 330 return rc; 331 332 nnodes = 0; 333 ncpus = 0; 334 CPU_FOREACH(citer) { 335 cnodes = MAX(citer->cpu.nodeid, cnodes); 336 ncpus++; 337 } 338 339 nmems = 0; 340 MEM_FOREACH(miter) { 341 mnodes = MAX(miter->mem.nodeid, mnodes); 342 nmems++; 343 } 344 345 nnodes = MAX(cnodes, mnodes) + 1; 346 347 node_array = kmem_zalloc(nnodes * sizeof(struct acpisrat_node), 348 KM_NOSLEEP); 349 if (node_array == NULL) 350 return ENOMEM; 351 352 cpu_array = kmem_zalloc(ncpus * sizeof(struct acpisrat_cpu), 353 KM_NOSLEEP); 354 if (cpu_array == NULL) 355 return ENOMEM; 356 357 mem_array = kmem_zalloc(nmems * sizeof(struct acpisrat_mem), 358 KM_NOSLEEP); 359 if (mem_array == NULL) 360 return ENOMEM; 361 362 i = 0; 363 CPU_FOREACH(citer) { 364 memcpy(&cpu_array[i], &citer->cpu, sizeof(struct acpisrat_cpu)); 365 i++; 366 node_array[citer->cpu.nodeid].ncpus++; 367 } 368 369 i = 0; 370 MEM_FOREACH(miter) { 371 memcpy(&mem_array[i], &miter->mem, sizeof(struct acpisrat_mem)); 372 i++; 373 node_array[miter->mem.nodeid].nmems++; 374 } 375 376 for (i = 0; i < nnodes; i++) { 377 node_array[i].nodeid = i; 378 379 node_array[i].cpu = kmem_zalloc(node_array[i].ncpus * 380 sizeof(struct acpisrat_cpu *), KM_NOSLEEP); 381 node_array[i].mem = kmem_zalloc(node_array[i].nmems * 382 sizeof(struct acpisrat_mem *), KM_NOSLEEP); 383 384 k = 0; 385 for (j = 0; j < ncpus; j++) { 386 if (cpu_array[j].nodeid != i) 387 continue; 388 node_array[i].cpu[k] = &cpu_array[j]; 389 k++; 390 } 391 392 k = 0; 393 for (j = 0; j < nmems; j++) { 394 if (mem_array[j].nodeid != i) 395 continue; 396 node_array[i].mem[k] = &mem_array[j]; 397 k++; 398 } 399 } 400 401 while ((citer = CPU_FIRST) != NULL) { 402 CPU_REM(citer); 403 cpu_free(citer); 404 } 405 406 while ((miter = MEM_FIRST) != NULL) { 407 MEM_REM(miter); 408 mem_free(miter); 409 } 410 411 return 0; 412} 413 414 415int 416acpisrat_exit(void) 417{ 418 int i; 419 420 if (node_array) { 421 for (i = 0; i < nnodes; i++) { 422 if (node_array[i].cpu) 423 kmem_free(node_array[i].cpu, 424 node_array[i].ncpus * sizeof(struct acpisrat_cpu *)); 425 if (node_array[i].mem) 426 kmem_free(node_array[i].mem, 427 node_array[i].nmems * sizeof(struct acpisrat_mem *)); 428 } 429 kmem_free(node_array, nnodes * sizeof(struct acpisrat_node)); 430 } 431 node_array = NULL; 432 433 if (cpu_array) 434 kmem_free(cpu_array, ncpus * sizeof(struct acpisrat_cpu)); 435 cpu_array = NULL; 436 437 if (mem_array) 438 kmem_free(mem_array, nmems * sizeof(struct acpisrat_mem)); 439 mem_array = NULL; 440 441 nnodes = 0; 442 ncpus = 0; 443 nmems = 0; 444 445 return 0; 446} 447 448 449void 450acpisrat_dump(void) 451{ 452 uint32_t i, j, nn, nc, nm; 453 struct acpisrat_cpu c; 454 struct acpisrat_mem m; 455 456 nn = acpisrat_nodes(); 457 aprint_debug("SRAT: %u NUMA nodes\n", nn); 458 for (i = 0; i < nn; i++) { 459 nc = acpisrat_node_cpus(i); 460 for (j = 0; j < nc; j++) { 461 acpisrat_cpu(i, j, &c); 462 aprint_debug("SRAT: node %u cpu %u " 463 "(apic %u, sapic %u, flags %u, clockdomain %u)\n", 464 c.nodeid, j, c.apicid, c.sapiceid, c.flags, 465 c.clockdomain); 466 } 467 468 nm = acpisrat_node_memoryranges(i); 469 for (j = 0; j < nm; j++) { 470 acpisrat_mem(i, j, &m); 471 aprint_debug("SRAT: node %u memory range %u (0x%" 472 PRIx64" - 0x%"PRIx64" flags %u)\n", 473 m.nodeid, j, m.baseaddress, 474 m.baseaddress + m.length, m.flags); 475 } 476 } 477} 478 479uint32_t 480acpisrat_nodes(void) 481{ 482 return nnodes; 483} 484 485uint32_t 486acpisrat_node_cpus(acpisrat_nodeid_t nodeid) 487{ 488 return node_array[nodeid].ncpus; 489} 490 491uint32_t 492acpisrat_node_memoryranges(acpisrat_nodeid_t nodeid) 493{ 494 return node_array[nodeid].nmems; 495} 496 497void 498acpisrat_cpu(acpisrat_nodeid_t nodeid, uint32_t cpunum, 499 struct acpisrat_cpu *c) 500{ 501 memcpy(c, node_array[nodeid].cpu[cpunum], 502 sizeof(struct acpisrat_cpu)); 503} 504 505void 506acpisrat_mem(acpisrat_nodeid_t nodeid, uint32_t memrange, 507 struct acpisrat_mem *mem) 508{ 509 memcpy(mem, node_array[nodeid].mem[memrange], 510 sizeof(struct acpisrat_mem)); 511} 512