1/* $NetBSD: acpi_srat.c,v 1.2 2009/12/04 10:42:39 njoly Exp $ */
2
3/*
4 * Copyright (c) 2009 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Christoph Egger.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32#include <sys/cdefs.h>
33__KERNEL_RCSID(0, "$NetBSD: acpi_srat.c,v 1.2 2009/12/04 10:42:39 njoly Exp $");
34
35#include <sys/param.h>
36#include <sys/kmem.h>
37#include <sys/systm.h>
38
39#include <dev/acpi/acpivar.h>
40#include <dev/acpi/acpi_srat.h>
41
/*
 * Cached pointer to the SRAT.  Assigned by acpisrat_exist() once
 * AcpiGetTable() has returned a usable table; acpisrat_parse() asserts
 * that it is non-NULL before walking the subtables.
 */
static ACPI_TABLE_SRAT *srat;
43
/*
 * Per-node view of the parsed SRAT.  The cpu[] and mem[] members are
 * arrays of pointers that acpisrat_refresh() points at elements of the
 * file-scope cpu_array and mem_array; they do not own the records.
 */
struct acpisrat_node {
	acpisrat_nodeid_t nodeid;
	uint32_t ncpus; /* Number of cpus in this node */
	struct acpisrat_cpu **cpu; /* Array of cpus */
	uint32_t nmems; /* Number of memory ranges in this node */
	struct acpisrat_mem **mem; /* Array of memory ranges */
};
51
/*
 * Final, array-based representation built by acpisrat_refresh() and
 * released by acpisrat_exit().  The counters are also the element counts
 * used to compute the sizes passed to kmem_free().
 */
static uint32_t nnodes; /* Number of NUMA nodes (highest node id + 1) */
static struct acpisrat_node *node_array; /* Array of NUMA nodes, indexed by node id */
static uint32_t ncpus; /* Number of CPUs */
static struct acpisrat_cpu *cpu_array; /* Array of cpus */
static uint32_t nmems; /* Number of Memory ranges */
static struct acpisrat_mem *mem_array; /* Array of memory ranges */
58
59
60struct cpulist {
61	struct acpisrat_cpu cpu;
62	TAILQ_ENTRY(cpulist) entry;
63};
64
65static TAILQ_HEAD(, cpulist) cpulisthead;
66
67#define CPU_INIT		TAILQ_INIT(&cpulisthead);
68#define CPU_FOREACH(cpu)	TAILQ_FOREACH(cpu, &cpulisthead, entry)
69#define CPU_ADD(cpu)		TAILQ_INSERT_TAIL(&cpulisthead, cpu, entry)
70#define CPU_REM(cpu)		TAILQ_REMOVE(&cpulisthead, cpu, entry)
71#define CPU_FIRST		TAILQ_FIRST(&cpulisthead)
72
73
/*
 * Temporary list of memory affinity records.  acpisrat_parse() appends
 * one element per memory subtable, acpisrat_quirks() may insert
 * zero-length placeholders, and acpisrat_refresh() copies the records
 * into mem_array before freeing the list.
 */
struct memlist {
	struct acpisrat_mem mem;
	TAILQ_ENTRY(memlist) entry;
};

static TAILQ_HEAD(, memlist) memlisthead;

/* Thin wrappers around the TAILQ macros for the memory list. */
#define MEM_INIT		TAILQ_INIT(&memlisthead)
#define MEM_FOREACH(mem)	TAILQ_FOREACH(mem, &memlisthead, entry)
#define MEM_ADD(mem)		TAILQ_INSERT_TAIL(&memlisthead, mem, entry)
#define MEM_ADD_BEFORE(mem, b)	TAILQ_INSERT_BEFORE(b, mem, entry)
#define MEM_REM(mem)		TAILQ_REMOVE(&memlisthead, mem, entry)
#define MEM_FIRST		TAILQ_FIRST(&memlisthead)
87
88
89static struct cpulist *
90cpu_alloc(void)
91{
92	return kmem_zalloc(sizeof(struct cpulist), KM_NOSLEEP);
93}
94
95static void
96cpu_free(struct cpulist *c)
97{
98	kmem_free(c, sizeof(struct cpulist));
99}
100
#if 0
/*
 * Return the first CPU list element that belongs to the given node,
 * or NULL if there is none.  Currently compiled out.
 */
static struct cpulist *
cpu_get(acpisrat_nodeid_t nodeid)
{
	struct cpulist *iter, *found = NULL;

	CPU_FOREACH(iter) {
		if (iter->cpu.nodeid == nodeid) {
			found = iter;
			break;
		}
	}

	return found;
}
#endif
115
116static struct memlist *
117mem_alloc(void)
118{
119	return kmem_zalloc(sizeof(struct memlist), KM_NOSLEEP);
120}
121
122static void
123mem_free(struct memlist *m)
124{
125	kmem_free(m, sizeof(struct memlist));
126}
127
128static struct memlist *
129mem_get(acpisrat_nodeid_t nodeid)
130{
131	struct memlist *tmp;
132
133	MEM_FOREACH(tmp) {
134		if (tmp->mem.nodeid == nodeid)
135			return tmp;
136	}
137
138	return NULL;
139}
140
141
142bool
143acpisrat_exist(void)
144{
145	ACPI_TABLE_HEADER *table;
146	ACPI_STATUS rv;
147
148	rv = AcpiGetTable(ACPI_SIG_SRAT, 1, (ACPI_TABLE_HEADER **)&table);
149	if (ACPI_FAILURE(rv))
150		return false;
151
152	/* Check if header is valid */
153	if (table == NULL)
154		return false;
155
156	if (table->Length == 0xffffffff)
157		return false;
158
159	srat = (ACPI_TABLE_SRAT *)table;
160
161	return true;
162}
163
164static int
165acpisrat_parse(void)
166{
167	ACPI_SUBTABLE_HEADER *subtable;
168	ACPI_SRAT_CPU_AFFINITY *srat_cpu;
169	ACPI_SRAT_MEM_AFFINITY *srat_mem;
170	ACPI_SRAT_X2APIC_CPU_AFFINITY *srat_x2apic;
171
172	acpisrat_nodeid_t nodeid;
173	struct cpulist *cpuentry = NULL;
174	struct memlist *mementry;
175	uint32_t srat_pos;
176	bool ignore_cpu_affinity = false;
177
178	KASSERT(srat != NULL);
179
180	/* Content starts right after the header */
181	srat_pos = sizeof(ACPI_TABLE_SRAT);
182
183	while (srat_pos < srat->Header.Length) {
184		subtable = (ACPI_SUBTABLE_HEADER *)((char *)srat + srat_pos);
185		srat_pos += subtable->Length;
186
187		switch (subtable->Type) {
188		case ACPI_SRAT_TYPE_CPU_AFFINITY:
189			if (ignore_cpu_affinity)
190				continue;
191
192			srat_cpu = (ACPI_SRAT_CPU_AFFINITY *)subtable;
193			nodeid = (srat_cpu->ProximityDomainHi[2] << 24) |
194			    (srat_cpu->ProximityDomainHi[1] << 16) |
195			    (srat_cpu->ProximityDomainHi[0] << 8) |
196			    (srat_cpu->ProximityDomainLo);
197
198			cpuentry = cpu_alloc();
199			if (cpuentry == NULL)
200				return ENOMEM;
201			CPU_ADD(cpuentry);
202
203			cpuentry->cpu.nodeid = nodeid;
204			cpuentry->cpu.apicid = srat_cpu->ApicId;
205			cpuentry->cpu.sapiceid = srat_cpu->LocalSapicEid;
206			cpuentry->cpu.flags = srat_cpu->Flags;
207			cpuentry->cpu.clockdomain = srat_cpu->ClockDomain;
208			break;
209
210		case ACPI_SRAT_TYPE_MEMORY_AFFINITY:
211			srat_mem = (ACPI_SRAT_MEM_AFFINITY *)subtable;
212			nodeid = srat_mem->ProximityDomain;
213
214			mementry = mem_alloc();
215			if (mementry == NULL)
216				return ENOMEM;
217			MEM_ADD(mementry);
218
219			mementry->mem.nodeid = nodeid;
220			mementry->mem.baseaddress = srat_mem->BaseAddress;
221			mementry->mem.length = srat_mem->Length;
222			mementry->mem.flags = srat_mem->Flags;
223			break;
224
225		case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY:
226			srat_x2apic = (ACPI_SRAT_X2APIC_CPU_AFFINITY *)subtable;
227			nodeid = srat_x2apic->ProximityDomain;
228
229			/* This table entry overrides
230			 * ACPI_SRAT_TYPE_CPU_AFFINITY.
231			 */
232			if (!ignore_cpu_affinity) {
233				struct cpulist *citer;
234				while ((citer = CPU_FIRST) != NULL) {
235					CPU_REM(citer);
236					cpu_free(citer);
237				}
238				ignore_cpu_affinity = true;
239			}
240
241			cpuentry = cpu_alloc();
242			if (cpuentry == NULL)
243				return ENOMEM;
244			CPU_ADD(cpuentry);
245
246			cpuentry->cpu.nodeid = nodeid;
247			cpuentry->cpu.apicid = srat_x2apic->ApicId;
248			cpuentry->cpu.clockdomain = srat_x2apic->ClockDomain;
249			cpuentry->cpu.flags = srat_x2apic->Flags;
250			break;
251
252		case ACPI_SRAT_TYPE_RESERVED:
253			printf("ACPI SRAT subtable reserved, length: 0x%x\n",
254				subtable->Length);
255			break;
256		}
257	}
258
259	return 0;
260}
261
262static int
263acpisrat_quirks(void)
264{
265	struct cpulist *citer;
266	struct memlist *mem, *miter;
267
268	/* Some sanity checks. */
269
270	/* Deal with holes in the memory nodes.
271	 * BIOS doesn't enlist memory nodes which
272	 * don't have any memory modules plugged in.
273	 * This behaviour has been observed on AMD machines.
274	 *
275	 * Do that by searching for CPUs in NUMA nodes
276	 * which don't exist in the memory and then insert
277	 * a zero memory range for the missing node.
278	 */
279	CPU_FOREACH(citer) {
280		mem = mem_get(citer->cpu.nodeid);
281		if (mem != NULL)
282			continue;
283		mem = mem_alloc();
284		if (mem == NULL)
285			return ENOMEM;
286		mem->mem.nodeid = citer->cpu.nodeid;
287		/* all other fields are already zero filled */
288
289		MEM_FOREACH(miter) {
290			if (miter->mem.nodeid < citer->cpu.nodeid)
291				continue;
292			MEM_ADD_BEFORE(mem, miter);
293			break;
294		}
295	}
296
297	return 0;
298}
299
/*
 * Locate the SRAT and build the NUMA tables from it.
 *
 * Returns EEXIST when acpisrat_exist() reports that no usable SRAT is
 * available; otherwise returns whatever acpisrat_refresh() returns.
 */
int
acpisrat_init(void)
{
	if (acpisrat_exist())
		return acpisrat_refresh();

	return EEXIST;
}
307
308int
309acpisrat_refresh(void)
310{
311	int rc, i, j, k;
312	struct cpulist *citer;
313	struct memlist *miter;
314	uint32_t cnodes = 0, mnodes = 0;
315
316	CPU_INIT;
317	MEM_INIT;
318
319	rc = acpisrat_parse();
320	if (rc)
321		return rc;
322
323	rc = acpisrat_quirks();
324	if (rc)
325		return rc;
326
327	/* cleanup resources */
328	rc = acpisrat_exit();
329	if (rc)
330		return rc;
331
332	nnodes = 0;
333	ncpus = 0;
334	CPU_FOREACH(citer) {
335		cnodes = MAX(citer->cpu.nodeid, cnodes);
336		ncpus++;
337	}
338
339	nmems = 0;
340	MEM_FOREACH(miter) {
341		mnodes = MAX(miter->mem.nodeid, mnodes);
342		nmems++;
343	}
344
345	nnodes = MAX(cnodes, mnodes) + 1;
346
347	node_array = kmem_zalloc(nnodes * sizeof(struct acpisrat_node),
348	    KM_NOSLEEP);
349	if (node_array == NULL)
350		return ENOMEM;
351
352	cpu_array = kmem_zalloc(ncpus * sizeof(struct acpisrat_cpu),
353	    KM_NOSLEEP);
354	if (cpu_array == NULL)
355		return ENOMEM;
356
357	mem_array = kmem_zalloc(nmems * sizeof(struct acpisrat_mem),
358	    KM_NOSLEEP);
359	if (mem_array == NULL)
360		return ENOMEM;
361
362	i = 0;
363	CPU_FOREACH(citer) {
364		memcpy(&cpu_array[i], &citer->cpu, sizeof(struct acpisrat_cpu));
365		i++;
366		node_array[citer->cpu.nodeid].ncpus++;
367	}
368
369	i = 0;
370	MEM_FOREACH(miter) {
371		memcpy(&mem_array[i], &miter->mem, sizeof(struct acpisrat_mem));
372		i++;
373		node_array[miter->mem.nodeid].nmems++;
374	}
375
376	for (i = 0; i < nnodes; i++) {
377		node_array[i].nodeid = i;
378
379		node_array[i].cpu = kmem_zalloc(node_array[i].ncpus *
380		    sizeof(struct acpisrat_cpu *), KM_NOSLEEP);
381		node_array[i].mem = kmem_zalloc(node_array[i].nmems *
382		    sizeof(struct acpisrat_mem *), KM_NOSLEEP);
383
384		k = 0;
385		for (j = 0; j < ncpus; j++) {
386			if (cpu_array[j].nodeid != i)
387				continue;
388			node_array[i].cpu[k] = &cpu_array[j];
389			k++;
390		}
391
392		k = 0;
393		for (j = 0; j < nmems; j++) {
394			if (mem_array[j].nodeid != i)
395				continue;
396			node_array[i].mem[k] = &mem_array[j];
397			k++;
398		}
399	}
400
401	while ((citer = CPU_FIRST) != NULL) {
402		CPU_REM(citer);
403		cpu_free(citer);
404	}
405
406	while ((miter = MEM_FIRST) != NULL) {
407		MEM_REM(miter);
408		mem_free(miter);
409	}
410
411	return 0;
412}
413
414
415int
416acpisrat_exit(void)
417{
418	int i;
419
420	if (node_array) {
421		for (i = 0; i < nnodes; i++) {
422			if (node_array[i].cpu)
423				kmem_free(node_array[i].cpu,
424				    node_array[i].ncpus * sizeof(struct acpisrat_cpu *));
425			if (node_array[i].mem)
426				kmem_free(node_array[i].mem,
427				    node_array[i].nmems * sizeof(struct acpisrat_mem *));
428		}
429		kmem_free(node_array, nnodes * sizeof(struct acpisrat_node));
430	}
431	node_array = NULL;
432
433	if (cpu_array)
434		kmem_free(cpu_array, ncpus * sizeof(struct acpisrat_cpu));
435	cpu_array = NULL;
436
437	if (mem_array)
438		kmem_free(mem_array, nmems * sizeof(struct acpisrat_mem));
439	mem_array = NULL;
440
441	nnodes = 0;
442	ncpus = 0;
443	nmems = 0;
444
445	return 0;
446}
447
448
449void
450acpisrat_dump(void)
451{
452	uint32_t i, j, nn, nc, nm;
453	struct acpisrat_cpu c;
454	struct acpisrat_mem m;
455
456	nn = acpisrat_nodes();
457	aprint_debug("SRAT: %u NUMA nodes\n", nn);
458	for (i = 0; i < nn; i++) {
459		nc = acpisrat_node_cpus(i);
460		for (j = 0; j < nc; j++) {
461			acpisrat_cpu(i, j, &c);
462			aprint_debug("SRAT: node %u cpu %u "
463			    "(apic %u, sapic %u, flags %u, clockdomain %u)\n",
464			    c.nodeid, j, c.apicid, c.sapiceid, c.flags,
465			    c.clockdomain);
466		}
467
468		nm = acpisrat_node_memoryranges(i);
469		for (j = 0; j < nm; j++) {
470			acpisrat_mem(i, j, &m);
471			aprint_debug("SRAT: node %u memory range %u (0x%"
472			    PRIx64" - 0x%"PRIx64" flags %u)\n",
473			    m.nodeid, j, m.baseaddress,
474			    m.baseaddress + m.length, m.flags);
475		}
476	}
477}
478
/*
 * Return the number of NUMA nodes.  This is the value acpisrat_refresh()
 * computes as the highest node id plus one; acpisrat_exit() resets it
 * to 0.
 */
uint32_t
acpisrat_nodes(void)
{
	return nnodes;
}
484
485uint32_t
486acpisrat_node_cpus(acpisrat_nodeid_t nodeid)
487{
488	return node_array[nodeid].ncpus;
489}
490
491uint32_t
492acpisrat_node_memoryranges(acpisrat_nodeid_t nodeid)
493{
494	return node_array[nodeid].nmems;
495}
496
497void
498acpisrat_cpu(acpisrat_nodeid_t nodeid, uint32_t cpunum,
499    struct acpisrat_cpu *c)
500{
501	memcpy(c, node_array[nodeid].cpu[cpunum],
502	    sizeof(struct acpisrat_cpu));
503}
504
505void
506acpisrat_mem(acpisrat_nodeid_t nodeid, uint32_t memrange,
507    struct acpisrat_mem *mem)
508{
509	memcpy(mem, node_array[nodeid].mem[memrange],
510	    sizeof(struct acpisrat_mem));
511}
512