1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 2013 Gleb Smirnoff <glebius@FreeBSD.org>
5 * Copyright (c) 2010 Juniper Networks, Inc.
6 * Copyright (c) 2009 Robert N. M. Watson
7 * Copyright (c) 2009 Bjoern A. Zeeb <bz@FreeBSD.org>
8 * Copyright (c) 2008 Yahoo!, Inc.
9 * All rights reserved.
10 *
11 * Written by: John Baldwin <jhb@FreeBSD.org>
12 *
13 * This software was developed by Robert N. M. Watson under contract
14 * to Juniper Networks, Inc.
15 *
16 * Redistribution and use in source and binary forms, with or without
17 * modification, are permitted provided that the following conditions
18 * are met:
19 * 1. Redistributions of source code must retain the above copyright
20 *    notice, this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright
22 *    notice, this list of conditions and the following disclaimer in the
23 *    documentation and/or other materials provided with the distribution.
24 * 3. Neither the name of the author nor the names of any co-contributors
25 *    may be used to endorse or promote products derived from this software
26 *    without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 */
40
41#include <sys/cdefs.h>
42__FBSDID("$FreeBSD$");
43
44#include <sys/param.h>
45#include <sys/pcpu.h>
46#include <sys/sysctl.h>
47#include <kvm.h>
48#include <limits.h>
49#include <stdlib.h>
50
51#include "kvm_private.h"
52
53#ifdef __amd64__
54#define	__OFFSET_BY_PCPU
55#endif
56
/*
 * Kernel symbols looked up by _kvm_pcpu_init().  The position of each entry
 * must match the corresponding NL_* index macro below.
 */
static struct nlist kvm_pcpu_nl[] = {
	{ .n_name = "_cpuid_to_pcpu" },
	{ .n_name = "_mp_maxcpus" },
	{ .n_name = "_mp_ncpus" },
#ifdef __OFFSET_BY_PCPU
	{ .n_name = "___pcpu" },
#endif
	{ .n_name = NULL },
};
/* Indices into kvm_pcpu_nl[]. */
#define	NL_CPUID_TO_PCPU	0
#define	NL_MP_MAXCPUS		1
#define	NL_MP_NCPUS		2
/* Refers to a real symbol entry only when __OFFSET_BY_PCPU is defined. */
#define	NL___PCPU		3
70
/*
 * Kernel per-CPU data state.  We cache this stuff on the first
 * access.
 *
 * XXXRW: Possibly, this (and kvm_pcpu_nl) should be per-kvm_t, in case the
 * consumer has multiple handles in flight to differently configured
 * kernels/crashdumps.
 */
/* Copy of the kernel's cpuid_to_pcpu[] array, maxcpu entries long. */
static void **pcpu_data;
/* Cached value of the kernel's mp_maxcpus; 0 means "not loaded yet". */
static int maxcpu;
/* Cached value of the kernel's mp_ncpus; 0 means "not loaded yet". */
static int mp_ncpus;
#ifdef __OFFSET_BY_PCPU
/* Value read from the kernel's __pcpu symbol; added to zpcpu bases. */
static unsigned long __pcpu;
#endif
85
86static int
87_kvm_pcpu_init(kvm_t *kd)
88{
89	size_t len;
90	int max;
91	void *data;
92
93	if (kvm_nlist(kd, kvm_pcpu_nl) < 0)
94		return (-1);
95	if (kvm_pcpu_nl[NL_CPUID_TO_PCPU].n_value == 0) {
96		_kvm_err(kd, kd->program, "unable to find cpuid_to_pcpu");
97		return (-1);
98	}
99	if (kvm_pcpu_nl[NL_MP_MAXCPUS].n_value == 0) {
100		_kvm_err(kd, kd->program, "unable to find mp_maxcpus");
101		return (-1);
102	}
103	if (kvm_read(kd, kvm_pcpu_nl[NL_MP_MAXCPUS].n_value, &max,
104	    sizeof(max)) != sizeof(max)) {
105		_kvm_err(kd, kd->program, "cannot read mp_maxcpus");
106		return (-1);
107	}
108	if (kvm_pcpu_nl[NL_MP_NCPUS].n_value == 0) {
109		_kvm_err(kd, kd->program, "unable to find mp_ncpus");
110		return (-1);
111	}
112	if (kvm_read(kd, kvm_pcpu_nl[NL_MP_NCPUS].n_value, &mp_ncpus,
113	    sizeof(mp_ncpus)) != sizeof(mp_ncpus)) {
114		_kvm_err(kd, kd->program, "cannot read mp_ncpus");
115		return (-1);
116	}
117#ifdef __OFFSET_BY_PCPU
118	if (kvm_pcpu_nl[NL___PCPU].n_value == 0) {
119		_kvm_err(kd, kd->program, "unable to find __pcpu");
120		return (-1);
121	}
122	if (kvm_read(kd, kvm_pcpu_nl[NL___PCPU].n_value, &__pcpu,
123	    sizeof(__pcpu)) != sizeof(__pcpu)) {
124		_kvm_err(kd, kd->program, "cannot read __pcpu");
125		return (-1);
126	}
127#endif
128	len = max * sizeof(void *);
129	data = malloc(len);
130	if (data == NULL) {
131		_kvm_err(kd, kd->program, "out of memory");
132		return (-1);
133	}
134	if (kvm_read(kd, kvm_pcpu_nl[NL_CPUID_TO_PCPU].n_value, data, len) !=
135	   (ssize_t)len) {
136		_kvm_err(kd, kd->program, "cannot read cpuid_to_pcpu array");
137		free(data);
138		return (-1);
139	}
140	pcpu_data = data;
141	maxcpu = max;
142	return (0);
143}
144
145static void
146_kvm_pcpu_clear(void)
147{
148
149	maxcpu = 0;
150	free(pcpu_data);
151	pcpu_data = NULL;
152}
153
154void *
155kvm_getpcpu(kvm_t *kd, int cpu)
156{
157	char *buf;
158
159	if (kd == NULL) {
160		_kvm_pcpu_clear();
161		return (NULL);
162	}
163
164	if (maxcpu == 0)
165		if (_kvm_pcpu_init(kd) < 0)
166			return ((void *)-1);
167
168	if (cpu >= maxcpu || pcpu_data[cpu] == NULL)
169		return (NULL);
170
171	buf = malloc(sizeof(struct pcpu));
172	if (buf == NULL) {
173		_kvm_err(kd, kd->program, "out of memory");
174		return ((void *)-1);
175	}
176	if (kvm_read(kd, (uintptr_t)pcpu_data[cpu], buf,
177	    sizeof(struct pcpu)) != sizeof(struct pcpu)) {
178		_kvm_err(kd, kd->program, "unable to read per-CPU data");
179		free(buf);
180		return ((void *)-1);
181	}
182	return (buf);
183}
184
185int
186kvm_getmaxcpu(kvm_t *kd)
187{
188
189	if (kd == NULL) {
190		_kvm_pcpu_clear();
191		return (0);
192	}
193
194	if (maxcpu == 0)
195		if (_kvm_pcpu_init(kd) < 0)
196			return (-1);
197	return (maxcpu);
198}
199
200int
201kvm_getncpus(kvm_t *kd)
202{
203
204	if (mp_ncpus == 0)
205		if (_kvm_pcpu_init(kd) < 0)
206			return (-1);
207	return (mp_ncpus);
208}
209
210static int
211_kvm_dpcpu_setcpu(kvm_t *kd, u_int cpu, int report_error)
212{
213
214	if (!kd->dpcpu_initialized) {
215		if (report_error)
216			_kvm_err(kd, kd->program, "%s: not initialized",
217			    __func__);
218		return (-1);
219	}
220	if (cpu >= kd->dpcpu_maxcpus) {
221		if (report_error)
222			_kvm_err(kd, kd->program, "%s: CPU %u too big",
223			    __func__, cpu);
224		return (-1);
225	}
226	if (kd->dpcpu_off[cpu] == 0) {
227		if (report_error)
228			_kvm_err(kd, kd->program, "%s: CPU %u not found",
229			    __func__, cpu);
230		return (-1);
231	}
232	kd->dpcpu_curcpu = cpu;
233	kd->dpcpu_curoff = kd->dpcpu_off[cpu];
234	return (0);
235}
236
237/*
238 * Set up libkvm to handle dynamic per-CPU memory.
239 */
240static int
241_kvm_dpcpu_init(kvm_t *kd)
242{
243	struct kvm_nlist nl[] = {
244#define	NLIST_START_SET_PCPU	0
245		{ .n_name = "___start_" DPCPU_SETNAME },
246#define	NLIST_STOP_SET_PCPU	1
247		{ .n_name = "___stop_" DPCPU_SETNAME },
248#define	NLIST_DPCPU_OFF		2
249		{ .n_name = "_dpcpu_off" },
250#define	NLIST_MP_MAXCPUS	3
251		{ .n_name = "_mp_maxcpus" },
252		{ .n_name = NULL },
253	};
254	uintptr_t *dpcpu_off_buf;
255	size_t len;
256	u_int dpcpu_maxcpus;
257
258	/*
259	 * XXX: This only works for native kernels for now.
260	 */
261	if (!kvm_native(kd))
262		return (-1);
263
264	/*
265	 * Locate and cache locations of important symbols using the internal
266	 * version of _kvm_nlist, turning off initialization to avoid
267	 * recursion in case of unresolveable symbols.
268	 */
269	if (_kvm_nlist(kd, nl, 0) != 0)
270		return (-1);
271	if (kvm_read(kd, nl[NLIST_MP_MAXCPUS].n_value, &dpcpu_maxcpus,
272	    sizeof(dpcpu_maxcpus)) != sizeof(dpcpu_maxcpus))
273		return (-1);
274	len = dpcpu_maxcpus * sizeof(*dpcpu_off_buf);
275	dpcpu_off_buf = malloc(len);
276	if (dpcpu_off_buf == NULL)
277		return (-1);
278	if (kvm_read(kd, nl[NLIST_DPCPU_OFF].n_value, dpcpu_off_buf, len) !=
279	    (ssize_t)len) {
280		free(dpcpu_off_buf);
281		return (-1);
282	}
283	kd->dpcpu_start = nl[NLIST_START_SET_PCPU].n_value;
284	kd->dpcpu_stop = nl[NLIST_STOP_SET_PCPU].n_value;
285	kd->dpcpu_maxcpus = dpcpu_maxcpus;
286	kd->dpcpu_off = dpcpu_off_buf;
287	kd->dpcpu_initialized = 1;
288	(void)_kvm_dpcpu_setcpu(kd, 0, 0);
289	return (0);
290}
291
292/*
293 * Check whether the dpcpu module has been initialized successfully or not,
294 * initialize it if permitted.
295 */
296int
297_kvm_dpcpu_initialized(kvm_t *kd, int intialize)
298{
299
300	if (kd->dpcpu_initialized || !intialize)
301		return (kd->dpcpu_initialized);
302
303	(void)_kvm_dpcpu_init(kd);
304
305	return (kd->dpcpu_initialized);
306}
307
308/*
309 * Check whether the value is within the dpcpu symbol range and only if so
310 * adjust the offset relative to the current offset.
311 */
312kvaddr_t
313_kvm_dpcpu_validaddr(kvm_t *kd, kvaddr_t value)
314{
315
316	if (value == 0)
317		return (value);
318
319	if (!kd->dpcpu_initialized)
320		return (value);
321
322	if (value < kd->dpcpu_start || value >= kd->dpcpu_stop)
323		return (value);
324
325	return (kd->dpcpu_curoff + value);
326}
327
328int
329kvm_dpcpu_setcpu(kvm_t *kd, u_int cpu)
330{
331	int ret;
332
333	if (!kd->dpcpu_initialized) {
334		ret = _kvm_dpcpu_init(kd);
335		if (ret != 0) {
336			_kvm_err(kd, kd->program, "%s: init failed",
337			    __func__);
338			return (ret);
339		}
340	}
341
342	return (_kvm_dpcpu_setcpu(kd, cpu, 1));
343}
344
345/*
346 * Obtain a per-CPU copy for given cpu from UMA_ZONE_PCPU allocation.
347 */
348ssize_t
349kvm_read_zpcpu(kvm_t *kd, u_long base, void *buf, size_t size, int cpu)
350{
351
352	if (!kvm_native(kd))
353		return (-1);
354	if (mp_ncpus == 0)
355		if (_kvm_pcpu_init(kd) < 0)
356			return (0);
357
358#ifdef __OFFSET_BY_PCPU
359	base += __pcpu;
360#endif
361	return (kvm_read(kd, (uintptr_t)(base + sizeof(struct pcpu) * cpu),
362	    buf, size));
363}
364
365/*
366 * Fetch value of a counter(9).
367 */
368uint64_t
369kvm_counter_u64_fetch(kvm_t *kd, u_long base)
370{
371	uint64_t r, c;
372
373	if (mp_ncpus == 0)
374		if (_kvm_pcpu_init(kd) < 0)
375			return (0);
376
377	r = 0;
378	for (int i = 0; i < mp_ncpus; i++) {
379		if (kvm_read_zpcpu(kd, base, &c, sizeof(c), i) != sizeof(c))
380			return (0);
381		r += c;
382	}
383
384	return (r);
385}
386