1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2005 Robert N. M. Watson
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 * $FreeBSD$
29 */
30
31#include <sys/cdefs.h>
32#include <sys/param.h>
33#include <sys/malloc.h>
34#include <sys/sysctl.h>
35
36#include <err.h>
37#include <errno.h>
38#include <kvm.h>
39#include <nlist.h>
40#include <stdio.h>
41#include <stdlib.h>
42#include <string.h>
43
44#include "memstat.h"
45#include "memstat_internal.h"
46
47static struct nlist namelist[] = {
48#define	X_KMEMSTATISTICS	0
49	{ .n_name = "_kmemstatistics" },
50#define	X_MP_MAXCPUS		1
51	{ .n_name = "_mp_maxcpus" },
52	{ .n_name = "" },
53};
54
55/*
56 * Extract malloc(9) statistics from the running kernel, and store all memory
57 * type information in the passed list.  For each type, check the list for an
58 * existing entry with the right name/allocator -- if present, update that
59 * entry.  Otherwise, add a new entry.  On error, the entire list will be
60 * cleared, as entries will be in an inconsistent state.
61 *
62 * To reduce the level of work for a list that starts empty, we keep around a
63 * hint as to whether it was empty when we began, so we can avoid searching
64 * the list for entries to update.  Updates are O(n^2) due to searching for
65 * each entry before adding it.
66 */
67int
68memstat_sysctl_malloc(struct memory_type_list *list, int flags)
69{
70	struct malloc_type_stream_header *mtshp;
71	struct malloc_type_header *mthp;
72	struct malloc_type_stats *mtsp;
73	struct memory_type *mtp;
74	int count, hint_dontsearch, i, j, maxcpus;
75	char *buffer, *p;
76	size_t size;
77
78	hint_dontsearch = LIST_EMPTY(&list->mtl_list);
79
80	/*
81	 * Query the number of CPUs, number of malloc types so that we can
82	 * guess an initial buffer size.  We loop until we succeed or really
83	 * fail.  Note that the value of maxcpus we query using sysctl is not
84	 * the version we use when processing the real data -- that is read
85	 * from the header.
86	 */
87retry:
88	size = sizeof(maxcpus);
89	if (sysctlbyname("kern.smp.maxcpus", &maxcpus, &size, NULL, 0) < 0) {
90		if (errno == EACCES || errno == EPERM)
91			list->mtl_error = MEMSTAT_ERROR_PERMISSION;
92		else
93			list->mtl_error = MEMSTAT_ERROR_DATAERROR;
94		return (-1);
95	}
96	if (size != sizeof(maxcpus)) {
97		list->mtl_error = MEMSTAT_ERROR_DATAERROR;
98		return (-1);
99	}
100
101	size = sizeof(count);
102	if (sysctlbyname("kern.malloc_count", &count, &size, NULL, 0) < 0) {
103		if (errno == EACCES || errno == EPERM)
104			list->mtl_error = MEMSTAT_ERROR_PERMISSION;
105		else
106			list->mtl_error = MEMSTAT_ERROR_VERSION;
107		return (-1);
108	}
109	if (size != sizeof(count)) {
110		list->mtl_error = MEMSTAT_ERROR_DATAERROR;
111		return (-1);
112	}
113
114	size = sizeof(*mthp) + count * (sizeof(*mthp) + sizeof(*mtsp) *
115	    maxcpus);
116
117	buffer = malloc(size);
118	if (buffer == NULL) {
119		list->mtl_error = MEMSTAT_ERROR_NOMEMORY;
120		return (-1);
121	}
122
123	if (sysctlbyname("kern.malloc_stats", buffer, &size, NULL, 0) < 0) {
124		/*
125		 * XXXRW: ENOMEM is an ambiguous return, we should bound the
126		 * number of loops, perhaps.
127		 */
128		if (errno == ENOMEM) {
129			free(buffer);
130			goto retry;
131		}
132		if (errno == EACCES || errno == EPERM)
133			list->mtl_error = MEMSTAT_ERROR_PERMISSION;
134		else
135			list->mtl_error = MEMSTAT_ERROR_VERSION;
136		free(buffer);
137		return (-1);
138	}
139
140	if (size == 0) {
141		free(buffer);
142		return (0);
143	}
144
145	if (size < sizeof(*mtshp)) {
146		list->mtl_error = MEMSTAT_ERROR_VERSION;
147		free(buffer);
148		return (-1);
149	}
150	p = buffer;
151	mtshp = (struct malloc_type_stream_header *)p;
152	p += sizeof(*mtshp);
153
154	if (mtshp->mtsh_version != MALLOC_TYPE_STREAM_VERSION) {
155		list->mtl_error = MEMSTAT_ERROR_VERSION;
156		free(buffer);
157		return (-1);
158	}
159
160	/*
161	 * For the remainder of this function, we are quite trusting about
162	 * the layout of structures and sizes, since we've determined we have
163	 * a matching version and acceptable CPU count.
164	 */
165	maxcpus = mtshp->mtsh_maxcpus;
166	count = mtshp->mtsh_count;
167	for (i = 0; i < count; i++) {
168		mthp = (struct malloc_type_header *)p;
169		p += sizeof(*mthp);
170
171		if (hint_dontsearch == 0) {
172			mtp = memstat_mtl_find(list, ALLOCATOR_MALLOC,
173			    mthp->mth_name);
174		} else
175			mtp = NULL;
176		if (mtp == NULL)
177			mtp = _memstat_mt_allocate(list, ALLOCATOR_MALLOC,
178			    mthp->mth_name, maxcpus);
179		if (mtp == NULL) {
180			_memstat_mtl_empty(list);
181			free(buffer);
182			list->mtl_error = MEMSTAT_ERROR_NOMEMORY;
183			return (-1);
184		}
185
186		/*
187		 * Reset the statistics on a current node.
188		 */
189		_memstat_mt_reset_stats(mtp, maxcpus);
190
191		for (j = 0; j < maxcpus; j++) {
192			mtsp = (struct malloc_type_stats *)p;
193			p += sizeof(*mtsp);
194
195			/*
196			 * Sumarize raw statistics across CPUs into coalesced
197			 * statistics.
198			 */
199			mtp->mt_memalloced += mtsp->mts_memalloced;
200			mtp->mt_memfreed += mtsp->mts_memfreed;
201			mtp->mt_numallocs += mtsp->mts_numallocs;
202			mtp->mt_numfrees += mtsp->mts_numfrees;
203			mtp->mt_sizemask |= mtsp->mts_size;
204
205			/*
206			 * Copies of per-CPU statistics.
207			 */
208			mtp->mt_percpu_alloc[j].mtp_memalloced =
209			    mtsp->mts_memalloced;
210			mtp->mt_percpu_alloc[j].mtp_memfreed =
211			    mtsp->mts_memfreed;
212			mtp->mt_percpu_alloc[j].mtp_numallocs =
213			    mtsp->mts_numallocs;
214			mtp->mt_percpu_alloc[j].mtp_numfrees =
215			    mtsp->mts_numfrees;
216			mtp->mt_percpu_alloc[j].mtp_sizemask =
217			    mtsp->mts_size;
218		}
219
220		/*
221		 * Derived cross-CPU statistics.
222		 */
223		mtp->mt_bytes = mtp->mt_memalloced - mtp->mt_memfreed;
224		mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees;
225	}
226
227	free(buffer);
228
229	return (0);
230}
231
232static int
233kread(kvm_t *kvm, void *kvm_pointer, void *address, size_t size,
234    size_t offset)
235{
236	ssize_t ret;
237
238	ret = kvm_read(kvm, (unsigned long)kvm_pointer + offset, address,
239	    size);
240	if (ret < 0)
241		return (MEMSTAT_ERROR_KVM);
242	if ((size_t)ret != size)
243		return (MEMSTAT_ERROR_KVM_SHORTREAD);
244	return (0);
245}
246
247static int
248kread_string(kvm_t *kvm, const void *kvm_pointer, char *buffer, int buflen)
249{
250	ssize_t ret;
251	int i;
252
253	for (i = 0; i < buflen; i++) {
254		ret = kvm_read(kvm, __DECONST(unsigned long, kvm_pointer) +
255		    i, &(buffer[i]), sizeof(char));
256		if (ret < 0)
257			return (MEMSTAT_ERROR_KVM);
258		if ((size_t)ret != sizeof(char))
259			return (MEMSTAT_ERROR_KVM_SHORTREAD);
260		if (buffer[i] == '\0')
261			return (0);
262	}
263	/* Truncate. */
264	buffer[i-1] = '\0';
265	return (0);
266}
267
268static int
269kread_symbol(kvm_t *kvm, int index, void *address, size_t size,
270    size_t offset)
271{
272	ssize_t ret;
273
274	ret = kvm_read(kvm, namelist[index].n_value + offset, address, size);
275	if (ret < 0)
276		return (MEMSTAT_ERROR_KVM);
277	if ((size_t)ret != size)
278		return (MEMSTAT_ERROR_KVM_SHORTREAD);
279	return (0);
280}
281
282static int
283kread_zpcpu(kvm_t *kvm, u_long base, void *buf, size_t size, int cpu)
284{
285	ssize_t ret;
286
287	ret = kvm_read_zpcpu(kvm, base, buf, size, cpu);
288	if (ret < 0)
289		return (MEMSTAT_ERROR_KVM);
290	if ((size_t)ret != size)
291		return (MEMSTAT_ERROR_KVM_SHORTREAD);
292	return (0);
293}
294
295int
296memstat_kvm_malloc(struct memory_type_list *list, void *kvm_handle)
297{
298	struct memory_type *mtp;
299	void *kmemstatistics;
300	int hint_dontsearch, j, mp_maxcpus, mp_ncpus, ret;
301	char name[MEMTYPE_MAXNAME];
302	struct malloc_type_stats mts;
303	struct malloc_type_internal mti, *mtip;
304	struct malloc_type type, *typep;
305	kvm_t *kvm;
306
307	kvm = (kvm_t *)kvm_handle;
308
309	hint_dontsearch = LIST_EMPTY(&list->mtl_list);
310
311	if (kvm_nlist(kvm, namelist) != 0) {
312		list->mtl_error = MEMSTAT_ERROR_KVM;
313		return (-1);
314	}
315
316	if (namelist[X_KMEMSTATISTICS].n_type == 0 ||
317	    namelist[X_KMEMSTATISTICS].n_value == 0) {
318		list->mtl_error = MEMSTAT_ERROR_KVM_NOSYMBOL;
319		return (-1);
320	}
321
322	ret = kread_symbol(kvm, X_MP_MAXCPUS, &mp_maxcpus,
323	    sizeof(mp_maxcpus), 0);
324	if (ret != 0) {
325		list->mtl_error = ret;
326		return (-1);
327	}
328
329	ret = kread_symbol(kvm, X_KMEMSTATISTICS, &kmemstatistics,
330	    sizeof(kmemstatistics), 0);
331	if (ret != 0) {
332		list->mtl_error = ret;
333		return (-1);
334	}
335
336	mp_ncpus = kvm_getncpus(kvm);
337
338	for (typep = kmemstatistics; typep != NULL; typep = type.ks_next) {
339		ret = kread(kvm, typep, &type, sizeof(type), 0);
340		if (ret != 0) {
341			_memstat_mtl_empty(list);
342			list->mtl_error = ret;
343			return (-1);
344		}
345		ret = kread_string(kvm, (void *)type.ks_shortdesc, name,
346		    MEMTYPE_MAXNAME);
347		if (ret != 0) {
348			_memstat_mtl_empty(list);
349			list->mtl_error = ret;
350			return (-1);
351		}
352
353		/*
354		 * Since our compile-time value for MAXCPU may differ from the
355		 * kernel's, we populate our own array.
356		 */
357		mtip = type.ks_handle;
358		ret = kread(kvm, mtip, &mti, sizeof(mti), 0);
359		if (ret != 0) {
360			_memstat_mtl_empty(list);
361			list->mtl_error = ret;
362			return (-1);
363		}
364
365		if (hint_dontsearch == 0) {
366			mtp = memstat_mtl_find(list, ALLOCATOR_MALLOC, name);
367		} else
368			mtp = NULL;
369		if (mtp == NULL)
370			mtp = _memstat_mt_allocate(list, ALLOCATOR_MALLOC,
371			    name, mp_maxcpus);
372		if (mtp == NULL) {
373			_memstat_mtl_empty(list);
374			list->mtl_error = MEMSTAT_ERROR_NOMEMORY;
375			return (-1);
376		}
377
378		/*
379		 * This logic is replicated from kern_malloc.c, and should
380		 * be kept in sync.
381		 */
382		_memstat_mt_reset_stats(mtp, mp_maxcpus);
383		for (j = 0; j < mp_ncpus; j++) {
384			ret = kread_zpcpu(kvm, (u_long)mti.mti_stats, &mts,
385			    sizeof(mts), j);
386			if (ret != 0) {
387				_memstat_mtl_empty(list);
388				list->mtl_error = ret;
389				return (-1);
390			}
391			mtp->mt_memalloced += mts.mts_memalloced;
392			mtp->mt_memfreed += mts.mts_memfreed;
393			mtp->mt_numallocs += mts.mts_numallocs;
394			mtp->mt_numfrees += mts.mts_numfrees;
395			mtp->mt_sizemask |= mts.mts_size;
396
397			mtp->mt_percpu_alloc[j].mtp_memalloced =
398			    mts.mts_memalloced;
399			mtp->mt_percpu_alloc[j].mtp_memfreed =
400			    mts.mts_memfreed;
401			mtp->mt_percpu_alloc[j].mtp_numallocs =
402			    mts.mts_numallocs;
403			mtp->mt_percpu_alloc[j].mtp_numfrees =
404			    mts.mts_numfrees;
405			mtp->mt_percpu_alloc[j].mtp_sizemask =
406			    mts.mts_size;
407		}
408		for (; j < mp_maxcpus; j++) {
409			bzero(&mtp->mt_percpu_alloc[j],
410			    sizeof(mtp->mt_percpu_alloc[0]));
411		}
412
413		mtp->mt_bytes = mtp->mt_memalloced - mtp->mt_memfreed;
414		mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees;
415	}
416
417	return (0);
418}
419