/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2005 Robert N. M. Watson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>

#include <err.h>
#include <errno.h>
#include <kvm.h>
#include <nlist.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "memstat.h"
#include "memstat_internal.h"

static int memstat_malloc_zone_count;
static int memstat_malloc_zone_sizes[32];

static int	memstat_malloc_zone_init(void);
static int	memstat_malloc_zone_init_kvm(kvm_t *kvm);

static struct nlist namelist[] = {
#define	X_KMEMSTATISTICS	0
	{ .n_name = "_kmemstatistics" },
#define	X_KMEMZONES		1
	{ .n_name = "_kmemzones" },
#define	X_NUMZONES		2
	{ .n_name = "_numzones" },
#define	X_VM_MALLOC_ZONE_COUNT	3
	{ .n_name = "_vm_malloc_zone_count" },
#define	X_MP_MAXCPUS		4
	{ .n_name = "_mp_maxcpus" },
	{ .n_name = "" },
};

/*
 * Extract malloc(9) statistics from the running kernel, and store all memory
 * type information in the passed list.  For each type, check the list for an
 * existing entry with the right name/allocator -- if present, update that
 * entry.  Otherwise, add a new entry.  On error, the entire list will be
 * cleared, as entries will be in an inconsistent state.
 *
 * To reduce the level of work for a list that starts empty, we keep around a
 * hint as to whether it was empty when we began, so we can avoid searching
 * the list for entries to update.  Updates are O(n^2) due to searching for
 * each entry before adding it.
 */
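/*
 * Example usage (an illustrative sketch, not part of this file): a typical
 * consumer drives this function through the public memstat(3) interfaces,
 * roughly as follows:
 *
 *	struct memory_type_list *mtlp;
 *	struct memory_type *mtp;
 *
 *	mtlp = memstat_mtl_alloc();
 *	if (mtlp == NULL)
 *		err(1, "memstat_mtl_alloc");
 *	if (memstat_sysctl_malloc(mtlp, 0) < 0)
 *		errx(1, "memstat_sysctl_malloc: %s",
 *		    memstat_strerror(memstat_mtl_geterror(mtlp)));
 *	for (mtp = memstat_mtl_first(mtlp); mtp != NULL;
 *	    mtp = memstat_mtl_next(mtp))
 *		printf("%s: %ju bytes in use\n", memstat_get_name(mtp),
 *		    (uintmax_t)memstat_get_bytes(mtp));
 *	memstat_mtl_free(mtlp);
 */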
int
memstat_sysctl_malloc(struct memory_type_list *list, int flags)
{
	struct malloc_type_stream_header *mtshp;
	struct malloc_type_header *mthp;
	struct malloc_type_stats *mtsp;
	struct memory_type *mtp;
	int count, hint_dontsearch, i, j, maxcpus;
	char *buffer, *p;
	size_t size;

	hint_dontsearch = LIST_EMPTY(&list->mtl_list);

	/*
	 * Query the number of CPUs and the number of malloc types so that
	 * we can guess an initial buffer size.  We loop until we succeed or
	 * really fail.  Note that the value of maxcpus we query using sysctl
	 * is not the version we use when processing the real data -- that is
	 * read from the header.
	 */
retry:
	size = sizeof(maxcpus);
	if (sysctlbyname("kern.smp.maxcpus", &maxcpus, &size, NULL, 0) < 0) {
		if (errno == EACCES || errno == EPERM)
			list->mtl_error = MEMSTAT_ERROR_PERMISSION;
		else
			list->mtl_error = MEMSTAT_ERROR_DATAERROR;
		return (-1);
	}
	if (size != sizeof(maxcpus)) {
		list->mtl_error = MEMSTAT_ERROR_DATAERROR;
		return (-1);
	}

	size = sizeof(count);
	if (sysctlbyname("kern.malloc_count", &count, &size, NULL, 0) < 0) {
		if (errno == EACCES || errno == EPERM)
			list->mtl_error = MEMSTAT_ERROR_PERMISSION;
		else
			list->mtl_error = MEMSTAT_ERROR_VERSION;
		return (-1);
	}
	if (size != sizeof(count)) {
		list->mtl_error = MEMSTAT_ERROR_DATAERROR;
		return (-1);
	}

	if (memstat_malloc_zone_init() == -1) {
		list->mtl_error = MEMSTAT_ERROR_VERSION;
		return (-1);
	}

	size = sizeof(*mtshp) + count * (sizeof(*mthp) + sizeof(*mtsp) *
	    maxcpus);

	buffer = malloc(size);
	if (buffer == NULL) {
		list->mtl_error = MEMSTAT_ERROR_NOMEMORY;
		return (-1);
	}

	if (sysctlbyname("kern.malloc_stats", buffer, &size, NULL, 0) < 0) {
		/*
		 * XXXRW: ENOMEM is an ambiguous return; perhaps we should
		 * bound the number of retries.
		 */
		if (errno == ENOMEM) {
			free(buffer);
			goto retry;
		}
		if (errno == EACCES || errno == EPERM)
			list->mtl_error = MEMSTAT_ERROR_PERMISSION;
		else
			list->mtl_error = MEMSTAT_ERROR_VERSION;
		free(buffer);
		return (-1);
	}

	if (size == 0) {
		free(buffer);
		return (0);
	}

	if (size < sizeof(*mtshp)) {
		list->mtl_error = MEMSTAT_ERROR_VERSION;
		free(buffer);
		return (-1);
	}
	p = buffer;
	mtshp = (struct malloc_type_stream_header *)p;
	p += sizeof(*mtshp);

	if (mtshp->mtsh_version != MALLOC_TYPE_STREAM_VERSION) {
		list->mtl_error = MEMSTAT_ERROR_VERSION;
		free(buffer);
		return (-1);
	}

	/*
	 * For the remainder of this function, we are quite trusting about
	 * the layout of structures and sizes, since we've determined we have
	 * a matching version and acceptable CPU count.
	 */
	maxcpus = mtshp->mtsh_maxcpus;
	count = mtshp->mtsh_count;
	for (i = 0; i < count; i++) {
		mthp = (struct malloc_type_header *)p;
		p += sizeof(*mthp);

		if (hint_dontsearch == 0) {
			mtp = memstat_mtl_find(list, ALLOCATOR_MALLOC,
			    mthp->mth_name);
		} else
			mtp = NULL;
		if (mtp == NULL)
			mtp = _memstat_mt_allocate(list, ALLOCATOR_MALLOC,
			    mthp->mth_name, maxcpus);
		if (mtp == NULL) {
			_memstat_mtl_empty(list);
			free(buffer);
			list->mtl_error = MEMSTAT_ERROR_NOMEMORY;
			return (-1);
		}

		/*
		 * Reset the statistics for the current node.
		 */
		_memstat_mt_reset_stats(mtp, maxcpus);

		for (j = 0; j < maxcpus; j++) {
			mtsp = (struct malloc_type_stats *)p;
			p += sizeof(*mtsp);

			/*
			 * Summarize raw statistics across CPUs into
			 * coalesced statistics.
			 */
			mtp->mt_memalloced += mtsp->mts_memalloced;
			mtp->mt_memfreed += mtsp->mts_memfreed;
			mtp->mt_numallocs += mtsp->mts_numallocs;
			mtp->mt_numfrees += mtsp->mts_numfrees;
			mtp->mt_sizemask |= mtsp->mts_size;

			/*
			 * Copies of per-CPU statistics.
			 */
			mtp->mt_percpu_alloc[j].mtp_memalloced =
			    mtsp->mts_memalloced;
			mtp->mt_percpu_alloc[j].mtp_memfreed =
			    mtsp->mts_memfreed;
			mtp->mt_percpu_alloc[j].mtp_numallocs =
			    mtsp->mts_numallocs;
			mtp->mt_percpu_alloc[j].mtp_numfrees =
			    mtsp->mts_numfrees;
			mtp->mt_percpu_alloc[j].mtp_sizemask =
			    mtsp->mts_size;
		}

		/*
		 * Derived cross-CPU statistics.
		 */
		mtp->mt_bytes = mtp->mt_memalloced - mtp->mt_memfreed;
		mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees;
	}

	free(buffer);

	return (0);
}

/*
 * kvm(3) read helpers.  Each returns 0 on success or a MEMSTAT_ERROR_*
 * value on failure.
 *
 * Read an object from the kernel at the given pointer plus byte offset.
 */
static int
kread(kvm_t *kvm, void *kvm_pointer, void *address, size_t size,
    size_t offset)
{
	ssize_t ret;

	ret = kvm_read(kvm, (unsigned long)kvm_pointer + offset, address,
	    size);
	if (ret < 0)
		return (MEMSTAT_ERROR_KVM);
	if ((size_t)ret != size)
		return (MEMSTAT_ERROR_KVM_SHORTREAD);
	return (0);
}

/*
 * Copy a nul-terminated string from the kernel, one byte at a time,
 * truncating with a nul if the local buffer fills first.
 */
static int
kread_string(kvm_t *kvm, const void *kvm_pointer, char *buffer, int buflen)
{
	ssize_t ret;
	int i;

	for (i = 0; i < buflen; i++) {
		ret = kvm_read(kvm, __DECONST(unsigned long, kvm_pointer) +
		    i, &(buffer[i]), sizeof(char));
		if (ret < 0)
			return (MEMSTAT_ERROR_KVM);
		if ((size_t)ret != sizeof(char))
			return (MEMSTAT_ERROR_KVM_SHORTREAD);
		if (buffer[i] == '\0')
			return (0);
	}
	/* Truncate. */
	buffer[i-1] = '\0';
	return (0);
}

/*
 * Read an object at the address of a previously resolved namelist symbol.
 */
static int
kread_symbol(kvm_t *kvm, int index, void *address, size_t size,
    size_t offset)
{
	ssize_t ret;

	ret = kvm_read(kvm, namelist[index].n_value + offset, address, size);
	if (ret < 0)
		return (MEMSTAT_ERROR_KVM);
	if ((size_t)ret != size)
		return (MEMSTAT_ERROR_KVM_SHORTREAD);
	return (0);
}

/*
 * Read the given CPU's copy of a per-CPU (zpcpu) object.
 */
static int
kread_zpcpu(kvm_t *kvm, u_long base, void *buf, size_t size, int cpu)
{
	ssize_t ret;

	ret = kvm_read_zpcpu(kvm, base, buf, size, cpu);
	if (ret < 0)
		return (MEMSTAT_ERROR_KVM);
	if ((size_t)ret != size)
		return (MEMSTAT_ERROR_KVM_SHORTREAD);
	return (0);
}
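/*
 * As with memstat_sysctl_malloc(), but extract malloc(9) statistics through
 * a kvm(3) handle, which may refer to a live kernel or a crash dump.
 *
 * Example usage (an illustrative sketch; the vmcore path is hypothetical):
 *
 *	char errbuf[_POSIX2_LINE_MAX];
 *	kvm_t *kvm;
 *
 *	kvm = kvm_openfiles(NULL, "/var/crash/vmcore.0", NULL, O_RDONLY,
 *	    errbuf);
 *	if (kvm == NULL)
 *		errx(1, "kvm_openfiles: %s", errbuf);
 *	if (memstat_kvm_malloc(mtlp, kvm) < 0)
 *		errx(1, "memstat_kvm_malloc: %s",
 *		    memstat_strerror(memstat_mtl_geterror(mtlp)));
 */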
int
memstat_kvm_malloc(struct memory_type_list *list, void *kvm_handle)
{
	struct memory_type *mtp;
	void *kmemstatistics;
	int hint_dontsearch, j, mp_maxcpus, mp_ncpus, ret;
	char name[MEMTYPE_MAXNAME];
	struct malloc_type_stats mts;
	struct malloc_type_internal *mtip;
	struct malloc_type type, *typep;
	kvm_t *kvm;

	kvm = (kvm_t *)kvm_handle;

	hint_dontsearch = LIST_EMPTY(&list->mtl_list);

	if (kvm_nlist(kvm, namelist) != 0) {
		list->mtl_error = MEMSTAT_ERROR_KVM;
		return (-1);
	}

	if (namelist[X_KMEMSTATISTICS].n_type == 0 ||
	    namelist[X_KMEMSTATISTICS].n_value == 0) {
		list->mtl_error = MEMSTAT_ERROR_KVM_NOSYMBOL;
		return (-1);
	}

	ret = kread_symbol(kvm, X_MP_MAXCPUS, &mp_maxcpus,
	    sizeof(mp_maxcpus), 0);
	if (ret != 0) {
		list->mtl_error = ret;
		return (-1);
	}

	ret = kread_symbol(kvm, X_KMEMSTATISTICS, &kmemstatistics,
	    sizeof(kmemstatistics), 0);
	if (ret != 0) {
		list->mtl_error = ret;
		return (-1);
	}

	ret = memstat_malloc_zone_init_kvm(kvm);
	if (ret != 0) {
		list->mtl_error = ret;
		return (-1);
	}

	mp_ncpus = kvm_getncpus(kvm);

	for (typep = kmemstatistics; typep != NULL; typep = type.ks_next) {
		ret = kread(kvm, typep, &type, sizeof(type), 0);
		if (ret != 0) {
			_memstat_mtl_empty(list);
			list->mtl_error = ret;
			return (-1);
		}
		ret = kread_string(kvm, (void *)type.ks_shortdesc, name,
		    MEMTYPE_MAXNAME);
		if (ret != 0) {
			_memstat_mtl_empty(list);
			list->mtl_error = ret;
			return (-1);
		}
		if (type.ks_version != M_VERSION) {
			warnx("type %s with unsupported version %lu; skipped",
			    name, type.ks_version);
			continue;
		}

		/*
		 * Since our compile-time value for MAXCPU may differ from the
		 * kernel's, we populate our own array.
		 */
		mtip = &type.ks_mti;

		if (hint_dontsearch == 0) {
			mtp = memstat_mtl_find(list, ALLOCATOR_MALLOC, name);
		} else
			mtp = NULL;
		if (mtp == NULL)
			mtp = _memstat_mt_allocate(list, ALLOCATOR_MALLOC,
			    name, mp_maxcpus);
		if (mtp == NULL) {
			_memstat_mtl_empty(list);
			list->mtl_error = MEMSTAT_ERROR_NOMEMORY;
			return (-1);
		}

		/*
		 * This logic is replicated from kern_malloc.c, and should
		 * be kept in sync.
		 */
		_memstat_mt_reset_stats(mtp, mp_maxcpus);
		for (j = 0; j < mp_ncpus; j++) {
			ret = kread_zpcpu(kvm, (u_long)mtip->mti_stats, &mts,
			    sizeof(mts), j);
			if (ret != 0) {
				_memstat_mtl_empty(list);
				list->mtl_error = ret;
				return (-1);
			}
			mtp->mt_memalloced += mts.mts_memalloced;
			mtp->mt_memfreed += mts.mts_memfreed;
			mtp->mt_numallocs += mts.mts_numallocs;
			mtp->mt_numfrees += mts.mts_numfrees;
			mtp->mt_sizemask |= mts.mts_size;

			mtp->mt_percpu_alloc[j].mtp_memalloced =
			    mts.mts_memalloced;
			mtp->mt_percpu_alloc[j].mtp_memfreed =
			    mts.mts_memfreed;
			mtp->mt_percpu_alloc[j].mtp_numallocs =
			    mts.mts_numallocs;
			mtp->mt_percpu_alloc[j].mtp_numfrees =
			    mts.mts_numfrees;
			mtp->mt_percpu_alloc[j].mtp_sizemask =
			    mts.mts_size;
		}
		for (; j < mp_maxcpus; j++) {
			bzero(&mtp->mt_percpu_alloc[j],
			    sizeof(mtp->mt_percpu_alloc[0]));
		}

		mtp->mt_bytes = mtp->mt_memalloced - mtp->mt_memfreed;
		mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees;
	}

	return (0);
}

/*
 * Query the malloc zone count and per-zone allocation sizes from the
 * running kernel via sysctl.
 */
static int
memstat_malloc_zone_init(void)
{
	size_t size;

	size = sizeof(memstat_malloc_zone_count);
	if (sysctlbyname("vm.malloc.zone_count", &memstat_malloc_zone_count,
	    &size, NULL, 0) < 0) {
		return (-1);
	}

	if (memstat_malloc_zone_count > (int)nitems(memstat_malloc_zone_sizes)) {
		return (-1);
	}

	size = sizeof(memstat_malloc_zone_sizes);
	if (sysctlbyname("vm.malloc.zone_sizes", &memstat_malloc_zone_sizes,
	    &size, NULL, 0) < 0) {
		return (-1);
	}

	return (0);
}

/*
 * Copied from kern_malloc.c.
 *
 * kz_zone is an array sized at compile time; its size is exported as
 * "numzones".  Below we only need to iterate over kz_size.
 */
struct memstat_kmemzone {
	int kz_size;
	const char *kz_name;
	void *kz_zone[1];
};

/*
 * As with memstat_malloc_zone_init(), but read the zone sizes from a kernel
 * image or crash dump via kvm(3).
 */
static int
memstat_malloc_zone_init_kvm(kvm_t *kvm)
{
	struct memstat_kmemzone *kmemzones, *kz;
	int numzones, objsize, allocsize, ret;
	int i;

	ret = kread_symbol(kvm, X_VM_MALLOC_ZONE_COUNT,
	    &memstat_malloc_zone_count, sizeof(memstat_malloc_zone_count), 0);
	if (ret != 0) {
		return (ret);
	}

	ret = kread_symbol(kvm, X_NUMZONES, &numzones, sizeof(numzones), 0);
	if (ret != 0) {
		return (ret);
	}

	objsize = __offsetof(struct memstat_kmemzone, kz_zone) +
	    sizeof(void *) * numzones;

	allocsize = objsize * memstat_malloc_zone_count;
	kmemzones = malloc(allocsize);
	if (kmemzones == NULL) {
		return (MEMSTAT_ERROR_NOMEMORY);
	}
	ret = kread_symbol(kvm, X_KMEMZONES, kmemzones, allocsize, 0);
	if (ret != 0) {
		free(kmemzones);
		return (ret);
	}

	/*
	 * Only memstat_malloc_zone_count entries were read above; iterating
	 * further would run past the end of the allocation.
	 */
	kz = kmemzones;
	for (i = 0; i < memstat_malloc_zone_count; i++) {
		memstat_malloc_zone_sizes[i] = kz->kz_size;
		kz = (struct memstat_kmemzone *)((char *)kz + objsize);
	}

	free(kmemzones);
	return (0);
}

size_t
memstat_malloc_zone_get_count(void)
{

	return (memstat_malloc_zone_count);
}

size_t
memstat_malloc_zone_get_size(size_t n)
{

	if (n >= nitems(memstat_malloc_zone_sizes)) {
		return (-1);
	}

	return (memstat_malloc_zone_sizes[n]);
}

int
memstat_malloc_zone_used(const struct memory_type *mtp, size_t n)
{

	if (memstat_get_sizemask(mtp) & (1 << n))
		return (1);

	return (0);
}
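/*
 * Example usage (an illustrative sketch): the three zone accessors above
 * combine to report which malloc zones a given type has drawn from, much as
 * vmstat -m does in its "Size(s)" column.  Here mtp is assumed to be a
 * struct memory_type pointer obtained from a populated list:
 *
 *	size_t i;
 *
 *	for (i = 0; i < memstat_malloc_zone_get_count(); i++)
 *		if (memstat_malloc_zone_used(mtp, i))
 *			printf("%zu ", memstat_malloc_zone_get_size(i));
 */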