1/*
2 * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * Links to Illumos.org for more information on kstat function:
27 * [1] https://illumos.org/man/1M/kstat
28 * [2] https://illumos.org/man/9f/kstat_create
29 */
30
31#include <sys/types.h>
32#include <sys/param.h>
33#include <sys/kernel.h>
34#include <sys/systm.h>
35#include <sys/malloc.h>
36#include <sys/sysctl.h>
37#include <sys/kstat.h>
38#include <sys/sbuf.h>
39#include <sys/zone.h>
40
/* malloc(9) type tag used in this file for the kstat_t structures. */
static MALLOC_DEFINE(M_KSTAT, "kstat_data", "Kernel statistics");

/*
 * Top-level "kstat" sysctl node.  __kstat_create() attaches its per-module
 * nodes underneath it via SYSCTL_STATIC_CHILDREN(_kstat).
 */
SYSCTL_ROOT_NODE(OID_AUTO, kstat, CTLFLAG_RW, 0, "Kernel statistics");
44
45void
46__kstat_set_raw_ops(kstat_t *ksp,
47    int (*headers)(char *buf, size_t size),
48    int (*data)(char *buf, size_t size, void *data),
49    void *(*addr)(kstat_t *ksp, loff_t index))
50{
51	ksp->ks_raw_ops.headers = headers;
52	ksp->ks_raw_ops.data    = data;
53	ksp->ks_raw_ops.addr    = addr;
54}
55
56void
57__kstat_set_seq_raw_ops(kstat_t *ksp,
58    int (*headers)(struct seq_file *f),
59    int (*data)(char *buf, size_t size, void *data),
60    void *(*addr)(kstat_t *ksp, loff_t index))
61{
62	ksp->ks_raw_ops.seq_headers = headers;
63	ksp->ks_raw_ops.data    = data;
64	ksp->ks_raw_ops.addr    = addr;
65}
66
67static int
68kstat_default_update(kstat_t *ksp, int rw)
69{
70	ASSERT3P(ksp, !=, NULL);
71
72	if (rw == KSTAT_WRITE)
73		return (EACCES);
74
75	return (0);
76}
77
78static int
79kstat_resize_raw(kstat_t *ksp)
80{
81	if (ksp->ks_raw_bufsize == KSTAT_RAW_MAX)
82		return (ENOMEM);
83
84	free(ksp->ks_raw_buf, M_TEMP);
85	ksp->ks_raw_bufsize = MIN(ksp->ks_raw_bufsize * 2, KSTAT_RAW_MAX);
86	ksp->ks_raw_buf = malloc(ksp->ks_raw_bufsize, M_TEMP, M_WAITOK);
87
88	return (0);
89}
90
91static void *
92kstat_raw_default_addr(kstat_t *ksp, loff_t n)
93{
94	if (n == 0)
95		return (ksp->ks_data);
96	return (NULL);
97}
98
99static int
100kstat_sysctl(SYSCTL_HANDLER_ARGS)
101{
102	kstat_t *ksp = arg1;
103	kstat_named_t *ksent;
104	uint64_t val;
105
106	ksent = ksp->ks_data;
107	/* Select the correct element */
108	ksent += arg2;
109	/* Update the aggsums before reading */
110	(void) ksp->ks_update(ksp, KSTAT_READ);
111	val = ksent->value.ui64;
112
113	return (sysctl_handle_64(oidp, &val, 0, req));
114}
115
116static int
117kstat_sysctl_string(SYSCTL_HANDLER_ARGS)
118{
119	kstat_t *ksp = arg1;
120	kstat_named_t *ksent = ksp->ks_data;
121	char *val;
122	uint32_t len = 0;
123
124	/* Select the correct element */
125	ksent += arg2;
126	/* Update the aggsums before reading */
127	(void) ksp->ks_update(ksp, KSTAT_READ);
128	val = KSTAT_NAMED_STR_PTR(ksent);
129	len = KSTAT_NAMED_STR_BUFLEN(ksent);
130	val[len-1] = '\0';
131
132	return (sysctl_handle_string(oidp, val, len, req));
133}
134
135static int
136kstat_sysctl_dataset(SYSCTL_HANDLER_ARGS)
137{
138	kstat_t *ksp = arg1;
139	kstat_named_t *ksent;
140	kstat_named_t *ksent_ds;
141	uint64_t val;
142	char *ds_name;
143	uint32_t ds_len = 0;
144
145	ksent_ds = ksent = ksp->ks_data;
146	ds_name = KSTAT_NAMED_STR_PTR(ksent_ds);
147	ds_len = KSTAT_NAMED_STR_BUFLEN(ksent_ds);
148	ds_name[ds_len-1] = '\0';
149
150	if (!zone_dataset_visible(ds_name, NULL)) {
151		return (EPERM);
152	}
153
154	/* Select the correct element */
155	ksent += arg2;
156	/* Update the aggsums before reading */
157	(void) ksp->ks_update(ksp, KSTAT_READ);
158	val = ksent->value.ui64;
159
160	return (sysctl_handle_64(oidp, &val, 0, req));
161}
162
163static int
164kstat_sysctl_dataset_string(SYSCTL_HANDLER_ARGS)
165{
166	kstat_t *ksp = arg1;
167	kstat_named_t *ksent = ksp->ks_data;
168	char *val;
169	uint32_t len = 0;
170
171	/* Select the correct element */
172	ksent += arg2;
173	val = KSTAT_NAMED_STR_PTR(ksent);
174	len = KSTAT_NAMED_STR_BUFLEN(ksent);
175	val[len-1] = '\0';
176
177	if (!zone_dataset_visible(val, NULL)) {
178		return (EPERM);
179	}
180
181	return (sysctl_handle_string(oidp, val, len, req));
182}
183
184static int
185kstat_sysctl_io(SYSCTL_HANDLER_ARGS)
186{
187	struct sbuf sb;
188	kstat_t *ksp = arg1;
189	kstat_io_t *kip = ksp->ks_data;
190	int rc;
191
192	sbuf_new_for_sysctl(&sb, NULL, 0, req);
193
194	/* Update the aggsums before reading */
195	(void) ksp->ks_update(ksp, KSTAT_READ);
196
197	/* though wlentime & friends are signed, they will never be negative */
198	sbuf_printf(&sb,
199	    "%-8llu %-8llu %-8u %-8u %-8llu %-8llu "
200	    "%-8llu %-8llu %-8llu %-8llu %-8u %-8u\n",
201	    kip->nread, kip->nwritten,
202	    kip->reads, kip->writes,
203	    kip->wtime, kip->wlentime, kip->wlastupdate,
204	    kip->rtime, kip->rlentime, kip->rlastupdate,
205	    kip->wcnt,  kip->rcnt);
206	rc = sbuf_finish(&sb);
207	sbuf_delete(&sb);
208	return (rc);
209}
210
211static int
212kstat_sysctl_raw(SYSCTL_HANDLER_ARGS)
213{
214	struct sbuf sb;
215	void *data;
216	kstat_t *ksp = arg1;
217	void *(*addr_op)(kstat_t *ksp, loff_t index);
218	int n, has_header, rc = 0;
219
220	sbuf_new_for_sysctl(&sb, NULL, PAGE_SIZE, req);
221
222	if (ksp->ks_raw_ops.addr)
223		addr_op = ksp->ks_raw_ops.addr;
224	else
225		addr_op = kstat_raw_default_addr;
226
227	mutex_enter(ksp->ks_lock);
228
229	/* Update the aggsums before reading */
230	(void) ksp->ks_update(ksp, KSTAT_READ);
231
232	ksp->ks_raw_bufsize = PAGE_SIZE;
233	ksp->ks_raw_buf = malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
234
235	n = 0;
236	has_header = (ksp->ks_raw_ops.headers ||
237	    ksp->ks_raw_ops.seq_headers);
238
239restart_headers:
240	if (ksp->ks_raw_ops.headers) {
241		rc = ksp->ks_raw_ops.headers(
242		    ksp->ks_raw_buf, ksp->ks_raw_bufsize);
243	} else if (ksp->ks_raw_ops.seq_headers) {
244		struct seq_file f;
245
246		f.sf_buf = ksp->ks_raw_buf;
247		f.sf_size = ksp->ks_raw_bufsize;
248		rc = ksp->ks_raw_ops.seq_headers(&f);
249	}
250	if (has_header) {
251		if (rc == ENOMEM && !kstat_resize_raw(ksp))
252			goto restart_headers;
253		if (rc == 0) {
254			sbuf_cat(&sb, "\n");
255			sbuf_cat(&sb, ksp->ks_raw_buf);
256		}
257	}
258
259	while ((data = addr_op(ksp, n)) != NULL) {
260restart:
261		if (ksp->ks_raw_ops.data) {
262			rc = ksp->ks_raw_ops.data(ksp->ks_raw_buf,
263			    ksp->ks_raw_bufsize, data);
264			if (rc == ENOMEM && !kstat_resize_raw(ksp))
265				goto restart;
266			if (rc == 0)
267				sbuf_cat(&sb, ksp->ks_raw_buf);
268
269		} else {
270			ASSERT3U(ksp->ks_ndata, ==, 1);
271			sbuf_hexdump(&sb, ksp->ks_data,
272			    ksp->ks_data_size, NULL, 0);
273		}
274		n++;
275	}
276	free(ksp->ks_raw_buf, M_TEMP);
277	mutex_exit(ksp->ks_lock);
278	rc = sbuf_finish(&sb);
279	sbuf_delete(&sb);
280	return (rc);
281}
282
283kstat_t *
284__kstat_create(const char *module, int instance, const char *name,
285    const char *class, uchar_t ks_type, uint_t ks_ndata, uchar_t flags)
286{
287	char buf[KSTAT_STRLEN];
288	struct sysctl_oid *root;
289	kstat_t *ksp;
290	char *pool;
291
292	KASSERT(instance == 0, ("instance=%d", instance));
293	if ((ks_type == KSTAT_TYPE_INTR) || (ks_type == KSTAT_TYPE_IO))
294		ASSERT3U(ks_ndata, ==, 1);
295
296	if (class == NULL)
297		class = "misc";
298
299	/*
300	 * Allocate the main structure. We don't need to keep a copy of
301	 * module in here, because it is only used for sysctl node creation
302	 * done in this function.
303	 */
304	ksp = malloc(sizeof (*ksp), M_KSTAT, M_WAITOK|M_ZERO);
305
306	ksp->ks_crtime = gethrtime();
307	ksp->ks_snaptime = ksp->ks_crtime;
308	ksp->ks_instance = instance;
309	(void) strlcpy(ksp->ks_name, name, KSTAT_STRLEN);
310	(void) strlcpy(ksp->ks_class, class, KSTAT_STRLEN);
311	ksp->ks_type = ks_type;
312	ksp->ks_flags = flags;
313	ksp->ks_update = kstat_default_update;
314
315	mutex_init(&ksp->ks_private_lock, NULL, MUTEX_DEFAULT, NULL);
316	ksp->ks_lock = &ksp->ks_private_lock;
317
318	switch (ksp->ks_type) {
319	case KSTAT_TYPE_RAW:
320		ksp->ks_ndata = 1;
321		ksp->ks_data_size = ks_ndata;
322		break;
323	case KSTAT_TYPE_NAMED:
324		ksp->ks_ndata = ks_ndata;
325		ksp->ks_data_size = ks_ndata * sizeof (kstat_named_t);
326		break;
327	case KSTAT_TYPE_INTR:
328		ksp->ks_ndata = ks_ndata;
329		ksp->ks_data_size = ks_ndata * sizeof (kstat_intr_t);
330		break;
331	case KSTAT_TYPE_IO:
332		ksp->ks_ndata = ks_ndata;
333		ksp->ks_data_size = ks_ndata * sizeof (kstat_io_t);
334		break;
335	case KSTAT_TYPE_TIMER:
336		ksp->ks_ndata = ks_ndata;
337		ksp->ks_data_size = ks_ndata * sizeof (kstat_timer_t);
338		break;
339	default:
340		panic("Undefined kstat type %d\n", ksp->ks_type);
341	}
342
343	if (ksp->ks_flags & KSTAT_FLAG_VIRTUAL)
344		ksp->ks_data = NULL;
345	else
346		ksp->ks_data = kmem_zalloc(ksp->ks_data_size, KM_SLEEP);
347
348	/*
349	 * Some kstats use a module name like "zfs/poolname" to distinguish a
350	 * set of kstats belonging to a specific pool.  Split on '/' to add an
351	 * extra node for the pool name if needed.
352	 */
353	(void) strlcpy(buf, module, KSTAT_STRLEN);
354	module = buf;
355	pool = strchr(module, '/');
356	if (pool != NULL)
357		*pool++ = '\0';
358
359	/*
360	 * Create sysctl tree for those statistics:
361	 *
362	 *	kstat.<module>[.<pool>].<class>.<name>
363	 */
364	sysctl_ctx_init(&ksp->ks_sysctl_ctx);
365	root = SYSCTL_ADD_NODE(&ksp->ks_sysctl_ctx,
366	    SYSCTL_STATIC_CHILDREN(_kstat), OID_AUTO, module, CTLFLAG_RW, 0,
367	    "");
368	if (root == NULL) {
369		printf("%s: Cannot create kstat.%s tree!\n", __func__, module);
370		sysctl_ctx_free(&ksp->ks_sysctl_ctx);
371		free(ksp, M_KSTAT);
372		return (NULL);
373	}
374	if (pool != NULL) {
375		root = SYSCTL_ADD_NODE(&ksp->ks_sysctl_ctx,
376		    SYSCTL_CHILDREN(root), OID_AUTO, pool, CTLFLAG_RW, 0, "");
377		if (root == NULL) {
378			printf("%s: Cannot create kstat.%s.%s tree!\n",
379			    __func__, module, pool);
380			sysctl_ctx_free(&ksp->ks_sysctl_ctx);
381			free(ksp, M_KSTAT);
382			return (NULL);
383		}
384	}
385	root = SYSCTL_ADD_NODE(&ksp->ks_sysctl_ctx, SYSCTL_CHILDREN(root),
386	    OID_AUTO, class, CTLFLAG_RW, 0, "");
387	if (root == NULL) {
388		if (pool != NULL)
389			printf("%s: Cannot create kstat.%s.%s.%s tree!\n",
390			    __func__, module, pool, class);
391		else
392			printf("%s: Cannot create kstat.%s.%s tree!\n",
393			    __func__, module, class);
394		sysctl_ctx_free(&ksp->ks_sysctl_ctx);
395		free(ksp, M_KSTAT);
396		return (NULL);
397	}
398	if (ksp->ks_type == KSTAT_TYPE_NAMED) {
399		root = SYSCTL_ADD_NODE(&ksp->ks_sysctl_ctx,
400		    SYSCTL_CHILDREN(root),
401		    OID_AUTO, name, CTLFLAG_RW, 0, "");
402		if (root == NULL) {
403			if (pool != NULL)
404				printf("%s: Cannot create kstat.%s.%s.%s.%s "
405				    "tree!\n", __func__, module, pool, class,
406				    name);
407			else
408				printf("%s: Cannot create kstat.%s.%s.%s "
409				    "tree!\n", __func__, module, class, name);
410			sysctl_ctx_free(&ksp->ks_sysctl_ctx);
411			free(ksp, M_KSTAT);
412			return (NULL);
413		}
414
415	}
416	ksp->ks_sysctl_root = root;
417
418	return (ksp);
419}
420
421static void
422kstat_install_named(kstat_t *ksp)
423{
424	kstat_named_t *ksent;
425	char *namelast;
426	int typelast;
427
428	ksent = ksp->ks_data;
429
430	VERIFY((ksp->ks_flags & KSTAT_FLAG_VIRTUAL) || ksent != NULL);
431
432	typelast = 0;
433	namelast = NULL;
434
435	for (int i = 0; i < ksp->ks_ndata; i++, ksent++) {
436		if (ksent->data_type != 0) {
437			typelast = ksent->data_type;
438			namelast = ksent->name;
439		}
440		switch (typelast) {
441		case KSTAT_DATA_CHAR:
442			/* Not Implemented */
443			break;
444		case KSTAT_DATA_INT32:
445			SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
446			    SYSCTL_CHILDREN(ksp->ks_sysctl_root),
447			    OID_AUTO, namelast,
448			    CTLTYPE_S32 | CTLFLAG_RD | CTLFLAG_MPSAFE,
449			    ksp, i, kstat_sysctl, "I", namelast);
450			break;
451		case KSTAT_DATA_UINT32:
452			SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
453			    SYSCTL_CHILDREN(ksp->ks_sysctl_root),
454			    OID_AUTO, namelast,
455			    CTLTYPE_U32 | CTLFLAG_RD | CTLFLAG_MPSAFE,
456			    ksp, i, kstat_sysctl, "IU", namelast);
457			break;
458		case KSTAT_DATA_INT64:
459			SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
460			    SYSCTL_CHILDREN(ksp->ks_sysctl_root),
461			    OID_AUTO, namelast,
462			    CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
463			    ksp, i, kstat_sysctl, "Q", namelast);
464			break;
465		case KSTAT_DATA_UINT64:
466			if (strcmp(ksp->ks_class, "dataset") == 0) {
467				SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
468				    SYSCTL_CHILDREN(ksp->ks_sysctl_root),
469				    OID_AUTO, namelast,
470				    CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
471				    ksp, i, kstat_sysctl_dataset, "QU",
472				    namelast);
473			} else {
474				SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
475				    SYSCTL_CHILDREN(ksp->ks_sysctl_root),
476				    OID_AUTO, namelast,
477				    CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
478				    ksp, i, kstat_sysctl, "QU", namelast);
479			}
480			break;
481		case KSTAT_DATA_LONG:
482			SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
483			    SYSCTL_CHILDREN(ksp->ks_sysctl_root),
484			    OID_AUTO, namelast,
485			    CTLTYPE_LONG | CTLFLAG_RD | CTLFLAG_MPSAFE,
486			    ksp, i, kstat_sysctl, "L", namelast);
487			break;
488		case KSTAT_DATA_ULONG:
489			SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
490			    SYSCTL_CHILDREN(ksp->ks_sysctl_root),
491			    OID_AUTO, namelast,
492			    CTLTYPE_ULONG | CTLFLAG_RD | CTLFLAG_MPSAFE,
493			    ksp, i, kstat_sysctl, "LU", namelast);
494			break;
495		case KSTAT_DATA_STRING:
496			if (strcmp(ksp->ks_class, "dataset") == 0) {
497				SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
498				    SYSCTL_CHILDREN(ksp->ks_sysctl_root),
499				    OID_AUTO, namelast, CTLTYPE_STRING |
500				    CTLFLAG_RD | CTLFLAG_MPSAFE,
501				    ksp, i, kstat_sysctl_dataset_string, "A",
502				    namelast);
503			} else {
504				SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
505				    SYSCTL_CHILDREN(ksp->ks_sysctl_root),
506				    OID_AUTO, namelast, CTLTYPE_STRING |
507				    CTLFLAG_RD | CTLFLAG_MPSAFE,
508				    ksp, i, kstat_sysctl_string, "A",
509				    namelast);
510			}
511			break;
512		default:
513			panic("unsupported type: %d", typelast);
514		}
515	}
516}
517
518void
519kstat_install(kstat_t *ksp)
520{
521	struct sysctl_oid *root;
522
523	if (ksp->ks_ndata == UINT32_MAX)
524		VERIFY3U(ksp->ks_type, ==, KSTAT_TYPE_RAW);
525
526	switch (ksp->ks_type) {
527	case KSTAT_TYPE_NAMED:
528		return (kstat_install_named(ksp));
529	case KSTAT_TYPE_RAW:
530		if (ksp->ks_raw_ops.data) {
531			root = SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
532			    SYSCTL_CHILDREN(ksp->ks_sysctl_root),
533			    OID_AUTO, ksp->ks_name, CTLTYPE_STRING | CTLFLAG_RD
534			    | CTLFLAG_MPSAFE | CTLFLAG_SKIP,
535			    ksp, 0, kstat_sysctl_raw, "A", ksp->ks_name);
536		} else {
537			root = SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
538			    SYSCTL_CHILDREN(ksp->ks_sysctl_root),
539			    OID_AUTO, ksp->ks_name, CTLTYPE_OPAQUE | CTLFLAG_RD
540			    | CTLFLAG_MPSAFE | CTLFLAG_SKIP,
541			    ksp, 0, kstat_sysctl_raw, "", ksp->ks_name);
542		}
543		break;
544	case KSTAT_TYPE_IO:
545		root = SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
546		    SYSCTL_CHILDREN(ksp->ks_sysctl_root),
547		    OID_AUTO, ksp->ks_name,
548		    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
549		    ksp, 0, kstat_sysctl_io, "A", ksp->ks_name);
550		break;
551	case KSTAT_TYPE_TIMER:
552	case KSTAT_TYPE_INTR:
553	default:
554		panic("unsupported kstat type %d\n", ksp->ks_type);
555	}
556	VERIFY3P(root, !=, NULL);
557	ksp->ks_sysctl_root = root;
558}
559
560void
561kstat_delete(kstat_t *ksp)
562{
563
564	sysctl_ctx_free(&ksp->ks_sysctl_ctx);
565	ksp->ks_lock = NULL;
566	mutex_destroy(&ksp->ks_private_lock);
567	if (!(ksp->ks_flags & KSTAT_FLAG_VIRTUAL))
568		kmem_free(ksp->ks_data, ksp->ks_data_size);
569	free(ksp, M_KSTAT);
570}
571