kern_sysctl.c revision 194368
1/*-
2 * Copyright (c) 1982, 1986, 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Mike Karels at Berkeley Software Design, Inc.
7 *
8 * Quite extensively rewritten by Poul-Henning Kamp of the FreeBSD
9 * project, to make these variables more userfriendly.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 4. Neither the name of the University nor the names of its contributors
20 *    may be used to endorse or promote products derived from this software
21 *    without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 *	@(#)kern_sysctl.c	8.4 (Berkeley) 4/14/94
36 */
37
38#include <sys/cdefs.h>
39__FBSDID("$FreeBSD: head/sys/kern/kern_sysctl.c 194368 2009-06-17 15:01:01Z bz $");
40
41#include "opt_compat.h"
42#include "opt_ktrace.h"
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/kernel.h>
47#include <sys/sysctl.h>
48#include <sys/malloc.h>
49#include <sys/priv.h>
50#include <sys/proc.h>
51#include <sys/jail.h>
52#include <sys/lock.h>
53#include <sys/mutex.h>
54#include <sys/sx.h>
55#include <sys/sysproto.h>
56#include <sys/uio.h>
57#include <sys/vimage.h>
58#ifdef KTRACE
59#include <sys/ktrace.h>
60#endif
61
62#include <security/mac/mac_framework.h>
63
64#include <vm/vm.h>
65#include <vm/vm_extern.h>
66
67static MALLOC_DEFINE(M_SYSCTL, "sysctl", "sysctl internal magic");
68static MALLOC_DEFINE(M_SYSCTLOID, "sysctloid", "sysctl dynamic oids");
69static MALLOC_DEFINE(M_SYSCTLTMP, "sysctltmp", "sysctl temp output buffer");
70
71/*
72 * The sysctllock protects the MIB tree.  It also protects sysctl
73 * contexts used with dynamic sysctls.  The sysctl_register_oid() and
74 * sysctl_unregister_oid() routines require the sysctllock to already
75 * be held, so the sysctl_lock() and sysctl_unlock() routines are
76 * provided for the few places in the kernel which need to use that
77 * API rather than using the dynamic API.  Use of the dynamic API is
78 * strongly encouraged for most code.
79 *
80 * The sysctlmemlock is used to limit the amount of user memory wired for
81 * sysctl requests.  This is implemented by serializing any userland
82 * sysctl requests larger than a single page via an exclusive lock.
83 */
84static struct sx sysctllock;
85static struct sx sysctlmemlock;
86
87#define	SYSCTL_SLOCK()		sx_slock(&sysctllock)
88#define	SYSCTL_SUNLOCK()	sx_sunlock(&sysctllock)
89#define	SYSCTL_XLOCK()		sx_xlock(&sysctllock)
90#define	SYSCTL_XUNLOCK()	sx_xunlock(&sysctllock)
91#define	SYSCTL_ASSERT_XLOCKED()	sx_assert(&sysctllock, SA_XLOCKED)
92#define	SYSCTL_ASSERT_LOCKED()	sx_assert(&sysctllock, SA_LOCKED)
93#define	SYSCTL_INIT()		sx_init(&sysctllock, "sysctl lock")
94
95static int sysctl_root(SYSCTL_HANDLER_ARGS);
96
97struct sysctl_oid_list sysctl__children; /* root list */
98
99static int	sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del,
100		    int recurse);
101
102static struct sysctl_oid *
103sysctl_find_oidname(const char *name, struct sysctl_oid_list *list)
104{
105	struct sysctl_oid *oidp;
106
107	SYSCTL_ASSERT_LOCKED();
108	SLIST_FOREACH(oidp, list, oid_link) {
109		if (strcmp(oidp->oid_name, name) == 0) {
110			return (oidp);
111		}
112	}
113	return (NULL);
114}
115
116/*
117 * Initialization of the MIB tree.
118 *
119 * Order by number in each list.
120 */
121void
122sysctl_lock(void)
123{
124
125	SYSCTL_XLOCK();
126}
127
128void
129sysctl_unlock(void)
130{
131
132	SYSCTL_XUNLOCK();
133}
134
135void
136sysctl_register_oid(struct sysctl_oid *oidp)
137{
138	struct sysctl_oid_list *parent = oidp->oid_parent;
139	struct sysctl_oid *p;
140	struct sysctl_oid *q;
141
142	/*
143	 * First check if another oid with the same name already
144	 * exists in the parent's list.
145	 */
146	SYSCTL_ASSERT_XLOCKED();
147	p = sysctl_find_oidname(oidp->oid_name, parent);
148	if (p != NULL) {
149		if ((p->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
150			p->oid_refcnt++;
151			return;
152		} else {
153			printf("can't re-use a leaf (%s)!\n", p->oid_name);
154			return;
155		}
156	}
157	/*
158	 * If this oid has a number OID_AUTO, give it a number which
159	 * is greater than any current oid.
160	 * NOTE: DO NOT change the starting value here, change it in
161	 * <sys/sysctl.h>, and make sure it is at least 256 to
162	 * accomodate e.g. net.inet.raw as a static sysctl node.
163	 */
164	if (oidp->oid_number == OID_AUTO) {
165		static int newoid = CTL_AUTO_START;
166
167		oidp->oid_number = newoid++;
168		if (newoid == 0x7fffffff)
169			panic("out of oids");
170	}
171#if 0
172	else if (oidp->oid_number >= CTL_AUTO_START) {
173		/* do not panic; this happens when unregistering sysctl sets */
174		printf("static sysctl oid too high: %d", oidp->oid_number);
175	}
176#endif
177
178	/*
179	 * Insert the oid into the parent's list in order.
180	 */
181	q = NULL;
182	SLIST_FOREACH(p, parent, oid_link) {
183		if (oidp->oid_number < p->oid_number)
184			break;
185		q = p;
186	}
187	if (q)
188		SLIST_INSERT_AFTER(q, oidp, oid_link);
189	else
190		SLIST_INSERT_HEAD(parent, oidp, oid_link);
191}
192
193void
194sysctl_unregister_oid(struct sysctl_oid *oidp)
195{
196	struct sysctl_oid *p;
197	int error;
198
199	SYSCTL_ASSERT_XLOCKED();
200	error = ENOENT;
201	if (oidp->oid_number == OID_AUTO) {
202		error = EINVAL;
203	} else {
204		SLIST_FOREACH(p, oidp->oid_parent, oid_link) {
205			if (p == oidp) {
206				SLIST_REMOVE(oidp->oid_parent, oidp,
207				    sysctl_oid, oid_link);
208				error = 0;
209				break;
210			}
211		}
212	}
213
214	/*
215	 * This can happen when a module fails to register and is
216	 * being unloaded afterwards.  It should not be a panic()
217	 * for normal use.
218	 */
219	if (error)
220		printf("%s: failed to unregister sysctl\n", __func__);
221}
222
223/* Initialize a new context to keep track of dynamically added sysctls. */
224int
225sysctl_ctx_init(struct sysctl_ctx_list *c)
226{
227
228	if (c == NULL) {
229		return (EINVAL);
230	}
231
232	/*
233	 * No locking here, the caller is responsible for not adding
234	 * new nodes to a context until after this function has
235	 * returned.
236	 */
237	TAILQ_INIT(c);
238	return (0);
239}
240
241/* Free the context, and destroy all dynamic oids registered in this context */
242int
243sysctl_ctx_free(struct sysctl_ctx_list *clist)
244{
245	struct sysctl_ctx_entry *e, *e1;
246	int error;
247
248	error = 0;
249	/*
250	 * First perform a "dry run" to check if it's ok to remove oids.
251	 * XXX FIXME
252	 * XXX This algorithm is a hack. But I don't know any
253	 * XXX better solution for now...
254	 */
255	SYSCTL_XLOCK();
256	TAILQ_FOREACH(e, clist, link) {
257		error = sysctl_remove_oid_locked(e->entry, 0, 0);
258		if (error)
259			break;
260	}
261	/*
262	 * Restore deregistered entries, either from the end,
263	 * or from the place where error occured.
264	 * e contains the entry that was not unregistered
265	 */
266	if (error)
267		e1 = TAILQ_PREV(e, sysctl_ctx_list, link);
268	else
269		e1 = TAILQ_LAST(clist, sysctl_ctx_list);
270	while (e1 != NULL) {
271		sysctl_register_oid(e1->entry);
272		e1 = TAILQ_PREV(e1, sysctl_ctx_list, link);
273	}
274	if (error) {
275		SYSCTL_XUNLOCK();
276		return(EBUSY);
277	}
278	/* Now really delete the entries */
279	e = TAILQ_FIRST(clist);
280	while (e != NULL) {
281		e1 = TAILQ_NEXT(e, link);
282		error = sysctl_remove_oid_locked(e->entry, 1, 0);
283		if (error)
284			panic("sysctl_remove_oid: corrupt tree, entry: %s",
285			    e->entry->oid_name);
286		free(e, M_SYSCTLOID);
287		e = e1;
288	}
289	SYSCTL_XUNLOCK();
290	return (error);
291}
292
293/* Add an entry to the context */
294struct sysctl_ctx_entry *
295sysctl_ctx_entry_add(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp)
296{
297	struct sysctl_ctx_entry *e;
298
299	SYSCTL_ASSERT_XLOCKED();
300	if (clist == NULL || oidp == NULL)
301		return(NULL);
302	e = malloc(sizeof(struct sysctl_ctx_entry), M_SYSCTLOID, M_WAITOK);
303	e->entry = oidp;
304	TAILQ_INSERT_HEAD(clist, e, link);
305	return (e);
306}
307
308/* Find an entry in the context */
309struct sysctl_ctx_entry *
310sysctl_ctx_entry_find(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp)
311{
312	struct sysctl_ctx_entry *e;
313
314	SYSCTL_ASSERT_LOCKED();
315	if (clist == NULL || oidp == NULL)
316		return(NULL);
317	TAILQ_FOREACH(e, clist, link) {
318		if(e->entry == oidp)
319			return(e);
320	}
321	return (e);
322}
323
324/*
325 * Delete an entry from the context.
326 * NOTE: this function doesn't free oidp! You have to remove it
327 * with sysctl_remove_oid().
328 */
329int
330sysctl_ctx_entry_del(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp)
331{
332	struct sysctl_ctx_entry *e;
333
334	if (clist == NULL || oidp == NULL)
335		return (EINVAL);
336	SYSCTL_XLOCK();
337	e = sysctl_ctx_entry_find(clist, oidp);
338	if (e != NULL) {
339		TAILQ_REMOVE(clist, e, link);
340		SYSCTL_XUNLOCK();
341		free(e, M_SYSCTLOID);
342		return (0);
343	} else {
344		SYSCTL_XUNLOCK();
345		return (ENOENT);
346	}
347}
348
/*
 * Remove a dynamically created sysctl tree; locking wrapper around
 * sysctl_remove_oid_locked().
 *
 *   oidp    - top of the tree to be removed
 *   del     - if 0 just deregister, otherwise free the entries as well
 *   recurse - if non-zero, traverse the subtree to be deleted
 *
 * Returns whatever sysctl_remove_oid_locked() returns.
 */
int
sysctl_remove_oid(struct sysctl_oid *oidp, int del, int recurse)
{
	int rv;

	SYSCTL_XLOCK();
	rv = sysctl_remove_oid_locked(oidp, del, recurse);
	SYSCTL_XUNLOCK();

	return (rv);
}
365
366static int
367sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del, int recurse)
368{
369	struct sysctl_oid *p;
370	int error;
371
372	SYSCTL_ASSERT_XLOCKED();
373	if (oidp == NULL)
374		return(EINVAL);
375	if ((oidp->oid_kind & CTLFLAG_DYN) == 0) {
376		printf("can't remove non-dynamic nodes!\n");
377		return (EINVAL);
378	}
379	/*
380	 * WARNING: normal method to do this should be through
381	 * sysctl_ctx_free(). Use recursing as the last resort
382	 * method to purge your sysctl tree of leftovers...
383	 * However, if some other code still references these nodes,
384	 * it will panic.
385	 */
386	if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
387		if (oidp->oid_refcnt == 1) {
388			SLIST_FOREACH(p, SYSCTL_CHILDREN(oidp), oid_link) {
389				if (!recurse)
390					return (ENOTEMPTY);
391				error = sysctl_remove_oid_locked(p, del,
392				    recurse);
393				if (error)
394					return (error);
395			}
396			if (del)
397				free(SYSCTL_CHILDREN(oidp), M_SYSCTLOID);
398		}
399	}
400	if (oidp->oid_refcnt > 1 ) {
401		oidp->oid_refcnt--;
402	} else {
403		if (oidp->oid_refcnt == 0) {
404			printf("Warning: bad oid_refcnt=%u (%s)!\n",
405				oidp->oid_refcnt, oidp->oid_name);
406			return (EINVAL);
407		}
408		sysctl_unregister_oid(oidp);
409		if (del) {
410			if (oidp->oid_descr)
411				free((void *)(uintptr_t)(const void *)oidp->oid_descr, M_SYSCTLOID);
412			free((void *)(uintptr_t)(const void *)oidp->oid_name,
413			     M_SYSCTLOID);
414			free(oidp, M_SYSCTLOID);
415		}
416	}
417	return (0);
418}
419
420/*
421 * Create new sysctls at run time.
422 * clist may point to a valid context initialized with sysctl_ctx_init().
423 */
424struct sysctl_oid *
425sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent,
426	int number, const char *name, int kind, void *arg1, int arg2,
427	int (*handler)(SYSCTL_HANDLER_ARGS), const char *fmt, const char *descr)
428{
429	struct sysctl_oid *oidp;
430	ssize_t len;
431	char *newname;
432
433	/* You have to hook up somewhere.. */
434	if (parent == NULL)
435		return(NULL);
436	/* Check if the node already exists, otherwise create it */
437	SYSCTL_XLOCK();
438	oidp = sysctl_find_oidname(name, parent);
439	if (oidp != NULL) {
440		if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
441			oidp->oid_refcnt++;
442			/* Update the context */
443			if (clist != NULL)
444				sysctl_ctx_entry_add(clist, oidp);
445			SYSCTL_XUNLOCK();
446			return (oidp);
447		} else {
448			SYSCTL_XUNLOCK();
449			printf("can't re-use a leaf (%s)!\n", name);
450			return (NULL);
451		}
452	}
453	oidp = malloc(sizeof(struct sysctl_oid), M_SYSCTLOID, M_WAITOK|M_ZERO);
454	oidp->oid_parent = parent;
455	SLIST_NEXT(oidp, oid_link) = NULL;
456	oidp->oid_number = number;
457	oidp->oid_refcnt = 1;
458	len = strlen(name);
459	newname = malloc(len + 1, M_SYSCTLOID, M_WAITOK);
460	bcopy(name, newname, len + 1);
461	newname[len] = '\0';
462	oidp->oid_name = newname;
463	oidp->oid_handler = handler;
464	oidp->oid_kind = CTLFLAG_DYN | kind;
465	if ((kind & CTLTYPE) == CTLTYPE_NODE) {
466		/* Allocate space for children */
467		SYSCTL_CHILDREN_SET(oidp, malloc(sizeof(struct sysctl_oid_list),
468		    M_SYSCTLOID, M_WAITOK));
469		SLIST_INIT(SYSCTL_CHILDREN(oidp));
470	} else {
471		oidp->oid_arg1 = arg1;
472		oidp->oid_arg2 = arg2;
473	}
474	oidp->oid_fmt = fmt;
475	if (descr) {
476		int len = strlen(descr) + 1;
477		oidp->oid_descr = malloc(len, M_SYSCTLOID, M_WAITOK);
478		if (oidp->oid_descr)
479			strcpy((char *)(uintptr_t)(const void *)oidp->oid_descr, descr);
480	}
481	/* Update the context, if used */
482	if (clist != NULL)
483		sysctl_ctx_entry_add(clist, oidp);
484	/* Register this oid */
485	sysctl_register_oid(oidp);
486	SYSCTL_XUNLOCK();
487	return (oidp);
488}
489
490/*
491 * Rename an existing oid.
492 */
493void
494sysctl_rename_oid(struct sysctl_oid *oidp, const char *name)
495{
496	ssize_t len;
497	char *newname;
498	void *oldname;
499
500	len = strlen(name);
501	newname = malloc(len + 1, M_SYSCTLOID, M_WAITOK);
502	bcopy(name, newname, len + 1);
503	newname[len] = '\0';
504	SYSCTL_XLOCK();
505	oldname = (void *)(uintptr_t)(const void *)oidp->oid_name;
506	oidp->oid_name = newname;
507	SYSCTL_XUNLOCK();
508	free(oldname, M_SYSCTLOID);
509}
510
511/*
512 * Reparent an existing oid.
513 */
514int
515sysctl_move_oid(struct sysctl_oid *oid, struct sysctl_oid_list *parent)
516{
517	struct sysctl_oid *oidp;
518
519	SYSCTL_XLOCK();
520	if (oid->oid_parent == parent) {
521		SYSCTL_XUNLOCK();
522		return (0);
523	}
524	oidp = sysctl_find_oidname(oid->oid_name, parent);
525	if (oidp != NULL) {
526		SYSCTL_XUNLOCK();
527		return (EEXIST);
528	}
529	sysctl_unregister_oid(oid);
530	oid->oid_parent = parent;
531	oid->oid_number = OID_AUTO;
532	sysctl_register_oid(oid);
533	SYSCTL_XUNLOCK();
534	return (0);
535}
536
537/*
538 * Register the kernel's oids on startup.
539 */
540SET_DECLARE(sysctl_set, struct sysctl_oid);
541
542static void
543sysctl_register_all(void *arg)
544{
545	struct sysctl_oid **oidp;
546
547	sx_init(&sysctlmemlock, "sysctl mem");
548	SYSCTL_INIT();
549	SYSCTL_XLOCK();
550	SET_FOREACH(oidp, sysctl_set)
551		sysctl_register_oid(*oidp);
552	SYSCTL_XUNLOCK();
553}
554SYSINIT(sysctl, SI_SUB_KMEM, SI_ORDER_ANY, sysctl_register_all, 0);
555
556/*
557 * "Staff-functions"
558 *
559 * These functions implement a presently undocumented interface
560 * used by the sysctl program to walk the tree, and get the type
561 * so it can print the value.
562 * This interface is under work and consideration, and should probably
563 * be killed with a big axe by the first person who can find the time.
 * (Be aware, though, that the proper interface isn't as obvious as it
 * may seem; there are various conflicting requirements.)
566 *
567 * {0,0}	printf the entire MIB-tree.
568 * {0,1,...}	return the name of the "..." OID.
569 * {0,2,...}	return the next OID.
570 * {0,3}	return the OID of the name in "new"
571 * {0,4,...}	return the kind & format info for the "..." OID.
572 * {0,5,...}	return the description the "..." OID.
573 */
574
#ifdef SYSCTL_DEBUG
/*
 * Print one level of the tree to the console, indented by 'i' spaces, and
 * descend (with two more spaces of indent) into every node that has no
 * handler of its own.  The sysctl lock must be held.
 */
static void
sysctl_sysctl_debug_dump_node(struct sysctl_oid_list *l, int i)
{
	struct sysctl_oid *cur;
	int pad;

	SYSCTL_ASSERT_LOCKED();
	SLIST_FOREACH(cur, l, oid_link) {
		/* Indentation. */
		for (pad = 0; pad < i; pad++)
			printf(" ");

		printf("%d %s ", cur->oid_number, cur->oid_name);

		/* Read/write flags. */
		printf("%c%c",
		    (cur->oid_kind & CTLFLAG_RD) ? 'R' : ' ',
		    (cur->oid_kind & CTLFLAG_WR) ? 'W' : ' ');

		if (cur->oid_handler)
			printf(" *Handler");

		/* Type, followed by the subtree for plain nodes. */
		switch (cur->oid_kind & CTLTYPE) {
		case CTLTYPE_INT:
			printf(" Int\n");
			break;
		case CTLTYPE_STRING:
			printf(" String\n");
			break;
		case CTLTYPE_QUAD:
			printf(" Quad\n");
			break;
		case CTLTYPE_OPAQUE:
			printf(" Opaque/struct\n");
			break;
		case CTLTYPE_NODE:
			printf(" Node\n");
			if (!cur->oid_handler)
				sysctl_sysctl_debug_dump_node(cur->oid_arg1,
				    i + 2);
			break;
		default:
			printf("\n");
		}
	}
}

/*
 * Handler for sysctl.debug: after a PRIV_SYSCTL_DEBUG privilege check,
 * dump the whole tree to the console.  Always returns an error to the
 * caller (the privilege error, or ENOENT after dumping).
 */
static int
sysctl_sysctl_debug(SYSCTL_HANDLER_ARGS)
{
	int error;

	error = priv_check(req->td, PRIV_SYSCTL_DEBUG);
	if (error == 0) {
		sysctl_sysctl_debug_dump_node(&sysctl__children, 0);
		error = ENOENT;
	}
	return (error);
}

SYSCTL_PROC(_sysctl, 0, debug, CTLTYPE_STRING|CTLFLAG_RD,
	0, 0, sysctl_sysctl_debug, "-", "");
#endif
630
631static int
632sysctl_sysctl_name(SYSCTL_HANDLER_ARGS)
633{
634	int *name = (int *) arg1;
635	u_int namelen = arg2;
636	int error = 0;
637	struct sysctl_oid *oid;
638	struct sysctl_oid_list *lsp = &sysctl__children, *lsp2;
639	char buf[10];
640
641	SYSCTL_ASSERT_LOCKED();
642	while (namelen) {
643		if (!lsp) {
644			snprintf(buf,sizeof(buf),"%d",*name);
645			if (req->oldidx)
646				error = SYSCTL_OUT(req, ".", 1);
647			if (!error)
648				error = SYSCTL_OUT(req, buf, strlen(buf));
649			if (error)
650				return (error);
651			namelen--;
652			name++;
653			continue;
654		}
655		lsp2 = 0;
656		SLIST_FOREACH(oid, lsp, oid_link) {
657			if (oid->oid_number != *name)
658				continue;
659
660			if (req->oldidx)
661				error = SYSCTL_OUT(req, ".", 1);
662			if (!error)
663				error = SYSCTL_OUT(req, oid->oid_name,
664					strlen(oid->oid_name));
665			if (error)
666				return (error);
667
668			namelen--;
669			name++;
670
671			if ((oid->oid_kind & CTLTYPE) != CTLTYPE_NODE)
672				break;
673
674			if (oid->oid_handler)
675				break;
676
677			lsp2 = (struct sysctl_oid_list *)oid->oid_arg1;
678			break;
679		}
680		lsp = lsp2;
681	}
682	return (SYSCTL_OUT(req, "", 1));
683}
684
685static SYSCTL_NODE(_sysctl, 1, name, CTLFLAG_RD, sysctl_sysctl_name, "");
686
687static int
688sysctl_sysctl_next_ls(struct sysctl_oid_list *lsp, int *name, u_int namelen,
689	int *next, int *len, int level, struct sysctl_oid **oidpp)
690{
691	struct sysctl_oid *oidp;
692
693	SYSCTL_ASSERT_LOCKED();
694	*len = level;
695	SLIST_FOREACH(oidp, lsp, oid_link) {
696		*next = oidp->oid_number;
697		*oidpp = oidp;
698
699		if (oidp->oid_kind & CTLFLAG_SKIP)
700			continue;
701
702		if (!namelen) {
703			if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE)
704				return (0);
705			if (oidp->oid_handler)
706				/* We really should call the handler here...*/
707				return (0);
708			lsp = (struct sysctl_oid_list *)oidp->oid_arg1;
709			if (!sysctl_sysctl_next_ls(lsp, 0, 0, next+1,
710				len, level+1, oidpp))
711				return (0);
712			goto emptynode;
713		}
714
715		if (oidp->oid_number < *name)
716			continue;
717
718		if (oidp->oid_number > *name) {
719			if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE)
720				return (0);
721			if (oidp->oid_handler)
722				return (0);
723			lsp = (struct sysctl_oid_list *)oidp->oid_arg1;
724			if (!sysctl_sysctl_next_ls(lsp, name+1, namelen-1,
725				next+1, len, level+1, oidpp))
726				return (0);
727			goto next;
728		}
729		if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE)
730			continue;
731
732		if (oidp->oid_handler)
733			continue;
734
735		lsp = (struct sysctl_oid_list *)oidp->oid_arg1;
736		if (!sysctl_sysctl_next_ls(lsp, name+1, namelen-1, next+1,
737			len, level+1, oidpp))
738			return (0);
739	next:
740		namelen = 1;
741	emptynode:
742		*len = level;
743	}
744	return (1);
745}
746
747static int
748sysctl_sysctl_next(SYSCTL_HANDLER_ARGS)
749{
750	int *name = (int *) arg1;
751	u_int namelen = arg2;
752	int i, j, error;
753	struct sysctl_oid *oid;
754	struct sysctl_oid_list *lsp = &sysctl__children;
755	int newoid[CTL_MAXNAME];
756
757	i = sysctl_sysctl_next_ls(lsp, name, namelen, newoid, &j, 1, &oid);
758	if (i)
759		return (ENOENT);
760	error = SYSCTL_OUT(req, newoid, j * sizeof (int));
761	return (error);
762}
763
764static SYSCTL_NODE(_sysctl, 2, next, CTLFLAG_RD, sysctl_sysctl_next, "");
765
766static int
767name2oid(char *name, int *oid, int *len, struct sysctl_oid **oidpp)
768{
769	int i;
770	struct sysctl_oid *oidp;
771	struct sysctl_oid_list *lsp = &sysctl__children;
772	char *p;
773
774	SYSCTL_ASSERT_LOCKED();
775
776	if (!*name)
777		return (ENOENT);
778
779	p = name + strlen(name) - 1 ;
780	if (*p == '.')
781		*p = '\0';
782
783	*len = 0;
784
785	for (p = name; *p && *p != '.'; p++)
786		;
787	i = *p;
788	if (i == '.')
789		*p = '\0';
790
791	oidp = SLIST_FIRST(lsp);
792
793	while (oidp && *len < CTL_MAXNAME) {
794		if (strcmp(name, oidp->oid_name)) {
795			oidp = SLIST_NEXT(oidp, oid_link);
796			continue;
797		}
798		*oid++ = oidp->oid_number;
799		(*len)++;
800
801		if (!i) {
802			if (oidpp)
803				*oidpp = oidp;
804			return (0);
805		}
806
807		if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE)
808			break;
809
810		if (oidp->oid_handler)
811			break;
812
813		lsp = (struct sysctl_oid_list *)oidp->oid_arg1;
814		oidp = SLIST_FIRST(lsp);
815		name = p+1;
816		for (p = name; *p && *p != '.'; p++)
817				;
818		i = *p;
819		if (i == '.')
820			*p = '\0';
821	}
822	return (ENOENT);
823}
824
825static int
826sysctl_sysctl_name2oid(SYSCTL_HANDLER_ARGS)
827{
828	char *p;
829	int error, oid[CTL_MAXNAME], len;
830	struct sysctl_oid *op = 0;
831
832	SYSCTL_ASSERT_LOCKED();
833
834	if (!req->newlen)
835		return (ENOENT);
836	if (req->newlen >= MAXPATHLEN)	/* XXX arbitrary, undocumented */
837		return (ENAMETOOLONG);
838
839	p = malloc(req->newlen+1, M_SYSCTL, M_WAITOK);
840
841	error = SYSCTL_IN(req, p, req->newlen);
842	if (error) {
843		free(p, M_SYSCTL);
844		return (error);
845	}
846
847	p [req->newlen] = '\0';
848
849	error = name2oid(p, oid, &len, &op);
850
851	free(p, M_SYSCTL);
852
853	if (error)
854		return (error);
855
856	error = SYSCTL_OUT(req, oid, len * sizeof *oid);
857	return (error);
858}
859
860SYSCTL_PROC(_sysctl, 3, name2oid, CTLFLAG_RW|CTLFLAG_ANYBODY|CTLFLAG_MPSAFE,
861    0, 0, sysctl_sysctl_name2oid, "I", "");
862
863static int
864sysctl_sysctl_oidfmt(SYSCTL_HANDLER_ARGS)
865{
866	struct sysctl_oid *oid;
867	int error;
868
869	error = sysctl_find_oid(arg1, arg2, &oid, NULL, req);
870	if (error)
871		return (error);
872
873	if (!oid->oid_fmt)
874		return (ENOENT);
875	error = SYSCTL_OUT(req, &oid->oid_kind, sizeof(oid->oid_kind));
876	if (error)
877		return (error);
878	error = SYSCTL_OUT(req, oid->oid_fmt, strlen(oid->oid_fmt) + 1);
879	return (error);
880}
881
882
883static SYSCTL_NODE(_sysctl, 4, oidfmt, CTLFLAG_RD|CTLFLAG_MPSAFE,
884    sysctl_sysctl_oidfmt, "");
885
886static int
887sysctl_sysctl_oiddescr(SYSCTL_HANDLER_ARGS)
888{
889	struct sysctl_oid *oid;
890	int error;
891
892	error = sysctl_find_oid(arg1, arg2, &oid, NULL, req);
893	if (error)
894		return (error);
895
896	if (!oid->oid_descr)
897		return (ENOENT);
898	error = SYSCTL_OUT(req, oid->oid_descr, strlen(oid->oid_descr) + 1);
899	return (error);
900}
901
902static SYSCTL_NODE(_sysctl, 5, oiddescr, CTLFLAG_RD, sysctl_sysctl_oiddescr, "");
903
904/*
905 * Default "handler" functions.
906 */
907
908/*
909 * Handle an int, signed or unsigned.
910 * Two cases:
911 *     a variable:  point arg1 at it.
912 *     a constant:  pass it in arg2.
913 */
914
915int
916sysctl_handle_int(SYSCTL_HANDLER_ARGS)
917{
918	int tmpout, error = 0;
919
920	/*
921	 * Attempt to get a coherent snapshot by making a copy of the data.
922	 */
923	if (arg1)
924		tmpout = *(int *)arg1;
925	else
926		tmpout = arg2;
927	error = SYSCTL_OUT(req, &tmpout, sizeof(int));
928
929	if (error || !req->newptr)
930		return (error);
931
932	if (!arg1)
933		error = EPERM;
934	else
935		error = SYSCTL_IN(req, arg1, sizeof(int));
936	return (error);
937}
938
#ifdef VIMAGE
/*
 * VIMAGE variant of sysctl_handle_int(): arg1 is first passed through
 * SYSCTL_RESOLVE_V_ARG1(), then the int it points at is copied out and,
 * if the request carries new data, overwritten.
 */
int
sysctl_handle_v_int(SYSCTL_HANDLER_ARGS)
{
	int error, snapshot;

	SYSCTL_RESOLVE_V_ARG1();

	/*
	 * Attempt to get a coherent snapshot by making a copy of the data.
	 */
	snapshot = *(int *)arg1;
	error = SYSCTL_OUT(req, &snapshot, sizeof(int));

	/* Stop here on error or when nothing is being written. */
	if (error || !req->newptr)
		return (error);

	if (arg1 == NULL)
		return (EPERM);
	return (SYSCTL_IN(req, arg1, sizeof(int)));
}
#endif
963
964/*
965 * Based on on sysctl_handle_int() convert milliseconds into ticks.
966 */
967
968int
969sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS)
970{
971	int error, s, tt;
972
973	SYSCTL_RESOLVE_V_ARG1();
974
975	tt = *(int *)arg1;
976	s = (int)((int64_t)tt * 1000 / hz);
977
978	error = sysctl_handle_int(oidp, &s, 0, req);
979	if (error || !req->newptr)
980		return (error);
981
982	tt = (int)((int64_t)s * hz / 1000);
983	if (tt < 1)
984		return (EINVAL);
985
986	*(int *)arg1 = tt;
987	return (0);
988}
989
990
991/*
992 * Handle a long, signed or unsigned.  arg1 points to it.
993 */
994
995int
996sysctl_handle_long(SYSCTL_HANDLER_ARGS)
997{
998	int error = 0;
999	long tmplong;
1000#ifdef SCTL_MASK32
1001	int tmpint;
1002#endif
1003
1004	/*
1005	 * Attempt to get a coherent snapshot by making a copy of the data.
1006	 */
1007	if (!arg1)
1008		return (EINVAL);
1009	tmplong = *(long *)arg1;
1010#ifdef SCTL_MASK32
1011	if (req->flags & SCTL_MASK32) {
1012		tmpint = tmplong;
1013		error = SYSCTL_OUT(req, &tmpint, sizeof(int));
1014	} else
1015#endif
1016		error = SYSCTL_OUT(req, &tmplong, sizeof(long));
1017
1018	if (error || !req->newptr)
1019		return (error);
1020
1021#ifdef SCTL_MASK32
1022	if (req->flags & SCTL_MASK32) {
1023		error = SYSCTL_IN(req, &tmpint, sizeof(int));
1024		*(long *)arg1 = (long)tmpint;
1025	} else
1026#endif
1027		error = SYSCTL_IN(req, arg1, sizeof(long));
1028	return (error);
1029}
1030
1031/*
1032 * Handle a 64 bit int, signed or unsigned.  arg1 points to it.
1033 */
1034
1035int
1036sysctl_handle_quad(SYSCTL_HANDLER_ARGS)
1037{
1038	int error = 0;
1039	uint64_t tmpout;
1040
1041	/*
1042	 * Attempt to get a coherent snapshot by making a copy of the data.
1043	 */
1044	if (!arg1)
1045		return (EINVAL);
1046	tmpout = *(uint64_t *)arg1;
1047	error = SYSCTL_OUT(req, &tmpout, sizeof(uint64_t));
1048
1049	if (error || !req->newptr)
1050		return (error);
1051
1052	error = SYSCTL_IN(req, arg1, sizeof(uint64_t));
1053	return (error);
1054}
1055
1056/*
1057 * Handle our generic '\0' terminated 'C' string.
1058 * Two cases:
1059 * 	a variable string:  point arg1 at it, arg2 is max length.
1060 * 	a constant string:  point arg1 at it, arg2 is zero.
1061 */
1062
1063int
1064sysctl_handle_string(SYSCTL_HANDLER_ARGS)
1065{
1066	int error=0;
1067	char *tmparg;
1068	size_t outlen;
1069
1070	/*
1071	 * Attempt to get a coherent snapshot by copying to a
1072	 * temporary kernel buffer.
1073	 */
1074retry:
1075	outlen = strlen((char *)arg1)+1;
1076	tmparg = malloc(outlen, M_SYSCTLTMP, M_WAITOK);
1077
1078	if (strlcpy(tmparg, (char *)arg1, outlen) >= outlen) {
1079		free(tmparg, M_SYSCTLTMP);
1080		goto retry;
1081	}
1082
1083	error = SYSCTL_OUT(req, tmparg, outlen);
1084	free(tmparg, M_SYSCTLTMP);
1085
1086	if (error || !req->newptr)
1087		return (error);
1088
1089	if ((req->newlen - req->newidx) >= arg2) {
1090		error = EINVAL;
1091	} else {
1092		arg2 = (req->newlen - req->newidx);
1093		error = SYSCTL_IN(req, arg1, arg2);
1094		((char *)arg1)[arg2] = '\0';
1095	}
1096
1097	return (error);
1098}
1099
#ifdef VIMAGE
/*
 * VIMAGE variant of sysctl_handle_string(): arg1 is first passed through
 * SYSCTL_RESOLVE_V_ARG1(); afterwards the string is copied out from a
 * temporary kernel copy and, if new data shorter than arg2 was supplied,
 * replaced and NUL-terminated.  Longer new data yields EINVAL.
 */
int
sysctl_handle_v_string(SYSCTL_HANDLER_ARGS)
{
	size_t outlen;
	char *snapshot;
	int error;

	SYSCTL_RESOLVE_V_ARG1();

	/*
	 * Attempt to get a coherent snapshot by copying to a
	 * temporary kernel buffer.  If the string grew between strlen()
	 * and strlcpy(), throw the buffer away and start over.
	 */
	for (;;) {
		outlen = strlen((char *)arg1)+1;
		snapshot = malloc(outlen, M_SYSCTLTMP, M_WAITOK);
		if (strlcpy(snapshot, (char *)arg1, outlen) < outlen)
			break;
		free(snapshot, M_SYSCTLTMP);
	}

	error = SYSCTL_OUT(req, snapshot, outlen);
	free(snapshot, M_SYSCTLTMP);

	if (error || !req->newptr)
		return (error);

	/* The new string plus its terminator must fit into arg2 bytes. */
	if ((req->newlen - req->newidx) >= arg2)
		return (EINVAL);

	arg2 = (req->newlen - req->newidx);
	error = SYSCTL_IN(req, arg1, arg2);
	((char *)arg1)[arg2] = '\0';
	return (error);
}
#endif
1140
1141/*
1142 * Handle any kind of opaque data.
1143 * arg1 points to it, arg2 is the size.
1144 */
1145
1146int
1147sysctl_handle_opaque(SYSCTL_HANDLER_ARGS)
1148{
1149	int error, tries;
1150	u_int generation;
1151	struct sysctl_req req2;
1152
1153	/*
1154	 * Attempt to get a coherent snapshot, by using the thread
1155	 * pre-emption counter updated from within mi_switch() to
1156	 * determine if we were pre-empted during a bcopy() or
1157	 * copyout(). Make 3 attempts at doing this before giving up.
1158	 * If we encounter an error, stop immediately.
1159	 */
1160	tries = 0;
1161	req2 = *req;
1162retry:
1163	generation = curthread->td_generation;
1164	error = SYSCTL_OUT(req, arg1, arg2);
1165	if (error)
1166		return (error);
1167	tries++;
1168	if (generation != curthread->td_generation && tries < 3) {
1169		*req = req2;
1170		goto retry;
1171	}
1172
1173	error = SYSCTL_IN(req, arg1, arg2);
1174
1175	return (error);
1176}
1177
#ifdef VIMAGE
/*
 * VIMAGE variant of sysctl_handle_opaque(): arg1 is first passed through
 * SYSCTL_RESOLVE_V_ARG1(), then arg2 bytes at arg1 are copied out and
 * replaced by the request's new data via SYSCTL_IN().
 */
int
sysctl_handle_v_opaque(SYSCTL_HANDLER_ARGS)
{
	struct sysctl_req saved;
	u_int gen;
	int attempts, error;

	SYSCTL_RESOLVE_V_ARG1();

	/*
	 * Copy out up to three times, repeating only while
	 * curthread->td_generation changed during the copy.
	 */
	saved = *req;
	for (attempts = 0;;) {
		gen = curthread->td_generation;
		error = SYSCTL_OUT(req, arg1, arg2);
		if (error)
			return (error);
		attempts++;
		if (gen == curthread->td_generation || attempts >= 3)
			break;
		/* Rewind the request before copying out again. */
		*req = saved;
	}

	return (SYSCTL_IN(req, arg1, arg2));
}
#endif
1206
1207/*
1208 * Transfer functions to/from kernel space.
1209 * XXX: rather untested at this point
1210 */
1211static int
1212sysctl_old_kernel(struct sysctl_req *req, const void *p, size_t l)
1213{
1214	size_t i = 0;
1215
1216	if (req->oldptr) {
1217		i = l;
1218		if (req->oldlen <= req->oldidx)
1219			i = 0;
1220		else
1221			if (i > req->oldlen - req->oldidx)
1222				i = req->oldlen - req->oldidx;
1223		if (i > 0)
1224			bcopy(p, (char *)req->oldptr + req->oldidx, i);
1225	}
1226	req->oldidx += l;
1227	if (req->oldptr && i != l)
1228		return (ENOMEM);
1229	return (0);
1230}
1231
1232static int
1233sysctl_new_kernel(struct sysctl_req *req, void *p, size_t l)
1234{
1235	if (!req->newptr)
1236		return (0);
1237	if (req->newlen - req->newidx < l)
1238		return (EINVAL);
1239	bcopy((char *)req->newptr + req->newidx, p, l);
1240	req->newidx += l;
1241	return (0);
1242}
1243
1244int
1245kernel_sysctl(struct thread *td, int *name, u_int namelen, void *old,
1246    size_t *oldlenp, void *new, size_t newlen, size_t *retval, int flags)
1247{
1248	int error = 0;
1249	struct sysctl_req req;
1250
1251	bzero(&req, sizeof req);
1252
1253	req.td = td;
1254	req.flags = flags;
1255
1256	if (oldlenp) {
1257		req.oldlen = *oldlenp;
1258	}
1259	req.validlen = req.oldlen;
1260
1261	if (old) {
1262		req.oldptr= old;
1263	}
1264
1265	if (new != NULL) {
1266		req.newlen = newlen;
1267		req.newptr = new;
1268	}
1269
1270	req.oldfunc = sysctl_old_kernel;
1271	req.newfunc = sysctl_new_kernel;
1272	req.lock = REQ_LOCKED;
1273
1274	SYSCTL_SLOCK();
1275	error = sysctl_root(0, name, namelen, &req);
1276	SYSCTL_SUNLOCK();
1277
1278	if (req.lock == REQ_WIRED && req.validlen > 0)
1279		vsunlock(req.oldptr, req.validlen);
1280
1281	if (error && error != ENOMEM)
1282		return (error);
1283
1284	if (retval) {
1285		if (req.oldptr && req.oldidx > req.validlen)
1286			*retval = req.validlen;
1287		else
1288			*retval = req.oldidx;
1289	}
1290	return (error);
1291}
1292
1293int
1294kernel_sysctlbyname(struct thread *td, char *name, void *old, size_t *oldlenp,
1295    void *new, size_t newlen, size_t *retval, int flags)
1296{
1297        int oid[CTL_MAXNAME];
1298        size_t oidlen, plen;
1299	int error;
1300
1301	oid[0] = 0;		/* sysctl internal magic */
1302	oid[1] = 3;		/* name2oid */
1303	oidlen = sizeof(oid);
1304
1305	error = kernel_sysctl(td, oid, 2, oid, &oidlen,
1306	    (void *)name, strlen(name), &plen, flags);
1307	if (error)
1308		return (error);
1309
1310	error = kernel_sysctl(td, oid, plen / sizeof(int), old, oldlenp,
1311	    new, newlen, retval, flags);
1312	return (error);
1313}
1314
1315/*
1316 * Transfer function to/from user space.
1317 */
1318static int
1319sysctl_old_user(struct sysctl_req *req, const void *p, size_t l)
1320{
1321	int error = 0;
1322	size_t i, len, origidx;
1323
1324	origidx = req->oldidx;
1325	req->oldidx += l;
1326	if (req->oldptr == NULL)
1327		return (0);
1328	/*
1329	 * If we have not wired the user supplied buffer and we are currently
1330	 * holding locks, drop a witness warning, as it's possible that
1331	 * write operations to the user page can sleep.
1332	 */
1333	if (req->lock != REQ_WIRED)
1334		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
1335		    "sysctl_old_user()");
1336	i = l;
1337	len = req->validlen;
1338	if (len <= origidx)
1339		i = 0;
1340	else {
1341		if (i > len - origidx)
1342			i = len - origidx;
1343		error = copyout(p, (char *)req->oldptr + origidx, i);
1344	}
1345	if (error)
1346		return (error);
1347	if (i < l)
1348		return (ENOMEM);
1349	return (0);
1350}
1351
1352static int
1353sysctl_new_user(struct sysctl_req *req, void *p, size_t l)
1354{
1355	int error;
1356
1357	if (!req->newptr)
1358		return (0);
1359	if (req->newlen - req->newidx < l)
1360		return (EINVAL);
1361	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
1362	    "sysctl_new_user()");
1363	error = copyin((char *)req->newptr + req->newidx, p, l);
1364	req->newidx += l;
1365	return (error);
1366}
1367
1368/*
1369 * Wire the user space destination buffer.  If set to a value greater than
1370 * zero, the len parameter limits the maximum amount of wired memory.
1371 */
1372int
1373sysctl_wire_old_buffer(struct sysctl_req *req, size_t len)
1374{
1375	int ret;
1376	size_t wiredlen;
1377
1378	wiredlen = (len > 0 && len < req->oldlen) ? len : req->oldlen;
1379	ret = 0;
1380	if (req->lock == REQ_LOCKED && req->oldptr &&
1381	    req->oldfunc == sysctl_old_user) {
1382		if (wiredlen != 0) {
1383			ret = vslock(req->oldptr, wiredlen);
1384			if (ret != 0) {
1385				if (ret != ENOMEM)
1386					return (ret);
1387				wiredlen = 0;
1388			}
1389		}
1390		req->lock = REQ_WIRED;
1391		req->validlen = wiredlen;
1392	}
1393	return (0);
1394}
1395
1396int
1397sysctl_find_oid(int *name, u_int namelen, struct sysctl_oid **noid,
1398    int *nindx, struct sysctl_req *req)
1399{
1400	struct sysctl_oid *oid;
1401	int indx;
1402
1403	SYSCTL_ASSERT_LOCKED();
1404	oid = SLIST_FIRST(&sysctl__children);
1405	indx = 0;
1406	while (oid && indx < CTL_MAXNAME) {
1407		if (oid->oid_number == name[indx]) {
1408			indx++;
1409			if (oid->oid_kind & CTLFLAG_NOLOCK)
1410				req->lock = REQ_UNLOCKED;
1411			if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
1412				if (oid->oid_handler != NULL ||
1413				    indx == namelen) {
1414					*noid = oid;
1415					if (nindx != NULL)
1416						*nindx = indx;
1417					return (0);
1418				}
1419				oid = SLIST_FIRST(
1420				    (struct sysctl_oid_list *)oid->oid_arg1);
1421			} else if (indx == namelen) {
1422				*noid = oid;
1423				if (nindx != NULL)
1424					*nindx = indx;
1425				return (0);
1426			} else {
1427				return (ENOTDIR);
1428			}
1429		} else {
1430			oid = SLIST_NEXT(oid, oid_link);
1431		}
1432	}
1433	return (ENOENT);
1434}
1435
1436/*
1437 * Traverse our tree, and find the right node, execute whatever it points
1438 * to, and return the resulting error code.
1439 */
1440
1441static int
1442sysctl_root(SYSCTL_HANDLER_ARGS)
1443{
1444	struct sysctl_oid *oid;
1445	int error, indx, lvl;
1446
1447	SYSCTL_ASSERT_LOCKED();
1448
1449	error = sysctl_find_oid(arg1, arg2, &oid, &indx, req);
1450	if (error)
1451		return (error);
1452
1453	if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
1454		/*
1455		 * You can't call a sysctl when it's a node, but has
1456		 * no handler.  Inform the user that it's a node.
1457		 * The indx may or may not be the same as namelen.
1458		 */
1459		if (oid->oid_handler == NULL)
1460			return (EISDIR);
1461	}
1462
1463	/* Is this sysctl writable? */
1464	if (req->newptr && !(oid->oid_kind & CTLFLAG_WR))
1465		return (EPERM);
1466
1467	KASSERT(req->td != NULL, ("sysctl_root(): req->td == NULL"));
1468
1469	/* Is this sysctl sensitive to securelevels? */
1470	if (req->newptr && (oid->oid_kind & CTLFLAG_SECURE)) {
1471		lvl = (oid->oid_kind & CTLMASK_SECURE) >> CTLSHIFT_SECURE;
1472		error = securelevel_gt(req->td->td_ucred, lvl);
1473		if (error)
1474			return (error);
1475	}
1476
1477	/* Is this sysctl writable by only privileged users? */
1478	if (req->newptr && !(oid->oid_kind & CTLFLAG_ANYBODY)) {
1479		if (oid->oid_kind & CTLFLAG_PRISON)
1480			error = priv_check(req->td, PRIV_SYSCTL_WRITEJAIL);
1481		else
1482			error = priv_check(req->td, PRIV_SYSCTL_WRITE);
1483		if (error)
1484			return (error);
1485	}
1486
1487	if (!oid->oid_handler)
1488		return (EINVAL);
1489
1490	if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
1491		arg1 = (int *)arg1 + indx;
1492		arg2 -= indx;
1493	} else {
1494		arg1 = oid->oid_arg1;
1495		arg2 = oid->oid_arg2;
1496	}
1497#ifdef MAC
1498	error = mac_system_check_sysctl(req->td->td_ucred, oid, arg1, arg2,
1499	    req);
1500	if (error != 0)
1501		return (error);
1502#endif
1503	if (!(oid->oid_kind & CTLFLAG_MPSAFE))
1504		mtx_lock(&Giant);
1505	error = oid->oid_handler(oid, arg1, arg2, req);
1506	if (!(oid->oid_kind & CTLFLAG_MPSAFE))
1507		mtx_unlock(&Giant);
1508
1509	return (error);
1510}
1511
1512#ifndef _SYS_SYSPROTO_H_
1513struct sysctl_args {
1514	int	*name;
1515	u_int	namelen;
1516	void	*old;
1517	size_t	*oldlenp;
1518	void	*new;
1519	size_t	newlen;
1520};
1521#endif
1522int
1523__sysctl(struct thread *td, struct sysctl_args *uap)
1524{
1525	int error, i, name[CTL_MAXNAME];
1526	size_t j;
1527
1528	if (uap->namelen > CTL_MAXNAME || uap->namelen < 2)
1529		return (EINVAL);
1530
1531 	error = copyin(uap->name, &name, uap->namelen * sizeof(int));
1532 	if (error)
1533		return (error);
1534
1535	error = userland_sysctl(td, name, uap->namelen,
1536		uap->old, uap->oldlenp, 0,
1537		uap->new, uap->newlen, &j, 0);
1538	if (error && error != ENOMEM)
1539		return (error);
1540	if (uap->oldlenp) {
1541		i = copyout(&j, uap->oldlenp, sizeof(j));
1542		if (i)
1543			return (i);
1544	}
1545	return (error);
1546}
1547
1548/*
1549 * This is used from various compatibility syscalls too.  That's why name
1550 * must be in kernel space.
1551 */
1552int
1553userland_sysctl(struct thread *td, int *name, u_int namelen, void *old,
1554    size_t *oldlenp, int inkernel, void *new, size_t newlen, size_t *retval,
1555    int flags)
1556{
1557	int error = 0, memlocked;
1558	struct sysctl_req req;
1559
1560	bzero(&req, sizeof req);
1561
1562	req.td = td;
1563	req.flags = flags;
1564
1565	if (oldlenp) {
1566		if (inkernel) {
1567			req.oldlen = *oldlenp;
1568		} else {
1569			error = copyin(oldlenp, &req.oldlen, sizeof(*oldlenp));
1570			if (error)
1571				return (error);
1572		}
1573	}
1574	req.validlen = req.oldlen;
1575
1576	if (old) {
1577		if (!useracc(old, req.oldlen, VM_PROT_WRITE))
1578			return (EFAULT);
1579		req.oldptr= old;
1580	}
1581
1582	if (new != NULL) {
1583		if (!useracc(new, newlen, VM_PROT_READ))
1584			return (EFAULT);
1585		req.newlen = newlen;
1586		req.newptr = new;
1587	}
1588
1589	req.oldfunc = sysctl_old_user;
1590	req.newfunc = sysctl_new_user;
1591	req.lock = REQ_LOCKED;
1592
1593#ifdef KTRACE
1594	if (KTRPOINT(curthread, KTR_SYSCTL))
1595		ktrsysctl(name, namelen);
1596#endif
1597
1598	if (req.oldlen > PAGE_SIZE) {
1599		memlocked = 1;
1600		sx_xlock(&sysctlmemlock);
1601	} else
1602		memlocked = 0;
1603	CURVNET_SET(TD_TO_VNET(td));
1604
1605	for (;;) {
1606		req.oldidx = 0;
1607		req.newidx = 0;
1608		SYSCTL_SLOCK();
1609		error = sysctl_root(0, name, namelen, &req);
1610		SYSCTL_SUNLOCK();
1611		if (error != EAGAIN)
1612			break;
1613		uio_yield();
1614	}
1615
1616	CURVNET_RESTORE();
1617
1618	if (req.lock == REQ_WIRED && req.validlen > 0)
1619		vsunlock(req.oldptr, req.validlen);
1620	if (memlocked)
1621		sx_xunlock(&sysctlmemlock);
1622
1623	if (error && error != ENOMEM)
1624		return (error);
1625
1626	if (retval) {
1627		if (req.oldptr && req.oldidx > req.validlen)
1628			*retval = req.validlen;
1629		else
1630			*retval = req.oldidx;
1631	}
1632	return (error);
1633}
1634