kern_sysctl.c revision 273564
1/*- 2 * Copyright (c) 1982, 1986, 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Mike Karels at Berkeley Software Design, Inc. 7 * 8 * Quite extensively rewritten by Poul-Henning Kamp of the FreeBSD 9 * project, to make these variables more userfriendly. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 
 *
 *	@(#)kern_sysctl.c	8.4 (Berkeley) 4/14/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_sysctl.c 273564 2014-10-23 22:42:56Z des $");

#include "opt_capsicum.h"
#include "opt_compat.h"
#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/fail.h>
#include <sys/systm.h>
#include <sys/capsicum.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/jail.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sbuf.h>
#include <sys/sx.h>
#include <sys/sysproto.h>
#include <sys/uio.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <net/vnet.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>

/* Malloc types: tree-walk scratch, dynamic oid storage, temporary output. */
static MALLOC_DEFINE(M_SYSCTL, "sysctl", "sysctl internal magic");
static MALLOC_DEFINE(M_SYSCTLOID, "sysctloid", "sysctl dynamic oids");
static MALLOC_DEFINE(M_SYSCTLTMP, "sysctltmp", "sysctl temp output buffer");

/*
 * The sysctllock protects the MIB tree.  It also protects sysctl
 * contexts used with dynamic sysctls.  The sysctl_register_oid() and
 * sysctl_unregister_oid() routines require the sysctllock to already
 * be held, so the sysctl_lock() and sysctl_unlock() routines are
 * provided for the few places in the kernel which need to use that
 * API rather than using the dynamic API.  Use of the dynamic API is
 * strongly encouraged for most code.
 *
 * The sysctlmemlock is used to limit the amount of user memory wired for
 * sysctl requests.  This is implemented by serializing any userland
 * sysctl requests larger than a single page via an exclusive lock.
 */
static struct sx sysctllock;
static struct sx sysctlmemlock;

#define	SYSCTL_XLOCK()		sx_xlock(&sysctllock)
#define	SYSCTL_XUNLOCK()	sx_xunlock(&sysctllock)
#define	SYSCTL_SLOCK()		sx_slock(&sysctllock)
#define	SYSCTL_SUNLOCK()	sx_sunlock(&sysctllock)
#define	SYSCTL_XLOCKED()	sx_xlocked(&sysctllock)
#define	SYSCTL_ASSERT_LOCKED()	sx_assert(&sysctllock, SA_LOCKED)
#define	SYSCTL_ASSERT_XLOCKED()	sx_assert(&sysctllock, SA_XLOCKED)
#define	SYSCTL_INIT()		sx_init(&sysctllock, "sysctl lock")
#define	SYSCTL_SLEEP(ch, wmesg, timo)	\
		sx_sleep(ch, &sysctllock, 0, wmesg, timo)

static int sysctl_root(SYSCTL_HANDLER_ARGS);

/* Root list: the top level of the MIB tree. */
struct sysctl_oid_list sysctl__children = SLIST_HEAD_INITIALIZER(&sysctl__children);

static int	sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del,
		    int recurse);
static int	sysctl_old_kernel(struct sysctl_req *, const void *, size_t);
static int	sysctl_new_kernel(struct sysctl_req *, void *, size_t);

/*
 * Re-acquire the sysctl lock: exclusive if 'xlock' is true, shared
 * otherwise.  Companion to sysctl_unlock() below.
 */
static void
sysctl_lock(bool xlock)
{

	if (xlock)
		SYSCTL_XLOCK();
	else
		SYSCTL_SLOCK();
}

/*
 * Drop the sysctl lock (whichever mode is currently held) and report
 * whether it was held exclusively, so the caller can restore the same
 * mode with sysctl_lock().
 */
static bool
sysctl_unlock(void)
{
	bool xlocked;

	xlocked = SYSCTL_XLOCKED();
	if (xlocked)
		SYSCTL_XUNLOCK();
	else
		SYSCTL_SUNLOCK();
	return (xlocked);
}

/*
 * Look up an oid by name in one oid list (one level of the tree).
 * Returns NULL if no child of 'list' has that name.  Requires the
 * sysctl lock (shared or exclusive).
 */
static struct sysctl_oid *
sysctl_find_oidname(const char *name, struct sysctl_oid_list *list)
{
	struct sysctl_oid *oidp;

	SYSCTL_ASSERT_LOCKED();
	SLIST_FOREACH(oidp, list, oid_link) {
		if (strcmp(oidp->oid_name, name) == 0) {
			return (oidp);
		}
	}
	return (NULL);
}

/*
 * Initialization of the MIB tree.
 *
 * Order by number in each list.
 */
void
sysctl_xlock(void)
{

	SYSCTL_XLOCK();
}

void
sysctl_xunlock(void)
{

	SYSCTL_XUNLOCK();
}

/*
 * Invoke an oid's handler with the sysctl lock dropped, so the handler
 * may sleep or re-enter the sysctl machinery.  oid_running counts
 * in-flight handler invocations; the final decrementer wakes anyone
 * waiting to tear the oid down (CTLFLAG_DYING).  Non-MPSAFE handlers
 * are still run under Giant.
 */
static int
sysctl_root_handler_locked(struct sysctl_oid *oid, void *arg1, intptr_t arg2,
    struct sysctl_req *req)
{
	int error;
	bool xlocked;

	atomic_add_int(&oid->oid_running, 1);
	xlocked = sysctl_unlock();

	if (!(oid->oid_kind & CTLFLAG_MPSAFE))
		mtx_lock(&Giant);
	error = oid->oid_handler(oid, arg1, arg2, req);
	if (!(oid->oid_kind & CTLFLAG_MPSAFE))
		mtx_unlock(&Giant);

	/* Restore the lock in the same mode the caller held it. */
	sysctl_lock(xlocked);
	if (atomic_fetchadd_int(&oid->oid_running, -1) == 1 &&
	    (oid->oid_kind & CTLFLAG_DYING) != 0)
		wakeup(&oid->oid_running);

	return (error);
}

/*
 * Apply a loader tunable (kernel environment variable) to a freshly
 * registered oid: build the dotted name bottom-up into 'path', look it
 * up in the kernel environment, and push the value through the oid's
 * own handler via a kernel-space sysctl_req.
 */
static void
sysctl_load_tunable_by_oid_locked(struct sysctl_oid *oidp)
{
	struct sysctl_req req;
	struct sysctl_oid *curr;
	char *penv = NULL;
	char path[64];
	ssize_t rem = sizeof(path);
	ssize_t len;
	int val_int;
	long val_long;
	int64_t val_64;
	quad_t val_quad;
	int error;

	/* Build the path right-to-left; 'rem' is the write position. */
	path[--rem] = 0;

	for (curr = oidp; curr != NULL; curr = SYSCTL_PARENT(curr)) {
		len = strlen(curr->oid_name);
		rem -= len;
		if (curr != oidp)
			rem -= 1;	/* room for the '.' separator */
		if (rem < 0) {
			printf("OID path exceeds %d bytes\n", (int)sizeof(path));
			return;
		}
		memcpy(path + rem, curr->oid_name, len);
		if (curr != oidp)
			path[rem + len] = '.';
	}

	memset(&req, 0, sizeof(req));

	req.td = curthread;
	req.oldfunc = sysctl_old_kernel;
	req.newfunc = sysctl_new_kernel;
	req.lock = REQ_UNWIRED;

	/* Fetch the tunable with the getenv variant matching the oid type. */
	switch (oidp->oid_kind & CTLTYPE) {
	case CTLTYPE_INT:
		if (getenv_int(path + rem, &val_int) == 0)
			return;
		req.newlen = sizeof(val_int);
		req.newptr = &val_int;
		break;
	case CTLTYPE_UINT:
		if (getenv_uint(path + rem, (unsigned int *)&val_int) == 0)
			return;
		req.newlen = sizeof(val_int);
		req.newptr = &val_int;
		break;
	case CTLTYPE_LONG:
		if (getenv_long(path + rem, &val_long) == 0)
			return;
		req.newlen = sizeof(val_long);
		req.newptr = &val_long;
		break;
	case CTLTYPE_ULONG:
		if (getenv_ulong(path + rem, (unsigned long *)&val_long) == 0)
			return;
		req.newlen = sizeof(val_long);
		req.newptr = &val_long;
		break;
	case CTLTYPE_S64:
		if (getenv_quad(path + rem, &val_quad) == 0)
			return;
		val_64 = val_quad;
		req.newlen = sizeof(val_64);
		req.newptr = &val_64;
		break;
	case CTLTYPE_U64:
		/* XXX there is no getenv_uquad() */
		if (getenv_quad(path + rem, &val_quad) == 0)
			return;
		val_64 = val_quad;
		req.newlen = sizeof(val_64);
		req.newptr = &val_64;
		break;
	case CTLTYPE_STRING:
		/* penv is freed after the handler runs, below. */
		penv = kern_getenv(path + rem);
		if (penv == NULL)
			return;
		req.newlen = strlen(penv);
		req.newptr = penv;
		break;
	default:
		/* Other types (e.g. opaque) cannot be set from a tunable. */
		return;
	}
	error = sysctl_root_handler_locked(oidp, oidp->oid_arg1,
	    oidp->oid_arg2, &req);
	if (error != 0)
		printf("Setting sysctl %s failed: %d\n", path, error);
	if (penv != NULL)
		freeenv(penv);
}

/*
 * Register an oid in its parent's child list.  Requires the sysctl
 * lock held exclusively.  Duplicate node names merely gain a reference;
 * duplicate leaf names are rejected with a console warning.
 */
void
sysctl_register_oid(struct sysctl_oid *oidp)
{
	struct sysctl_oid_list *parent = oidp->oid_parent;
	struct sysctl_oid *p;
	struct sysctl_oid *q;

	/*
	 * First check if another oid with the same name already
	 * exists in the parent's list.
	 */
	SYSCTL_ASSERT_XLOCKED();
	p = sysctl_find_oidname(oidp->oid_name, parent);
	if (p != NULL) {
		if ((p->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
			p->oid_refcnt++;
			return;
		} else {
			printf("can't re-use a leaf (%s)!\n", p->oid_name);
			return;
		}
	}
	/*
	 * If this oid has a number OID_AUTO, give it a number which
	 * is greater than any current oid.
	 * NOTE: DO NOT change the starting value here, change it in
	 * <sys/sysctl.h>, and make sure it is at least 256 to
	 * accommodate e.g. net.inet.raw as a static sysctl node.
	 */
	if (oidp->oid_number == OID_AUTO) {
		static int newoid = CTL_AUTO_START;

		oidp->oid_number = newoid++;
		if (newoid == 0x7fffffff)
			panic("out of oids");
	}
#if 0
	else if (oidp->oid_number >= CTL_AUTO_START) {
		/* do not panic; this happens when unregistering sysctl sets */
		printf("static sysctl oid too high: %d", oidp->oid_number);
	}
#endif

	/*
	 * Insert the oid into the parent's list sorted by oid number;
	 * 'q' tracks the last entry with a smaller number.
	 */
	q = NULL;
	SLIST_FOREACH(p, parent, oid_link) {
		if (oidp->oid_number < p->oid_number)
			break;
		q = p;
	}
	if (q)
		SLIST_INSERT_AFTER(q, oidp, oid_link);
	else
		SLIST_INSERT_HEAD(parent, oidp, oid_link);

	/*
	 * Apply any matching loader tunable now that the oid is visible,
	 * unless the oid is a node, is VNET-virtualized, or opted out
	 * with CTLFLAG_NOFETCH.
	 */
	if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE &&
#ifdef VIMAGE
	    (oidp->oid_kind & CTLFLAG_VNET) == 0 &&
#endif
	    (oidp->oid_kind & CTLFLAG_TUN) != 0 &&
	    (oidp->oid_kind & CTLFLAG_NOFETCH) == 0) {
		sysctl_load_tunable_by_oid_locked(oidp);
	}
}

/*
 * Remove an oid from its parent's list (the oid itself is not freed).
 * Requires the sysctl lock held exclusively.
 */
void
sysctl_unregister_oid(struct sysctl_oid *oidp)
{
	struct sysctl_oid *p;
	int error;

	SYSCTL_ASSERT_XLOCKED();
	error = ENOENT;
	if (oidp->oid_number == OID_AUTO) {
		/* An unresolved OID_AUTO was never actually registered. */
		error = EINVAL;
	} else {
		SLIST_FOREACH(p, oidp->oid_parent, oid_link) {
			if (p == oidp) {
				SLIST_REMOVE(oidp->oid_parent, oidp,
				    sysctl_oid, oid_link);
				error = 0;
				break;
			}
		}
	}

	/*
	 * This can happen when a module fails to register and is
	 * being unloaded afterwards.  It should not be a panic()
	 * for normal use.
	 */
	if (error)
		printf("%s: failed to unregister sysctl\n", __func__);
}

/* Initialize a new context to keep track of dynamically added sysctls. */
int
sysctl_ctx_init(struct sysctl_ctx_list *c)
{

	if (c == NULL) {
		return (EINVAL);
	}

	/*
	 * No locking here, the caller is responsible for not adding
	 * new nodes to a context until after this function has
	 * returned.
	 */
	TAILQ_INIT(c);
	return (0);
}

/* Free the context, and destroy all dynamic oids registered in this context */
int
sysctl_ctx_free(struct sysctl_ctx_list *clist)
{
	struct sysctl_ctx_entry *e, *e1;
	int error;

	error = 0;
	/*
	 * First perform a "dry run" to check if it's ok to remove oids:
	 * deregister each entry without freeing.  If any removal fails,
	 * everything already deregistered is rolled back below.
	 * XXX FIXME
	 * XXX This algorithm is a hack. But I don't know any
	 * XXX better solution for now...
	 */
	SYSCTL_XLOCK();
	TAILQ_FOREACH(e, clist, link) {
		error = sysctl_remove_oid_locked(e->entry, 0, 0);
		if (error)
			break;
	}
	/*
	 * Restore deregistered entries, either from the end,
	 * or from the place where error occurred.
	 * e contains the entry that was not unregistered
	 */
	if (error)
		e1 = TAILQ_PREV(e, sysctl_ctx_list, link);
	else
		e1 = TAILQ_LAST(clist, sysctl_ctx_list);
	while (e1 != NULL) {
		sysctl_register_oid(e1->entry);
		e1 = TAILQ_PREV(e1, sysctl_ctx_list, link);
	}
	if (error) {
		SYSCTL_XUNLOCK();
		return(EBUSY);
	}
	/* Now really delete the entries (del=1 frees the oids too). */
	e = TAILQ_FIRST(clist);
	while (e != NULL) {
		e1 = TAILQ_NEXT(e, link);
		error = sysctl_remove_oid_locked(e->entry, 1, 0);
		if (error)
			panic("sysctl_remove_oid: corrupt tree, entry: %s",
			    e->entry->oid_name);
		free(e, M_SYSCTLOID);
		e = e1;
	}
	SYSCTL_XUNLOCK();
	return (error);
}

/* Add an entry to the context.  Requires the sysctl lock (exclusive). */
struct sysctl_ctx_entry *
sysctl_ctx_entry_add(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp)
{
	struct sysctl_ctx_entry *e;

	SYSCTL_ASSERT_XLOCKED();
	if (clist == NULL || oidp == NULL)
		return(NULL);
	e = malloc(sizeof(struct sysctl_ctx_entry), M_SYSCTLOID, M_WAITOK);
	e->entry = oidp;
	TAILQ_INSERT_HEAD(clist, e, link);
	return (e);
}

/* Find an entry in the context */
struct sysctl_ctx_entry *
sysctl_ctx_entry_find(struct sysctl_ctx_list *clist, struct
sysctl_oid *oidp)
{
	struct sysctl_ctx_entry *e;

	SYSCTL_ASSERT_XLOCKED();
	if (clist == NULL || oidp == NULL)
		return(NULL);
	TAILQ_FOREACH(e, clist, link) {
		if(e->entry == oidp)
			return(e);
	}
	/* Not found: e is NULL here after the loop terminates. */
	return (e);
}

/*
 * Delete an entry from the context.
 * NOTE: this function doesn't free oidp! You have to remove it
 * with sysctl_remove_oid().
 */
int
sysctl_ctx_entry_del(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp)
{
	struct sysctl_ctx_entry *e;

	if (clist == NULL || oidp == NULL)
		return (EINVAL);
	SYSCTL_XLOCK();
	e = sysctl_ctx_entry_find(clist, oidp);
	if (e != NULL) {
		TAILQ_REMOVE(clist, e, link);
		SYSCTL_XUNLOCK();
		free(e, M_SYSCTLOID);
		return (0);
	} else {
		SYSCTL_XUNLOCK();
		return (ENOENT);
	}
}

/*
 * Remove dynamically created sysctl trees.
 * oidp - top of the tree to be removed
 * del - if 0 - just deregister, otherwise free up entries as well
 * recurse - if != 0 traverse the subtree to be deleted
 */
int
sysctl_remove_oid(struct sysctl_oid *oidp, int del, int recurse)
{
	int error;

	SYSCTL_XLOCK();
	error = sysctl_remove_oid_locked(oidp, del, recurse);
	SYSCTL_XUNLOCK();
	return (error);
}

/*
 * Like sysctl_remove_oid(), but the victim is identified by name
 * within 'parent'.  Returns ENOENT if no child has that name.
 */
int
sysctl_remove_name(struct sysctl_oid *parent, const char *name,
    int del, int recurse)
{
	struct sysctl_oid *p, *tmp;
	int error;

	error = ENOENT;
	SYSCTL_XLOCK();
	SLIST_FOREACH_SAFE(p, SYSCTL_CHILDREN(parent), oid_link, tmp) {
		if (strcmp(p->oid_name, name) == 0) {
			error = sysctl_remove_oid_locked(p, del, recurse);
			break;
		}
	}
	SYSCTL_XUNLOCK();

	return (error);
}


/*
 * Worker for the removal routines above; sysctl lock must be held
 * exclusively.  Refuses to touch non-dynamic oids.
 */
static int
sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del, int recurse)
{
	struct sysctl_oid *p, *tmp;
	int error;

	SYSCTL_ASSERT_XLOCKED();
	if (oidp == NULL)
		return(EINVAL);
	if ((oidp->oid_kind &
	    CTLFLAG_DYN) == 0) {
		printf("can't remove non-dynamic nodes!\n");
		return (EINVAL);
	}
	/*
	 * WARNING: normal method to do this should be through
	 * sysctl_ctx_free(). Use recursing as the last resort
	 * method to purge your sysctl tree of leftovers...
	 * However, if some other code still references these nodes,
	 * it will panic.
	 */
	if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
		if (oidp->oid_refcnt == 1) {
			SLIST_FOREACH_SAFE(p,
			    SYSCTL_CHILDREN(oidp), oid_link, tmp) {
				if (!recurse) {
					printf("Warning: failed attempt to "
					    "remove oid %s with child %s\n",
					    oidp->oid_name, p->oid_name);
					return (ENOTEMPTY);
				}
				error = sysctl_remove_oid_locked(p, del,
				    recurse);
				if (error)
					return (error);
			}
		}
	}
	if (oidp->oid_refcnt > 1 ) {
		/* Shared node: just drop one reference. */
		oidp->oid_refcnt--;
	} else {
		if (oidp->oid_refcnt == 0) {
			printf("Warning: bad oid_refcnt=%u (%s)!\n",
			    oidp->oid_refcnt, oidp->oid_name);
			return (EINVAL);
		}
		sysctl_unregister_oid(oidp);
		if (del) {
			/*
			 * Wait for all threads running the handler to drain.
			 * This preserves the previous behavior when the
			 * sysctl lock was held across a handler invocation,
			 * and is necessary for module unload correctness.
			 */
			while (oidp->oid_running > 0) {
				oidp->oid_kind |= CTLFLAG_DYING;
				SYSCTL_SLEEP(&oidp->oid_running, "oidrm", 0);
			}
			if (oidp->oid_descr)
				free(__DECONST(char *, oidp->oid_descr),
				    M_SYSCTLOID);
			free(__DECONST(char *, oidp->oid_name), M_SYSCTLOID);
			free(oidp, M_SYSCTLOID);
		}
	}
	return (0);
}
/*
 * Create new sysctls at run time.
 * clist may point to a valid context initialized with sysctl_ctx_init().
 */
struct sysctl_oid *
sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent,
    int number, const char *name, int kind, void *arg1, intptr_t arg2,
    int (*handler)(SYSCTL_HANDLER_ARGS), const char *fmt, const char *descr)
{
	struct sysctl_oid *oidp;

	/* You have to hook up somewhere.. */
	if (parent == NULL)
		return(NULL);
	/* Check if the node already exists, otherwise create it */
	SYSCTL_XLOCK();
	oidp = sysctl_find_oidname(name, parent);
	if (oidp != NULL) {
		if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
			/* Existing node: share it via refcount. */
			oidp->oid_refcnt++;
			/* Update the context */
			if (clist != NULL)
				sysctl_ctx_entry_add(clist, oidp);
			SYSCTL_XUNLOCK();
			return (oidp);
		} else {
			SYSCTL_XUNLOCK();
			printf("can't re-use a leaf (%s)!\n", name);
			return (NULL);
		}
	}
	oidp = malloc(sizeof(struct sysctl_oid), M_SYSCTLOID, M_WAITOK|M_ZERO);
	oidp->oid_parent = parent;
	SLIST_INIT(&oidp->oid_children);
	oidp->oid_number = number;
	oidp->oid_refcnt = 1;
	/* Name and description are copied; fmt is referenced as-is. */
	oidp->oid_name = strdup(name, M_SYSCTLOID);
	oidp->oid_handler = handler;
	oidp->oid_kind = CTLFLAG_DYN | kind;
	oidp->oid_arg1 = arg1;
	oidp->oid_arg2 = arg2;
	oidp->oid_fmt = fmt;
	if (descr != NULL)
		oidp->oid_descr = strdup(descr, M_SYSCTLOID);
	/* Update the context, if used */
	if (clist != NULL)
		sysctl_ctx_entry_add(clist, oidp);
	/* Register this oid */
	sysctl_register_oid(oidp);
	SYSCTL_XUNLOCK();
	return (oidp);
}

/*
 * Rename an existing oid.  The new name is duplicated before the lock
 * is taken so the swap under the lock cannot sleep; the old name is
 * freed after the lock is dropped.
 */
void
sysctl_rename_oid(struct sysctl_oid *oidp, const char *name)
{
	char *newname;
	char *oldname;

	newname = strdup(name, M_SYSCTLOID);
	SYSCTL_XLOCK();
	oldname = __DECONST(char *, oidp->oid_name);
	oidp->oid_name = newname;
	SYSCTL_XUNLOCK();
	free(oldname, M_SYSCTLOID);
}

/*
 * Reparent an existing oid.
 */
int
sysctl_move_oid(struct sysctl_oid *oid, struct sysctl_oid_list *parent)
{
	struct sysctl_oid *oidp;

	SYSCTL_XLOCK();
	if (oid->oid_parent == parent) {
		/* Already in place: nothing to do. */
		SYSCTL_XUNLOCK();
		return (0);
	}
	oidp = sysctl_find_oidname(oid->oid_name, parent);
	if (oidp != NULL) {
		SYSCTL_XUNLOCK();
		return (EEXIST);
	}
	/* Unhook, reassign parent and renumber, then re-register. */
	sysctl_unregister_oid(oid);
	oid->oid_parent = parent;
	oid->oid_number = OID_AUTO;
	sysctl_register_oid(oid);
	SYSCTL_XUNLOCK();
	return (0);
}

/*
 * Register the kernel's oids on startup.
 */
SET_DECLARE(sysctl_set, struct sysctl_oid);

/*
 * SYSINIT hook: initialize the sysctl locks and register every
 * statically declared oid from the linker set.
 */
static void
sysctl_register_all(void *arg)
{
	struct sysctl_oid **oidp;

	sx_init(&sysctlmemlock, "sysctl mem");
	SYSCTL_INIT();
	SYSCTL_XLOCK();
	SET_FOREACH(oidp, sysctl_set)
		sysctl_register_oid(*oidp);
	SYSCTL_XUNLOCK();
}
SYSINIT(sysctl, SI_SUB_KMEM, SI_ORDER_FIRST, sysctl_register_all, 0);

/*
 * "Staff-functions"
 *
 * These functions implement a presently undocumented interface
 * used by the sysctl program to walk the tree, and get the type
 * so it can print the value.
 * This interface is under work and consideration, and should probably
 * be killed with a big axe by the first person who can find the time.
 * (be aware though, that the proper interface isn't as obvious as it
 * may seem, there are various conflicting requirements.
 *
 * {0,0}	printf the entire MIB-tree.
 * {0,1,...}	return the name of the "..." OID.
 * {0,2,...}	return the next OID.
 * {0,3}	return the OID of the name in "new"
 * {0,4,...}	return the kind & format info for the "..." OID.
 * {0,5,...}	return the description of the "..." OID.
 */

#ifdef SYSCTL_DEBUG
/*
 * Recursively pretty-print one level of the MIB tree to the console;
 * 'i' is the current indentation depth in spaces.
 */
static void
sysctl_sysctl_debug_dump_node(struct sysctl_oid_list *l, int i)
{
	int k;
	struct sysctl_oid *oidp;

	SYSCTL_ASSERT_LOCKED();
	SLIST_FOREACH(oidp, l, oid_link) {

		for (k=0; k<i; k++)
			printf(" ");

		printf("%d %s ", oidp->oid_number, oidp->oid_name);

		printf("%c%c",
		    oidp->oid_kind & CTLFLAG_RD ? 'R':' ',
		    oidp->oid_kind & CTLFLAG_WR ? 'W':' ');

		if (oidp->oid_handler)
			printf(" *Handler");

		switch (oidp->oid_kind & CTLTYPE) {
		case CTLTYPE_NODE:
			printf(" Node\n");
			/* Nodes with a handler have no child list to walk. */
			if (!oidp->oid_handler) {
				sysctl_sysctl_debug_dump_node(
				    SYSCTL_CHILDREN(oidp), i + 2);
			}
			break;
		case CTLTYPE_INT:    printf(" Int\n"); break;
		case CTLTYPE_UINT:   printf(" u_int\n"); break;
		case CTLTYPE_LONG:   printf(" Long\n"); break;
		case CTLTYPE_ULONG:  printf(" u_long\n"); break;
		case CTLTYPE_STRING: printf(" String\n"); break;
		case CTLTYPE_U64:    printf(" uint64_t\n"); break;
		case CTLTYPE_S64:    printf(" int64_t\n"); break;
		case CTLTYPE_OPAQUE: printf(" Opaque/struct\n"); break;
		default:	     printf("\n");
		}

	}
}

/* Handler for {0,0}: dump the whole tree (privileged). */
static int
sysctl_sysctl_debug(SYSCTL_HANDLER_ARGS)
{
	int error;

	error = priv_check(req->td, PRIV_SYSCTL_DEBUG);
	if (error)
		return (error);
	SYSCTL_SLOCK();
	sysctl_sysctl_debug_dump_node(&sysctl__children, 0);
	SYSCTL_SUNLOCK();
	return (ENOENT);
}

SYSCTL_PROC(_sysctl, 0, debug, CTLTYPE_STRING|CTLFLAG_RD|CTLFLAG_MPSAFE,
    0, 0, sysctl_sysctl_debug, "-", "");
#endif

/*
 * Handler for {0,1,...}: translate an OID to its dotted name.
 * Unknown components are emitted numerically.
 */
static int
sysctl_sysctl_name(SYSCTL_HANDLER_ARGS)
{
	int *name = (int *) arg1;
	u_int namelen = arg2;
	int error = 0;
	struct sysctl_oid *oid;
	struct sysctl_oid_list *lsp = &sysctl__children, *lsp2;
	char buf[10];

	SYSCTL_SLOCK();
	while (namelen) {
		if (!lsp) {
			/* Ran out of tree: print remaining components as numbers. */
			snprintf(buf,sizeof(buf),"%d",*name);
			if (req->oldidx)
				error =
				    SYSCTL_OUT(req, ".", 1);
			if (!error)
				error = SYSCTL_OUT(req, buf, strlen(buf));
			if (error)
				goto out;
			namelen--;
			name++;
			continue;
		}
		lsp2 = 0;
		SLIST_FOREACH(oid, lsp, oid_link) {
			if (oid->oid_number != *name)
				continue;

			/* Emit a '.' separator before all but the first component. */
			if (req->oldidx)
				error = SYSCTL_OUT(req, ".", 1);
			if (!error)
				error = SYSCTL_OUT(req, oid->oid_name,
				    strlen(oid->oid_name));
			if (error)
				goto out;

			namelen--;
			name++;

			if ((oid->oid_kind & CTLTYPE) != CTLTYPE_NODE)
				break;

			if (oid->oid_handler)
				break;

			lsp2 = SYSCTL_CHILDREN(oid);
			break;
		}
		lsp = lsp2;
	}
	error = SYSCTL_OUT(req, "", 1);
 out:
	SYSCTL_SUNLOCK();
	return (error);
}

/*
 * XXXRW/JA: Shouldn't return name data for nodes that we don't permit in
 * capability mode.
 */
static SYSCTL_NODE(_sysctl, 1, name, CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_CAPRD,
    sysctl_sysctl_name, "");

/*
 * Depth-first walk to find the next OID after 'name' in tree order.
 * Returns 0 when a next entry was found (written into 'next'/'len'),
 * 1 when this subtree is exhausted.  Requires the sysctl lock.
 */
static int
sysctl_sysctl_next_ls(struct sysctl_oid_list *lsp, int *name, u_int namelen,
    int *next, int *len, int level, struct sysctl_oid **oidpp)
{
	struct sysctl_oid *oidp;

	SYSCTL_ASSERT_LOCKED();
	*len = level;
	SLIST_FOREACH(oidp, lsp, oid_link) {
		*next = oidp->oid_number;
		*oidpp = oidp;

		if (oidp->oid_kind & CTLFLAG_SKIP)
			continue;

		if (!namelen) {
			/* No remaining name: first eligible entry wins. */
			if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE)
				return (0);
			if (oidp->oid_handler)
				/* We really should call the handler here...*/
				return (0);
			lsp = SYSCTL_CHILDREN(oidp);
			if (!sysctl_sysctl_next_ls(lsp, 0, 0, next+1,
			    len, level+1, oidpp))
				return (0);
			goto emptynode;
		}

		if (oidp->oid_number < *name)
			continue;

		if (oidp->oid_number > *name) {
			/* Passed the target: descend for the first entry. */
			if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE)
				return (0);
			if (oidp->oid_handler)
				return (0);
			lsp = SYSCTL_CHILDREN(oidp);
			if (!sysctl_sysctl_next_ls(lsp, name+1,
			    namelen-1,
			    next+1, len, level+1, oidpp))
				return (0);
			goto next;
		}
		/* Exact match on this component: descend if it is a node. */
		if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE)
			continue;

		if (oidp->oid_handler)
			continue;

		lsp = SYSCTL_CHILDREN(oidp);
		if (!sysctl_sysctl_next_ls(lsp, name+1, namelen-1, next+1,
		    len, level+1, oidpp))
			return (0);
	next:
		/* Subtree exhausted: keep scanning siblings, matching any next. */
		namelen = 1;
	emptynode:
		*len = level;
	}
	return (1);
}

/* Handler for {0,2,...}: return the OID following the given one. */
static int
sysctl_sysctl_next(SYSCTL_HANDLER_ARGS)
{
	int *name = (int *) arg1;
	u_int namelen = arg2;
	int i, j, error;
	struct sysctl_oid *oid;
	struct sysctl_oid_list *lsp = &sysctl__children;
	int newoid[CTL_MAXNAME];

	SYSCTL_SLOCK();
	i = sysctl_sysctl_next_ls(lsp, name, namelen, newoid, &j, 1, &oid);
	SYSCTL_SUNLOCK();
	if (i)
		return (ENOENT);
	error = SYSCTL_OUT(req, newoid, j * sizeof (int));
	return (error);
}

/*
 * XXXRW/JA: Shouldn't return next data for nodes that we don't permit in
 * capability mode.
 */
static SYSCTL_NODE(_sysctl, 2, next, CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_CAPRD,
    sysctl_sysctl_next, "");

/*
 * Translate a dotted name into its numeric OID, walking the tree one
 * component at a time.  NOTE: strsep() mutates 'name' in place, so the
 * caller must pass a writable copy.  Requires the sysctl lock.
 */
static int
name2oid(char *name, int *oid, int *len, struct sysctl_oid **oidpp)
{
	struct sysctl_oid *oidp;
	struct sysctl_oid_list *lsp = &sysctl__children;
	char *p;

	SYSCTL_ASSERT_LOCKED();

	for (*len = 0; *len < CTL_MAXNAME;) {
		p = strsep(&name, ".");

		oidp = SLIST_FIRST(lsp);
		for (;; oidp = SLIST_NEXT(oidp, oid_link)) {
			if (oidp == NULL)
				return (ENOENT);
			if (strcmp(p, oidp->oid_name) == 0)
				break;
		}
		*oid++ = oidp->oid_number;
		(*len)++;

		if (name == NULL || *name == '\0') {
			/* Whole name consumed: success. */
			if (oidpp)
				*oidpp = oidp;
			return (0);
		}

		/* More components remain but this entry cannot be descended. */
		if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE)
			break;

		if (oidp->oid_handler)
			break;

		lsp = SYSCTL_CHILDREN(oidp);
	}
	return (ENOENT);
}

/* Handler for {0,3}: look up the OID for the name supplied as new data. */
static int
sysctl_sysctl_name2oid(SYSCTL_HANDLER_ARGS)
{
	char *p;
	int error, oid[CTL_MAXNAME], len = 0;
	struct sysctl_oid *op = 0;

	if (!req->newlen)
		return (ENOENT);
	if (req->newlen >= MAXPATHLEN)	/* XXX arbitrary, undocumented */
		return (ENAMETOOLONG);

	p = malloc(req->newlen+1, M_SYSCTL, M_WAITOK);

	error = SYSCTL_IN(req, p, req->newlen);
	if (error) {
		free(p, M_SYSCTL);
		return (error);
	}

	p [req->newlen] = '\0';

	SYSCTL_SLOCK();
	error = name2oid(p, oid, &len, &op);
	SYSCTL_SUNLOCK();

	free(p, M_SYSCTL);

	if (error)
		return (error);

	error = SYSCTL_OUT(req, oid, len * sizeof *oid);
	return (error);
}

/*
 * XXXRW/JA: Shouldn't return name2oid data for nodes that we don't permit in
 * capability mode.
 */
SYSCTL_PROC(_sysctl, 3, name2oid,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE
    | CTLFLAG_CAPRW, 0, 0, sysctl_sysctl_name2oid, "I", "");

/*
 * Handler for {0,4,...}: return the oid_kind flags followed by the
 * NUL-terminated format string for the given OID.
 */
static int
sysctl_sysctl_oidfmt(SYSCTL_HANDLER_ARGS)
{
	struct sysctl_oid *oid;
	int error;

	SYSCTL_SLOCK();
	error = sysctl_find_oid(arg1, arg2, &oid, NULL, req);
	if (error)
		goto out;

	if (oid->oid_fmt == NULL) {
		error = ENOENT;
		goto out;
	}
	error = SYSCTL_OUT(req, &oid->oid_kind, sizeof(oid->oid_kind));
	if (error)
		goto out;
	error = SYSCTL_OUT(req, oid->oid_fmt, strlen(oid->oid_fmt) + 1);
 out:
	SYSCTL_SUNLOCK();
	return (error);
}


static SYSCTL_NODE(_sysctl, 4, oidfmt, CTLFLAG_RD|CTLFLAG_MPSAFE|CTLFLAG_CAPRD,
    sysctl_sysctl_oidfmt, "");

/* Handler for {0,5,...}: return the description string for the given OID. */
static int
sysctl_sysctl_oiddescr(SYSCTL_HANDLER_ARGS)
{
	struct sysctl_oid *oid;
	int error;

	SYSCTL_SLOCK();
	error = sysctl_find_oid(arg1, arg2, &oid, NULL, req);
	if (error)
		goto out;

	if (oid->oid_descr == NULL) {
		error = ENOENT;
		goto out;
	}
	error = SYSCTL_OUT(req, oid->oid_descr, strlen(oid->oid_descr) + 1);
 out:
	SYSCTL_SUNLOCK();
	return (error);
}

static SYSCTL_NODE(_sysctl, 5, oiddescr, CTLFLAG_RD|CTLFLAG_MPSAFE|CTLFLAG_CAPRD,
    sysctl_sysctl_oiddescr, "");

/*
 * Default "handler" functions.
 */

/*
 * Handle an int, signed or unsigned.
 * Two cases:
 *     a variable:  point arg1 at it.
 *     a constant:  pass it in arg2.
 */

int
sysctl_handle_int(SYSCTL_HANDLER_ARGS)
{
	int tmpout, error = 0;

	/*
	 * Attempt to get a coherent snapshot by making a copy of the data.
	 */
	if (arg1)
		tmpout = *(int *)arg1;
	else
		tmpout = arg2;
	error = SYSCTL_OUT(req, &tmpout, sizeof(int));

	if (error || !req->newptr)
		return (error);

	/* A constant (arg2-only) value cannot be written. */
	if (!arg1)
		error = EPERM;
	else
		error = SYSCTL_IN(req, arg1, sizeof(int));
	return (error);
}

/*
 * Based on sysctl_handle_int(), convert milliseconds into ticks.
 * Note: this is used by TCP.
 */

int
sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS)
{
	int error, s, tt;

	/* Export the tick count as milliseconds. */
	tt = *(int *)arg1;
	s = (int)((int64_t)tt * 1000 / hz);

	error = sysctl_handle_int(oidp, &s, 0, req);
	if (error || !req->newptr)
		return (error);

	/* Convert the new millisecond value back to ticks; minimum 1. */
	tt = (int)((int64_t)s * hz / 1000);
	if (tt < 1)
		return (EINVAL);

	*(int *)arg1 = tt;
	return (0);
}


/*
 * Handle a long, signed or unsigned.
 * Two cases:
 *     a variable:  point arg1 at it.
 *     a constant:  pass it in arg2.
 */

int
sysctl_handle_long(SYSCTL_HANDLER_ARGS)
{
	int error = 0;
	long tmplong;
#ifdef SCTL_MASK32
	int tmpint;
#endif

	/*
	 * Attempt to get a coherent snapshot by making a copy of the data.
	 */
	if (arg1)
		tmplong = *(long *)arg1;
	else
		tmplong = arg2;
#ifdef SCTL_MASK32
	/* 32-bit compat requests transfer the value as an int. */
	if (req->flags & SCTL_MASK32) {
		tmpint = tmplong;
		error = SYSCTL_OUT(req, &tmpint, sizeof(int));
	} else
#endif
		error = SYSCTL_OUT(req, &tmplong, sizeof(long));

	if (error || !req->newptr)
		return (error);

	if (!arg1)
		error = EPERM;
#ifdef SCTL_MASK32
	else if (req->flags & SCTL_MASK32) {
		error = SYSCTL_IN(req, &tmpint, sizeof(int));
		*(long *)arg1 = (long)tmpint;
	}
#endif
	else
		error = SYSCTL_IN(req, arg1, sizeof(long));
	return (error);
}

/*
 * Handle a 64 bit int, signed or unsigned.
 * Two cases:
 *     a variable:  point arg1 at it.
 *     a constant:  pass it in arg2.
 */
int
sysctl_handle_64(SYSCTL_HANDLER_ARGS)
{
	int error = 0;
	uint64_t tmpout;

	/*
	 * Attempt to get a coherent snapshot by making a copy of the data.
	 */
	if (arg1)
		tmpout = *(uint64_t *)arg1;
	else
		tmpout = arg2;
	error = SYSCTL_OUT(req, &tmpout, sizeof(uint64_t));

	if (error || !req->newptr)
		return (error);

	/* A constant (arg2-only) value cannot be written. */
	if (!arg1)
		error = EPERM;
	else
		error = SYSCTL_IN(req, arg1, sizeof(uint64_t));
	return (error);
}

/*
 * Handle our generic '\0' terminated 'C' string.
 * Two cases:
 *	a variable string:  point arg1 at it, arg2 is max length.
 *	a constant string:  point arg1 at it, arg2 is zero.
 */

int
sysctl_handle_string(SYSCTL_HANDLER_ARGS)
{
	size_t outlen;
	int error = 0, ro_string = 0;

	/*
	 * A zero-length buffer indicates a fixed size read-only
	 * string:
	 */
	if (arg2 == 0) {
		arg2 = strlen((char *)arg1) + 1;
		ro_string = 1;
	}

	if (req->oldptr != NULL) {
		char *tmparg;

		if (ro_string) {
			tmparg = arg1;
		} else {
			/* try to make a coherent snapshot of the string */
			tmparg = malloc(arg2, M_SYSCTLTMP, M_WAITOK);
			memcpy(tmparg, arg1, arg2);
		}

		outlen = strnlen(tmparg, arg2 - 1) + 1;
		error = SYSCTL_OUT(req, tmparg, outlen);

		if (!ro_string)
			free(tmparg, M_SYSCTLTMP);
	} else {
		/* Size-probe only: report the length without copying. */
		outlen = strnlen((char *)arg1, arg2 - 1) + 1;
		error = SYSCTL_OUT(req, NULL, outlen);
	}
	if (error || !req->newptr)
		return (error);

	/* New value must leave room for the terminating NUL. */
	if ((req->newlen - req->newidx) >= arg2) {
		error = EINVAL;
	} else {
		arg2 = (req->newlen - req->newidx);
		error = SYSCTL_IN(req, arg1, arg2);
		((char *)arg1)[arg2] = '\0';
	}
	return (error);
}

/*
 * Handle any kind of
opaque data. 1292 * arg1 points to it, arg2 is the size. 1293 */ 1294 1295int 1296sysctl_handle_opaque(SYSCTL_HANDLER_ARGS) 1297{ 1298 int error, tries; 1299 u_int generation; 1300 struct sysctl_req req2; 1301 1302 /* 1303 * Attempt to get a coherent snapshot, by using the thread 1304 * pre-emption counter updated from within mi_switch() to 1305 * determine if we were pre-empted during a bcopy() or 1306 * copyout(). Make 3 attempts at doing this before giving up. 1307 * If we encounter an error, stop immediately. 1308 */ 1309 tries = 0; 1310 req2 = *req; 1311retry: 1312 generation = curthread->td_generation; 1313 error = SYSCTL_OUT(req, arg1, arg2); 1314 if (error) 1315 return (error); 1316 tries++; 1317 if (generation != curthread->td_generation && tries < 3) { 1318 *req = req2; 1319 goto retry; 1320 } 1321 1322 error = SYSCTL_IN(req, arg1, arg2); 1323 1324 return (error); 1325} 1326 1327/* 1328 * Transfer functions to/from kernel space. 1329 * XXX: rather untested at this point 1330 */ 1331static int 1332sysctl_old_kernel(struct sysctl_req *req, const void *p, size_t l) 1333{ 1334 size_t i = 0; 1335 1336 if (req->oldptr) { 1337 i = l; 1338 if (req->oldlen <= req->oldidx) 1339 i = 0; 1340 else 1341 if (i > req->oldlen - req->oldidx) 1342 i = req->oldlen - req->oldidx; 1343 if (i > 0) 1344 bcopy(p, (char *)req->oldptr + req->oldidx, i); 1345 } 1346 req->oldidx += l; 1347 if (req->oldptr && i != l) 1348 return (ENOMEM); 1349 return (0); 1350} 1351 1352static int 1353sysctl_new_kernel(struct sysctl_req *req, void *p, size_t l) 1354{ 1355 if (!req->newptr) 1356 return (0); 1357 if (req->newlen - req->newidx < l) 1358 return (EINVAL); 1359 bcopy((char *)req->newptr + req->newidx, p, l); 1360 req->newidx += l; 1361 return (0); 1362} 1363 1364int 1365kernel_sysctl(struct thread *td, int *name, u_int namelen, void *old, 1366 size_t *oldlenp, void *new, size_t newlen, size_t *retval, int flags) 1367{ 1368 int error = 0; 1369 struct sysctl_req req; 1370 1371 bzero(&req, 
sizeof req); 1372 1373 req.td = td; 1374 req.flags = flags; 1375 1376 if (oldlenp) { 1377 req.oldlen = *oldlenp; 1378 } 1379 req.validlen = req.oldlen; 1380 1381 if (old) { 1382 req.oldptr= old; 1383 } 1384 1385 if (new != NULL) { 1386 req.newlen = newlen; 1387 req.newptr = new; 1388 } 1389 1390 req.oldfunc = sysctl_old_kernel; 1391 req.newfunc = sysctl_new_kernel; 1392 req.lock = REQ_UNWIRED; 1393 1394 SYSCTL_SLOCK(); 1395 error = sysctl_root(0, name, namelen, &req); 1396 SYSCTL_SUNLOCK(); 1397 1398 if (req.lock == REQ_WIRED && req.validlen > 0) 1399 vsunlock(req.oldptr, req.validlen); 1400 1401 if (error && error != ENOMEM) 1402 return (error); 1403 1404 if (retval) { 1405 if (req.oldptr && req.oldidx > req.validlen) 1406 *retval = req.validlen; 1407 else 1408 *retval = req.oldidx; 1409 } 1410 return (error); 1411} 1412 1413int 1414kernel_sysctlbyname(struct thread *td, char *name, void *old, size_t *oldlenp, 1415 void *new, size_t newlen, size_t *retval, int flags) 1416{ 1417 int oid[CTL_MAXNAME]; 1418 size_t oidlen, plen; 1419 int error; 1420 1421 oid[0] = 0; /* sysctl internal magic */ 1422 oid[1] = 3; /* name2oid */ 1423 oidlen = sizeof(oid); 1424 1425 error = kernel_sysctl(td, oid, 2, oid, &oidlen, 1426 (void *)name, strlen(name), &plen, flags); 1427 if (error) 1428 return (error); 1429 1430 error = kernel_sysctl(td, oid, plen / sizeof(int), old, oldlenp, 1431 new, newlen, retval, flags); 1432 return (error); 1433} 1434 1435/* 1436 * Transfer function to/from user space. 1437 */ 1438static int 1439sysctl_old_user(struct sysctl_req *req, const void *p, size_t l) 1440{ 1441 size_t i, len, origidx; 1442 int error; 1443 1444 origidx = req->oldidx; 1445 req->oldidx += l; 1446 if (req->oldptr == NULL) 1447 return (0); 1448 /* 1449 * If we have not wired the user supplied buffer and we are currently 1450 * holding locks, drop a witness warning, as it's possible that 1451 * write operations to the user page can sleep. 
1452 */ 1453 if (req->lock != REQ_WIRED) 1454 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, 1455 "sysctl_old_user()"); 1456 i = l; 1457 len = req->validlen; 1458 if (len <= origidx) 1459 i = 0; 1460 else { 1461 if (i > len - origidx) 1462 i = len - origidx; 1463 if (req->lock == REQ_WIRED) { 1464 error = copyout_nofault(p, (char *)req->oldptr + 1465 origidx, i); 1466 } else 1467 error = copyout(p, (char *)req->oldptr + origidx, i); 1468 if (error != 0) 1469 return (error); 1470 } 1471 if (i < l) 1472 return (ENOMEM); 1473 return (0); 1474} 1475 1476static int 1477sysctl_new_user(struct sysctl_req *req, void *p, size_t l) 1478{ 1479 int error; 1480 1481 if (!req->newptr) 1482 return (0); 1483 if (req->newlen - req->newidx < l) 1484 return (EINVAL); 1485 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, 1486 "sysctl_new_user()"); 1487 error = copyin((char *)req->newptr + req->newidx, p, l); 1488 req->newidx += l; 1489 return (error); 1490} 1491 1492/* 1493 * Wire the user space destination buffer. If set to a value greater than 1494 * zero, the len parameter limits the maximum amount of wired memory. 1495 */ 1496int 1497sysctl_wire_old_buffer(struct sysctl_req *req, size_t len) 1498{ 1499 int ret; 1500 size_t wiredlen; 1501 1502 wiredlen = (len > 0 && len < req->oldlen) ? 
len : req->oldlen; 1503 ret = 0; 1504 if (req->lock != REQ_WIRED && req->oldptr && 1505 req->oldfunc == sysctl_old_user) { 1506 if (wiredlen != 0) { 1507 ret = vslock(req->oldptr, wiredlen); 1508 if (ret != 0) { 1509 if (ret != ENOMEM) 1510 return (ret); 1511 wiredlen = 0; 1512 } 1513 } 1514 req->lock = REQ_WIRED; 1515 req->validlen = wiredlen; 1516 } 1517 return (0); 1518} 1519 1520int 1521sysctl_find_oid(int *name, u_int namelen, struct sysctl_oid **noid, 1522 int *nindx, struct sysctl_req *req) 1523{ 1524 struct sysctl_oid_list *lsp; 1525 struct sysctl_oid *oid; 1526 int indx; 1527 1528 SYSCTL_ASSERT_LOCKED(); 1529 lsp = &sysctl__children; 1530 indx = 0; 1531 while (indx < CTL_MAXNAME) { 1532 SLIST_FOREACH(oid, lsp, oid_link) { 1533 if (oid->oid_number == name[indx]) 1534 break; 1535 } 1536 if (oid == NULL) 1537 return (ENOENT); 1538 1539 indx++; 1540 if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) { 1541 if (oid->oid_handler != NULL || indx == namelen) { 1542 *noid = oid; 1543 if (nindx != NULL) 1544 *nindx = indx; 1545 KASSERT((oid->oid_kind & CTLFLAG_DYING) == 0, 1546 ("%s found DYING node %p", __func__, oid)); 1547 return (0); 1548 } 1549 lsp = SYSCTL_CHILDREN(oid); 1550 } else if (indx == namelen) { 1551 *noid = oid; 1552 if (nindx != NULL) 1553 *nindx = indx; 1554 KASSERT((oid->oid_kind & CTLFLAG_DYING) == 0, 1555 ("%s found DYING node %p", __func__, oid)); 1556 return (0); 1557 } else { 1558 return (ENOTDIR); 1559 } 1560 } 1561 return (ENOENT); 1562} 1563 1564/* 1565 * Traverse our tree, and find the right node, execute whatever it points 1566 * to, and return the resulting error code. 
1567 */ 1568 1569static int 1570sysctl_root(SYSCTL_HANDLER_ARGS) 1571{ 1572 struct sysctl_oid *oid; 1573 int error, indx, lvl; 1574 1575 SYSCTL_ASSERT_LOCKED(); 1576 1577 error = sysctl_find_oid(arg1, arg2, &oid, &indx, req); 1578 if (error) 1579 return (error); 1580 1581 if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) { 1582 /* 1583 * You can't call a sysctl when it's a node, but has 1584 * no handler. Inform the user that it's a node. 1585 * The indx may or may not be the same as namelen. 1586 */ 1587 if (oid->oid_handler == NULL) 1588 return (EISDIR); 1589 } 1590 1591 /* Is this sysctl writable? */ 1592 if (req->newptr && !(oid->oid_kind & CTLFLAG_WR)) 1593 return (EPERM); 1594 1595 KASSERT(req->td != NULL, ("sysctl_root(): req->td == NULL")); 1596 1597#ifdef CAPABILITY_MODE 1598 /* 1599 * If the process is in capability mode, then don't permit reading or 1600 * writing unless specifically granted for the node. 1601 */ 1602 if (IN_CAPABILITY_MODE(req->td)) { 1603 if (req->oldptr && !(oid->oid_kind & CTLFLAG_CAPRD)) 1604 return (EPERM); 1605 if (req->newptr && !(oid->oid_kind & CTLFLAG_CAPWR)) 1606 return (EPERM); 1607 } 1608#endif 1609 1610 /* Is this sysctl sensitive to securelevels? */ 1611 if (req->newptr && (oid->oid_kind & CTLFLAG_SECURE)) { 1612 lvl = (oid->oid_kind & CTLMASK_SECURE) >> CTLSHIFT_SECURE; 1613 error = securelevel_gt(req->td->td_ucred, lvl); 1614 if (error) 1615 return (error); 1616 } 1617 1618 /* Is this sysctl writable by only privileged users? 
*/ 1619 if (req->newptr && !(oid->oid_kind & CTLFLAG_ANYBODY)) { 1620 int priv; 1621 1622 if (oid->oid_kind & CTLFLAG_PRISON) 1623 priv = PRIV_SYSCTL_WRITEJAIL; 1624#ifdef VIMAGE 1625 else if ((oid->oid_kind & CTLFLAG_VNET) && 1626 prison_owns_vnet(req->td->td_ucred)) 1627 priv = PRIV_SYSCTL_WRITEJAIL; 1628#endif 1629 else 1630 priv = PRIV_SYSCTL_WRITE; 1631 error = priv_check(req->td, priv); 1632 if (error) 1633 return (error); 1634 } 1635 1636 if (!oid->oid_handler) 1637 return (EINVAL); 1638 1639 if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) { 1640 arg1 = (int *)arg1 + indx; 1641 arg2 -= indx; 1642 } else { 1643 arg1 = oid->oid_arg1; 1644 arg2 = oid->oid_arg2; 1645 } 1646#ifdef MAC 1647 error = mac_system_check_sysctl(req->td->td_ucred, oid, arg1, arg2, 1648 req); 1649 if (error != 0) 1650 return (error); 1651#endif 1652#ifdef VIMAGE 1653 if ((oid->oid_kind & CTLFLAG_VNET) && arg1 != NULL) 1654 arg1 = (void *)(curvnet->vnet_data_base + (uintptr_t)arg1); 1655#endif 1656 error = sysctl_root_handler_locked(oid, arg1, arg2, req); 1657 1658 KFAIL_POINT_ERROR(_debug_fail_point, sysctl_running, error); 1659 1660 return (error); 1661} 1662 1663#ifndef _SYS_SYSPROTO_H_ 1664struct sysctl_args { 1665 int *name; 1666 u_int namelen; 1667 void *old; 1668 size_t *oldlenp; 1669 void *new; 1670 size_t newlen; 1671}; 1672#endif 1673int 1674sys___sysctl(struct thread *td, struct sysctl_args *uap) 1675{ 1676 int error, i, name[CTL_MAXNAME]; 1677 size_t j; 1678 1679 if (uap->namelen > CTL_MAXNAME || uap->namelen < 2) 1680 return (EINVAL); 1681 1682 error = copyin(uap->name, &name, uap->namelen * sizeof(int)); 1683 if (error) 1684 return (error); 1685 1686 error = userland_sysctl(td, name, uap->namelen, 1687 uap->old, uap->oldlenp, 0, 1688 uap->new, uap->newlen, &j, 0); 1689 if (error && error != ENOMEM) 1690 return (error); 1691 if (uap->oldlenp) { 1692 i = copyout(&j, uap->oldlenp, sizeof(j)); 1693 if (i) 1694 return (i); 1695 } 1696 return (error); 1697} 1698 1699/* 1700 * This 
is used from various compatibility syscalls too. That's why name 1701 * must be in kernel space. 1702 */ 1703int 1704userland_sysctl(struct thread *td, int *name, u_int namelen, void *old, 1705 size_t *oldlenp, int inkernel, void *new, size_t newlen, size_t *retval, 1706 int flags) 1707{ 1708 int error = 0, memlocked; 1709 struct sysctl_req req; 1710 1711 bzero(&req, sizeof req); 1712 1713 req.td = td; 1714 req.flags = flags; 1715 1716 if (oldlenp) { 1717 if (inkernel) { 1718 req.oldlen = *oldlenp; 1719 } else { 1720 error = copyin(oldlenp, &req.oldlen, sizeof(*oldlenp)); 1721 if (error) 1722 return (error); 1723 } 1724 } 1725 req.validlen = req.oldlen; 1726 1727 if (old) { 1728 if (!useracc(old, req.oldlen, VM_PROT_WRITE)) 1729 return (EFAULT); 1730 req.oldptr= old; 1731 } 1732 1733 if (new != NULL) { 1734 if (!useracc(new, newlen, VM_PROT_READ)) 1735 return (EFAULT); 1736 req.newlen = newlen; 1737 req.newptr = new; 1738 } 1739 1740 req.oldfunc = sysctl_old_user; 1741 req.newfunc = sysctl_new_user; 1742 req.lock = REQ_UNWIRED; 1743 1744#ifdef KTRACE 1745 if (KTRPOINT(curthread, KTR_SYSCTL)) 1746 ktrsysctl(name, namelen); 1747#endif 1748 1749 if (req.oldlen > PAGE_SIZE) { 1750 memlocked = 1; 1751 sx_xlock(&sysctlmemlock); 1752 } else 1753 memlocked = 0; 1754 CURVNET_SET(TD_TO_VNET(td)); 1755 1756 for (;;) { 1757 req.oldidx = 0; 1758 req.newidx = 0; 1759 SYSCTL_SLOCK(); 1760 error = sysctl_root(0, name, namelen, &req); 1761 SYSCTL_SUNLOCK(); 1762 if (error != EAGAIN) 1763 break; 1764 kern_yield(PRI_USER); 1765 } 1766 1767 CURVNET_RESTORE(); 1768 1769 if (req.lock == REQ_WIRED && req.validlen > 0) 1770 vsunlock(req.oldptr, req.validlen); 1771 if (memlocked) 1772 sx_xunlock(&sysctlmemlock); 1773 1774 if (error && error != ENOMEM) 1775 return (error); 1776 1777 if (retval) { 1778 if (req.oldptr && req.oldidx > req.validlen) 1779 *retval = req.validlen; 1780 else 1781 *retval = req.oldidx; 1782 } 1783 return (error); 1784} 1785 1786/* 1787 * Drain into a sysctl struct. 
The user buffer should be wired if a page 1788 * fault would cause issue. 1789 */ 1790static int 1791sbuf_sysctl_drain(void *arg, const char *data, int len) 1792{ 1793 struct sysctl_req *req = arg; 1794 int error; 1795 1796 error = SYSCTL_OUT(req, data, len); 1797 KASSERT(error >= 0, ("Got unexpected negative value %d", error)); 1798 return (error == 0 ? len : -error); 1799} 1800 1801struct sbuf * 1802sbuf_new_for_sysctl(struct sbuf *s, char *buf, int length, 1803 struct sysctl_req *req) 1804{ 1805 1806 s = sbuf_new(s, buf, length, SBUF_FIXEDLEN); 1807 sbuf_set_drain(s, sbuf_sysctl_drain, req); 1808 return (s); 1809} 1810