kern_sysctl.c revision 267961
1/*- 2 * Copyright (c) 1982, 1986, 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Mike Karels at Berkeley Software Design, Inc. 7 * 8 * Quite extensively rewritten by Poul-Henning Kamp of the FreeBSD 9 * project, to make these variables more userfriendly. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 * 35 * @(#)kern_sysctl.c 8.4 (Berkeley) 4/14/94 36 */ 37 38#include <sys/cdefs.h> 39__FBSDID("$FreeBSD: head/sys/kern/kern_sysctl.c 267961 2014-06-27 16:33:43Z hselasky $"); 40 41#include "opt_capsicum.h" 42#include "opt_compat.h" 43#include "opt_ktrace.h" 44 45#include <sys/param.h> 46#include <sys/fail.h> 47#include <sys/systm.h> 48#include <sys/capsicum.h> 49#include <sys/kernel.h> 50#include <sys/sysctl.h> 51#include <sys/malloc.h> 52#include <sys/priv.h> 53#include <sys/proc.h> 54#include <sys/jail.h> 55#include <sys/lock.h> 56#include <sys/mutex.h> 57#include <sys/sbuf.h> 58#include <sys/sx.h> 59#include <sys/sysproto.h> 60#include <sys/uio.h> 61#ifdef KTRACE 62#include <sys/ktrace.h> 63#endif 64 65#include <net/vnet.h> 66 67#include <security/mac/mac_framework.h> 68 69#include <vm/vm.h> 70#include <vm/vm_extern.h> 71 72static MALLOC_DEFINE(M_SYSCTL, "sysctl", "sysctl internal magic"); 73static MALLOC_DEFINE(M_SYSCTLOID, "sysctloid", "sysctl dynamic oids"); 74static MALLOC_DEFINE(M_SYSCTLTMP, "sysctltmp", "sysctl temp output buffer"); 75 76/* 77 * The sysctllock protects the MIB tree. It also protects sysctl 78 * contexts used with dynamic sysctls. The sysctl_register_oid() and 79 * sysctl_unregister_oid() routines require the sysctllock to already 80 * be held, so the sysctl_lock() and sysctl_unlock() routines are 81 * provided for the few places in the kernel which need to use that 82 * API rather than using the dynamic API. Use of the dynamic API is 83 * strongly encouraged for most code. 84 * 85 * The sysctlmemlock is used to limit the amount of user memory wired for 86 * sysctl requests. This is implemented by serializing any userland 87 * sysctl requests larger than a single page via an exclusive lock. 88 */ 89static struct sx sysctllock; 90static struct sx sysctlmemlock; 91 92#define SYSCTL_XLOCK() sx_xlock(&sysctllock) 93#define SYSCTL_XUNLOCK() sx_xunlock(&sysctllock) 94#define SYSCTL_ASSERT_XLOCKED() sx_assert(&sysctllock, SA_XLOCKED) 95#define SYSCTL_INIT() sx_init(&sysctllock, "sysctl lock") 96#define SYSCTL_SLEEP(ch, wmesg, timo) \ 97 sx_sleep(ch, &sysctllock, 0, wmesg, timo) 98 99static int sysctl_root(SYSCTL_HANDLER_ARGS); 100 101/* Root list */ 102struct sysctl_oid_list sysctl__children = SLIST_HEAD_INITIALIZER(&sysctl__children); 103 104static int sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del, 105 int recurse); 106static int sysctl_old_kernel(struct sysctl_req *, const void *, size_t); 107static int sysctl_new_kernel(struct sysctl_req *, void *, size_t); 108 109static struct sysctl_oid * 110sysctl_find_oidname(const char *name, struct sysctl_oid_list *list) 111{ 112 struct sysctl_oid *oidp; 113 114 SYSCTL_ASSERT_XLOCKED(); 115 SLIST_FOREACH(oidp, list, oid_link) { 116 if (strcmp(oidp->oid_name, name) == 0) { 117 return (oidp); 118 } 119 } 120 return (NULL); 121} 122 123/* 124 * Initialization of the MIB tree. 125 * 126 * Order by number in each list. 127 */ 128void 129sysctl_lock(void) 130{ 131 132 SYSCTL_XLOCK(); 133} 134 135void 136sysctl_unlock(void) 137{ 138 139 SYSCTL_XUNLOCK(); 140} 141 142static int 143sysctl_root_handler_locked(struct sysctl_oid *oid, void *arg1, intptr_t arg2, 144 struct sysctl_req *req) 145{ 146 int error; 147 148 oid->oid_running++; 149 SYSCTL_XUNLOCK(); 150 151 if (!(oid->oid_kind & CTLFLAG_MPSAFE)) 152 mtx_lock(&Giant); 153 error = oid->oid_handler(oid, arg1, arg2, req); 154 if (!(oid->oid_kind & CTLFLAG_MPSAFE)) 155 mtx_unlock(&Giant); 156 157 SYSCTL_XLOCK(); 158 oid->oid_running--; 159 if (oid->oid_running == 0 && (oid->oid_kind & CTLFLAG_DYING) != 0) 160 wakeup(&oid->oid_running); 161 162 return (error); 163} 164 165static void 166sysctl_load_tunable_by_oid_locked(struct sysctl_oid *oidp) 167{ 168 struct sysctl_req req; 169 struct sysctl_oid *curr; 170 char *penv; 171 char path[64]; 172 ssize_t rem = sizeof(path); 173 ssize_t len; 174 int val_int; 175 long val_long; 176 int64_t val_64; 177 int error; 178 179 path[--rem] = 0; 180 181 for (curr = oidp; curr != NULL; curr = SYSCTL_PARENT(curr)) { 182 len = strlen(curr->oid_name); 183 rem -= len; 184 if (curr != oidp) 185 rem -= 1; 186 if (rem < 0) { 187 printf("OID path exceeds %d bytes\n", (int)sizeof(path)); 188 return; 189 } 190 memcpy(path + rem, curr->oid_name, len); 191 if (curr != oidp) 192 path[rem + len] = '.'; 193 } 194 195 penv = getenv(path + rem); 196 if (penv == NULL) 197 return; 198 199 memset(&req, 0, sizeof(req)); 200 201 req.td = curthread; 202 req.oldfunc = sysctl_old_kernel; 203 req.newfunc = sysctl_new_kernel; 204 req.lock = REQ_UNWIRED; 205 206 switch (oidp->oid_kind & CTLTYPE) { 207 case CTLTYPE_INT: 208 val_int = strtoq(penv, NULL, 0); 209 req.newlen = sizeof(val_int); 210 req.newptr = &val_int; 211 break; 212 case CTLTYPE_UINT: 213 val_int = strtouq(penv, NULL, 0); 214 req.newlen = sizeof(val_int); 215 req.newptr = &val_int; 216 break; 217 case CTLTYPE_LONG: 218 val_long = strtoq(penv, NULL, 0); 219 req.newlen = sizeof(val_long); 220 req.newptr = &val_long; 221 break; 222 case CTLTYPE_ULONG: 223 val_long = strtouq(penv, NULL, 0); 224 req.newlen = sizeof(val_long); 225 req.newptr = &val_long; 226 break; 227 case CTLTYPE_S64: 228 val_64 = strtoq(penv, NULL, 0); 229 req.newlen = sizeof(val_64); 230 req.newptr = &val_64; 231 break; 232 case CTLTYPE_U64: 233 val_64 = strtouq(penv, NULL, 0); 234 req.newlen = sizeof(val_64); 235 req.newptr = &val_64; 236 break; 237 case CTLTYPE_STRING: 238 req.newlen = strlen(penv); 239 req.newptr = penv; 240 break; 241 default: 242 freeenv(penv); 243 return; 244 } 245 error = sysctl_root_handler_locked(oidp, oidp->oid_arg1, 246 oidp->oid_arg2, &req); 247 if (error != 0) { 248 printf("Setting sysctl '%s' to '%s' failed: %d\n", 249 path, penv, error); 250 } 251 freeenv(penv); 252} 253 254void 255sysctl_register_oid(struct sysctl_oid *oidp) 256{ 257 struct sysctl_oid_list *parent = oidp->oid_parent; 258 struct sysctl_oid *p; 259 struct sysctl_oid *q; 260 261 /* 262 * First check if another oid with the same name already 263 * exists in the parent's list. 264 */ 265 SYSCTL_ASSERT_XLOCKED(); 266 p = sysctl_find_oidname(oidp->oid_name, parent); 267 if (p != NULL) { 268 if ((p->oid_kind & CTLTYPE) == CTLTYPE_NODE) { 269 p->oid_refcnt++; 270 return; 271 } else { 272 printf("can't re-use a leaf (%s)!\n", p->oid_name); 273 return; 274 } 275 } 276 /* 277 * If this oid has a number OID_AUTO, give it a number which 278 * is greater than any current oid. 279 * NOTE: DO NOT change the starting value here, change it in 280 * <sys/sysctl.h>, and make sure it is at least 256 to 281 * accomodate e.g. net.inet.raw as a static sysctl node. 282 */ 283 if (oidp->oid_number == OID_AUTO) { 284 static int newoid = CTL_AUTO_START; 285 286 oidp->oid_number = newoid++; 287 if (newoid == 0x7fffffff) 288 panic("out of oids"); 289 } 290#if 0 291 else if (oidp->oid_number >= CTL_AUTO_START) { 292 /* do not panic; this happens when unregistering sysctl sets */ 293 printf("static sysctl oid too high: %d", oidp->oid_number); 294 } 295#endif 296 297 /* 298 * Insert the oid into the parent's list in order. 299 */ 300 q = NULL; 301 SLIST_FOREACH(p, parent, oid_link) { 302 if (oidp->oid_number < p->oid_number) 303 break; 304 q = p; 305 } 306 if (q) 307 SLIST_INSERT_AFTER(q, oidp, oid_link); 308 else 309 SLIST_INSERT_HEAD(parent, oidp, oid_link); 310 311 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE && 312#ifdef VIMAGE 313 (oidp->oid_kind & CTLFLAG_VNET) == 0 && 314#endif 315 (oidp->oid_kind & CTLFLAG_TUN) != 0 && 316 (oidp->oid_kind & CTLFLAG_NOFETCH) == 0) { 317 sysctl_load_tunable_by_oid_locked(oidp); 318 } 319} 320 321void 322sysctl_unregister_oid(struct sysctl_oid *oidp) 323{ 324 struct sysctl_oid *p; 325 int error; 326 327 SYSCTL_ASSERT_XLOCKED(); 328 error = ENOENT; 329 if (oidp->oid_number == OID_AUTO) { 330 error = EINVAL; 331 } else { 332 SLIST_FOREACH(p, oidp->oid_parent, oid_link) { 333 if (p == oidp) { 334 SLIST_REMOVE(oidp->oid_parent, oidp, 335 sysctl_oid, oid_link); 336 error = 0; 337 break; 338 } 339 } 340 } 341 342 /* 343 * This can happen when a module fails to register and is 344 * being unloaded afterwards. It should not be a panic() 345 * for normal use. 346 */ 347 if (error) 348 printf("%s: failed to unregister sysctl\n", __func__); 349} 350 351/* Initialize a new context to keep track of dynamically added sysctls. */ 352int 353sysctl_ctx_init(struct sysctl_ctx_list *c) 354{ 355 356 if (c == NULL) { 357 return (EINVAL); 358 } 359 360 /* 361 * No locking here, the caller is responsible for not adding 362 * new nodes to a context until after this function has 363 * returned. 364 */ 365 TAILQ_INIT(c); 366 return (0); 367} 368 369/* Free the context, and destroy all dynamic oids registered in this context */ 370int 371sysctl_ctx_free(struct sysctl_ctx_list *clist) 372{ 373 struct sysctl_ctx_entry *e, *e1; 374 int error; 375 376 error = 0; 377 /* 378 * First perform a "dry run" to check if it's ok to remove oids. 379 * XXX FIXME 380 * XXX This algorithm is a hack. But I don't know any 381 * XXX better solution for now... 382 */ 383 SYSCTL_XLOCK(); 384 TAILQ_FOREACH(e, clist, link) { 385 error = sysctl_remove_oid_locked(e->entry, 0, 0); 386 if (error) 387 break; 388 } 389 /* 390 * Restore deregistered entries, either from the end, 391 * or from the place where error occured. 392 * e contains the entry that was not unregistered 393 */ 394 if (error) 395 e1 = TAILQ_PREV(e, sysctl_ctx_list, link); 396 else 397 e1 = TAILQ_LAST(clist, sysctl_ctx_list); 398 while (e1 != NULL) { 399 sysctl_register_oid(e1->entry); 400 e1 = TAILQ_PREV(e1, sysctl_ctx_list, link); 401 } 402 if (error) { 403 SYSCTL_XUNLOCK(); 404 return(EBUSY); 405 } 406 /* Now really delete the entries */ 407 e = TAILQ_FIRST(clist); 408 while (e != NULL) { 409 e1 = TAILQ_NEXT(e, link); 410 error = sysctl_remove_oid_locked(e->entry, 1, 0); 411 if (error) 412 panic("sysctl_remove_oid: corrupt tree, entry: %s", 413 e->entry->oid_name); 414 free(e, M_SYSCTLOID); 415 e = e1; 416 } 417 SYSCTL_XUNLOCK(); 418 return (error); 419} 420 421/* Add an entry to the context */ 422struct sysctl_ctx_entry * 423sysctl_ctx_entry_add(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp) 424{ 425 struct sysctl_ctx_entry *e; 426 427 SYSCTL_ASSERT_XLOCKED(); 428 if (clist == NULL || oidp == NULL) 429 return(NULL); 430 e = malloc(sizeof(struct sysctl_ctx_entry), M_SYSCTLOID, M_WAITOK); 431 e->entry = oidp; 432 TAILQ_INSERT_HEAD(clist, e, link); 433 return (e); 434} 435 436/* Find an entry in the context */ 437struct sysctl_ctx_entry * 438sysctl_ctx_entry_find(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp) 439{ 440 struct sysctl_ctx_entry *e; 441 442 SYSCTL_ASSERT_XLOCKED(); 443 if (clist == NULL || oidp == NULL) 444 return(NULL); 445 TAILQ_FOREACH(e, clist, link) { 446 if(e->entry == oidp) 447 return(e); 448 } 449 return (e); 450} 451 452/* 453 * Delete an entry from the context. 454 * NOTE: this function doesn't free oidp! You have to remove it 455 * with sysctl_remove_oid(). 456 */ 457int 458sysctl_ctx_entry_del(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp) 459{ 460 struct sysctl_ctx_entry *e; 461 462 if (clist == NULL || oidp == NULL) 463 return (EINVAL); 464 SYSCTL_XLOCK(); 465 e = sysctl_ctx_entry_find(clist, oidp); 466 if (e != NULL) { 467 TAILQ_REMOVE(clist, e, link); 468 SYSCTL_XUNLOCK(); 469 free(e, M_SYSCTLOID); 470 return (0); 471 } else { 472 SYSCTL_XUNLOCK(); 473 return (ENOENT); 474 } 475} 476 477/* 478 * Remove dynamically created sysctl trees. 479 * oidp - top of the tree to be removed 480 * del - if 0 - just deregister, otherwise free up entries as well 481 * recurse - if != 0 traverse the subtree to be deleted 482 */ 483int 484sysctl_remove_oid(struct sysctl_oid *oidp, int del, int recurse) 485{ 486 int error; 487 488 SYSCTL_XLOCK(); 489 error = sysctl_remove_oid_locked(oidp, del, recurse); 490 SYSCTL_XUNLOCK(); 491 return (error); 492} 493 494int 495sysctl_remove_name(struct sysctl_oid *parent, const char *name, 496 int del, int recurse) 497{ 498 struct sysctl_oid *p, *tmp; 499 int error; 500 501 error = ENOENT; 502 SYSCTL_XLOCK(); 503 SLIST_FOREACH_SAFE(p, SYSCTL_CHILDREN(parent), oid_link, tmp) { 504 if (strcmp(p->oid_name, name) == 0) { 505 error = sysctl_remove_oid_locked(p, del, recurse); 506 break; 507 } 508 } 509 SYSCTL_XUNLOCK(); 510 511 return (error); 512} 513 514 515static int 516sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del, int recurse) 517{ 518 struct sysctl_oid *p, *tmp; 519 int error; 520 521 SYSCTL_ASSERT_XLOCKED(); 522 if (oidp == NULL) 523 return(EINVAL); 524 if ((oidp->oid_kind & CTLFLAG_DYN) == 0) { 525 printf("can't remove non-dynamic nodes!\n"); 526 return (EINVAL); 527 } 528 /* 529 * WARNING: normal method to do this should be through 530 * sysctl_ctx_free(). Use recursing as the last resort 531 * method to purge your sysctl tree of leftovers... 532 * However, if some other code still references these nodes, 533 * it will panic. 534 */ 535 if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) { 536 if (oidp->oid_refcnt == 1) { 537 SLIST_FOREACH_SAFE(p, 538 SYSCTL_CHILDREN(oidp), oid_link, tmp) { 539 if (!recurse) { 540 printf("Warning: failed attempt to " 541 "remove oid %s with child %s\n", 542 oidp->oid_name, p->oid_name); 543 return (ENOTEMPTY); 544 } 545 error = sysctl_remove_oid_locked(p, del, 546 recurse); 547 if (error) 548 return (error); 549 } 550 } 551 } 552 if (oidp->oid_refcnt > 1 ) { 553 oidp->oid_refcnt--; 554 } else { 555 if (oidp->oid_refcnt == 0) { 556 printf("Warning: bad oid_refcnt=%u (%s)!\n", 557 oidp->oid_refcnt, oidp->oid_name); 558 return (EINVAL); 559 } 560 sysctl_unregister_oid(oidp); 561 if (del) { 562 /* 563 * Wait for all threads running the handler to drain. 564 * This preserves the previous behavior when the 565 * sysctl lock was held across a handler invocation, 566 * and is necessary for module unload correctness. 567 */ 568 while (oidp->oid_running > 0) { 569 oidp->oid_kind |= CTLFLAG_DYING; 570 SYSCTL_SLEEP(&oidp->oid_running, "oidrm", 0); 571 } 572 if (oidp->oid_descr) 573 free(__DECONST(char *, oidp->oid_descr), 574 M_SYSCTLOID); 575 free(__DECONST(char *, oidp->oid_name), M_SYSCTLOID); 576 free(oidp, M_SYSCTLOID); 577 } 578 } 579 return (0); 580} 581/* 582 * Create new sysctls at run time. 583 * clist may point to a valid context initialized with sysctl_ctx_init(). 584 */ 585struct sysctl_oid * 586sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent, 587 int number, const char *name, int kind, void *arg1, intptr_t arg2, 588 int (*handler)(SYSCTL_HANDLER_ARGS), const char *fmt, const char *descr) 589{ 590 struct sysctl_oid *oidp; 591 592 /* You have to hook up somewhere.. */ 593 if (parent == NULL) 594 return(NULL); 595 /* Check if the node already exists, otherwise create it */ 596 SYSCTL_XLOCK(); 597 oidp = sysctl_find_oidname(name, parent); 598 if (oidp != NULL) { 599 if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) { 600 oidp->oid_refcnt++; 601 /* Update the context */ 602 if (clist != NULL) 603 sysctl_ctx_entry_add(clist, oidp); 604 SYSCTL_XUNLOCK(); 605 return (oidp); 606 } else { 607 SYSCTL_XUNLOCK(); 608 printf("can't re-use a leaf (%s)!\n", name); 609 return (NULL); 610 } 611 } 612 oidp = malloc(sizeof(struct sysctl_oid), M_SYSCTLOID, M_WAITOK|M_ZERO); 613 oidp->oid_parent = parent; 614 SLIST_INIT(&oidp->oid_children); 615 oidp->oid_number = number; 616 oidp->oid_refcnt = 1; 617 oidp->oid_name = strdup(name, M_SYSCTLOID); 618 oidp->oid_handler = handler; 619 oidp->oid_kind = CTLFLAG_DYN | kind; 620 oidp->oid_arg1 = arg1; 621 oidp->oid_arg2 = arg2; 622 oidp->oid_fmt = fmt; 623 if (descr != NULL) 624 oidp->oid_descr = strdup(descr, M_SYSCTLOID); 625 /* Update the context, if used */ 626 if (clist != NULL) 627 sysctl_ctx_entry_add(clist, oidp); 628 /* Register this oid */ 629 sysctl_register_oid(oidp); 630 SYSCTL_XUNLOCK(); 631 return (oidp); 632} 633 634/* 635 * Rename an existing oid. 636 */ 637void 638sysctl_rename_oid(struct sysctl_oid *oidp, const char *name) 639{ 640 char *newname; 641 char *oldname; 642 643 newname = strdup(name, M_SYSCTLOID); 644 SYSCTL_XLOCK(); 645 oldname = __DECONST(char *, oidp->oid_name); 646 oidp->oid_name = newname; 647 SYSCTL_XUNLOCK(); 648 free(oldname, M_SYSCTLOID); 649} 650 651/* 652 * Reparent an existing oid. 653 */ 654int 655sysctl_move_oid(struct sysctl_oid *oid, struct sysctl_oid_list *parent) 656{ 657 struct sysctl_oid *oidp; 658 659 SYSCTL_XLOCK(); 660 if (oid->oid_parent == parent) { 661 SYSCTL_XUNLOCK(); 662 return (0); 663 } 664 oidp = sysctl_find_oidname(oid->oid_name, parent); 665 if (oidp != NULL) { 666 SYSCTL_XUNLOCK(); 667 return (EEXIST); 668 } 669 sysctl_unregister_oid(oid); 670 oid->oid_parent = parent; 671 oid->oid_number = OID_AUTO; 672 sysctl_register_oid(oid); 673 SYSCTL_XUNLOCK(); 674 return (0); 675} 676 677/* 678 * Register the kernel's oids on startup. 679 */ 680SET_DECLARE(sysctl_set, struct sysctl_oid); 681 682static void 683sysctl_register_all(void *arg) 684{ 685 struct sysctl_oid **oidp; 686 687 sx_init(&sysctlmemlock, "sysctl mem"); 688 SYSCTL_INIT(); 689 SYSCTL_XLOCK(); 690 SET_FOREACH(oidp, sysctl_set) 691 sysctl_register_oid(*oidp); 692 SYSCTL_XUNLOCK(); 693} 694SYSINIT(sysctl, SI_SUB_KMEM, SI_ORDER_FIRST, sysctl_register_all, 0); 695 696/* 697 * "Staff-functions" 698 * 699 * These functions implement a presently undocumented interface 700 * used by the sysctl program to walk the tree, and get the type 701 * so it can print the value. 702 * This interface is under work and consideration, and should probably 703 * be killed with a big axe by the first person who can find the time. 704 * (be aware though, that the proper interface isn't as obvious as it 705 * may seem, there are various conflicting requirements. 706 * 707 * {0,0} printf the entire MIB-tree. 708 * {0,1,...} return the name of the "..." OID. 709 * {0,2,...} return the next OID. 710 * {0,3} return the OID of the name in "new" 711 * {0,4,...} return the kind & format info for the "..." OID. 712 * {0,5,...} return the description the "..." OID. 713 */ 714 715#ifdef SYSCTL_DEBUG 716static void 717sysctl_sysctl_debug_dump_node(struct sysctl_oid_list *l, int i) 718{ 719 int k; 720 struct sysctl_oid *oidp; 721 722 SYSCTL_ASSERT_XLOCKED(); 723 SLIST_FOREACH(oidp, l, oid_link) { 724 725 for (k=0; k<i; k++) 726 printf(" "); 727 728 printf("%d %s ", oidp->oid_number, oidp->oid_name); 729 730 printf("%c%c", 731 oidp->oid_kind & CTLFLAG_RD ? 'R':' ', 732 oidp->oid_kind & CTLFLAG_WR ? 'W':' '); 733 734 if (oidp->oid_handler) 735 printf(" *Handler"); 736 737 switch (oidp->oid_kind & CTLTYPE) { 738 case CTLTYPE_NODE: 739 printf(" Node\n"); 740 if (!oidp->oid_handler) { 741 sysctl_sysctl_debug_dump_node( 742 SYSCTL_CHILDREN(oidp), i + 2); 743 } 744 break; 745 case CTLTYPE_INT: printf(" Int\n"); break; 746 case CTLTYPE_UINT: printf(" u_int\n"); break; 747 case CTLTYPE_LONG: printf(" Long\n"); break; 748 case CTLTYPE_ULONG: printf(" u_long\n"); break; 749 case CTLTYPE_STRING: printf(" String\n"); break; 750 case CTLTYPE_U64: printf(" uint64_t\n"); break; 751 case CTLTYPE_S64: printf(" int64_t\n"); break; 752 case CTLTYPE_OPAQUE: printf(" Opaque/struct\n"); break; 753 default: printf("\n"); 754 } 755 756 } 757} 758 759static int 760sysctl_sysctl_debug(SYSCTL_HANDLER_ARGS) 761{ 762 int error; 763 764 error = priv_check(req->td, PRIV_SYSCTL_DEBUG); 765 if (error) 766 return (error); 767 SYSCTL_XLOCK(); 768 sysctl_sysctl_debug_dump_node(&sysctl__children, 0); 769 SYSCTL_XUNLOCK(); 770 return (ENOENT); 771} 772 773SYSCTL_PROC(_sysctl, 0, debug, CTLTYPE_STRING|CTLFLAG_RD, 774 0, 0, sysctl_sysctl_debug, "-", ""); 775#endif 776 777static int 778sysctl_sysctl_name(SYSCTL_HANDLER_ARGS) 779{ 780 int *name = (int *) arg1; 781 u_int namelen = arg2; 782 int error = 0; 783 struct sysctl_oid *oid; 784 struct sysctl_oid_list *lsp = &sysctl__children, *lsp2; 785 char buf[10]; 786 787 SYSCTL_XLOCK(); 788 while (namelen) { 789 if (!lsp) { 790 snprintf(buf,sizeof(buf),"%d",*name); 791 if (req->oldidx) 792 error = SYSCTL_OUT(req, ".", 1); 793 if (!error) 794 error = SYSCTL_OUT(req, buf, strlen(buf)); 795 if (error) 796 goto out; 797 namelen--; 798 name++; 799 continue; 800 } 801 lsp2 = 0; 802 SLIST_FOREACH(oid, lsp, oid_link) { 803 if (oid->oid_number != *name) 804 continue; 805 806 if (req->oldidx) 807 error = SYSCTL_OUT(req, ".", 1); 808 if (!error) 809 error = SYSCTL_OUT(req, oid->oid_name, 810 strlen(oid->oid_name)); 811 if (error) 812 goto out; 813 814 namelen--; 815 name++; 816 817 if ((oid->oid_kind & CTLTYPE) != CTLTYPE_NODE) 818 break; 819 820 if (oid->oid_handler) 821 break; 822 823 lsp2 = SYSCTL_CHILDREN(oid); 824 break; 825 } 826 lsp = lsp2; 827 } 828 error = SYSCTL_OUT(req, "", 1); 829 out: 830 SYSCTL_XUNLOCK(); 831 return (error); 832} 833 834/* 835 * XXXRW/JA: Shouldn't return name data for nodes that we don't permit in 836 * capability mode. 837 */ 838static SYSCTL_NODE(_sysctl, 1, name, CTLFLAG_RD | CTLFLAG_CAPRD, 839 sysctl_sysctl_name, ""); 840 841static int 842sysctl_sysctl_next_ls(struct sysctl_oid_list *lsp, int *name, u_int namelen, 843 int *next, int *len, int level, struct sysctl_oid **oidpp) 844{ 845 struct sysctl_oid *oidp; 846 847 SYSCTL_ASSERT_XLOCKED(); 848 *len = level; 849 SLIST_FOREACH(oidp, lsp, oid_link) { 850 *next = oidp->oid_number; 851 *oidpp = oidp; 852 853 if (oidp->oid_kind & CTLFLAG_SKIP) 854 continue; 855 856 if (!namelen) { 857 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) 858 return (0); 859 if (oidp->oid_handler) 860 /* We really should call the handler here...*/ 861 return (0); 862 lsp = SYSCTL_CHILDREN(oidp); 863 if (!sysctl_sysctl_next_ls(lsp, 0, 0, next+1, 864 len, level+1, oidpp)) 865 return (0); 866 goto emptynode; 867 } 868 869 if (oidp->oid_number < *name) 870 continue; 871 872 if (oidp->oid_number > *name) { 873 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) 874 return (0); 875 if (oidp->oid_handler) 876 return (0); 877 lsp = SYSCTL_CHILDREN(oidp); 878 if (!sysctl_sysctl_next_ls(lsp, name+1, namelen-1, 879 next+1, len, level+1, oidpp)) 880 return (0); 881 goto next; 882 } 883 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) 884 continue; 885 886 if (oidp->oid_handler) 887 continue; 888 889 lsp = SYSCTL_CHILDREN(oidp); 890 if (!sysctl_sysctl_next_ls(lsp, name+1, namelen-1, next+1, 891 len, level+1, oidpp)) 892 return (0); 893 next: 894 namelen = 1; 895 emptynode: 896 *len = level; 897 } 898 return (1); 899} 900 901static int 902sysctl_sysctl_next(SYSCTL_HANDLER_ARGS) 903{ 904 int *name = (int *) arg1; 905 u_int namelen = arg2; 906 int i, j, error; 907 struct sysctl_oid *oid; 908 struct sysctl_oid_list *lsp = &sysctl__children; 909 int newoid[CTL_MAXNAME]; 910 911 SYSCTL_XLOCK(); 912 i = sysctl_sysctl_next_ls(lsp, name, namelen, newoid, &j, 1, &oid); 913 SYSCTL_XUNLOCK(); 914 if (i) 915 return (ENOENT); 916 error = SYSCTL_OUT(req, newoid, j * sizeof (int)); 917 return (error); 918} 919 920/* 921 * XXXRW/JA: Shouldn't return next data for nodes that we don't permit in 922 * capability mode. 923 */ 924static SYSCTL_NODE(_sysctl, 2, next, CTLFLAG_RD | CTLFLAG_CAPRD, 925 sysctl_sysctl_next, ""); 926 927static int 928name2oid(char *name, int *oid, int *len, struct sysctl_oid **oidpp) 929{ 930 struct sysctl_oid *oidp; 931 struct sysctl_oid_list *lsp = &sysctl__children; 932 char *p; 933 934 SYSCTL_ASSERT_XLOCKED(); 935 936 for (*len = 0; *len < CTL_MAXNAME;) { 937 p = strsep(&name, "."); 938 939 oidp = SLIST_FIRST(lsp); 940 for (;; oidp = SLIST_NEXT(oidp, oid_link)) { 941 if (oidp == NULL) 942 return (ENOENT); 943 if (strcmp(p, oidp->oid_name) == 0) 944 break; 945 } 946 *oid++ = oidp->oid_number; 947 (*len)++; 948 949 if (name == NULL || *name == '\0') { 950 if (oidpp) 951 *oidpp = oidp; 952 return (0); 953 } 954 955 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) 956 break; 957 958 if (oidp->oid_handler) 959 break; 960 961 lsp = SYSCTL_CHILDREN(oidp); 962 } 963 return (ENOENT); 964} 965 966static int 967sysctl_sysctl_name2oid(SYSCTL_HANDLER_ARGS) 968{ 969 char *p; 970 int error, oid[CTL_MAXNAME], len = 0; 971 struct sysctl_oid *op = 0; 972 973 if (!req->newlen) 974 return (ENOENT); 975 if (req->newlen >= MAXPATHLEN) /* XXX arbitrary, undocumented */ 976 return (ENAMETOOLONG); 977 978 p = malloc(req->newlen+1, M_SYSCTL, M_WAITOK); 979 980 error = SYSCTL_IN(req, p, req->newlen); 981 if (error) { 982 free(p, M_SYSCTL); 983 return (error); 984 } 985 986 p [req->newlen] = '\0'; 987 988 SYSCTL_XLOCK(); 989 error = name2oid(p, oid, &len, &op); 990 SYSCTL_XUNLOCK(); 991 992 free(p, M_SYSCTL); 993 994 if (error) 995 return (error); 996 997 error = SYSCTL_OUT(req, oid, len * sizeof *oid); 998 return (error); 999} 1000 1001/* 1002 * XXXRW/JA: Shouldn't return name2oid data for nodes that we don't permit in 1003 * capability mode. 1004 */ 1005SYSCTL_PROC(_sysctl, 3, name2oid, 1006 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE 1007 | CTLFLAG_CAPRW, 0, 0, sysctl_sysctl_name2oid, "I", ""); 1008 1009static int 1010sysctl_sysctl_oidfmt(SYSCTL_HANDLER_ARGS) 1011{ 1012 struct sysctl_oid *oid; 1013 int error; 1014 1015 SYSCTL_XLOCK(); 1016 error = sysctl_find_oid(arg1, arg2, &oid, NULL, req); 1017 if (error) 1018 goto out; 1019 1020 if (oid->oid_fmt == NULL) { 1021 error = ENOENT; 1022 goto out; 1023 } 1024 error = SYSCTL_OUT(req, &oid->oid_kind, sizeof(oid->oid_kind)); 1025 if (error) 1026 goto out; 1027 error = SYSCTL_OUT(req, oid->oid_fmt, strlen(oid->oid_fmt) + 1); 1028 out: 1029 SYSCTL_XUNLOCK(); 1030 return (error); 1031} 1032 1033 1034static SYSCTL_NODE(_sysctl, 4, oidfmt, CTLFLAG_RD|CTLFLAG_MPSAFE|CTLFLAG_CAPRD, 1035 sysctl_sysctl_oidfmt, ""); 1036 1037static int 1038sysctl_sysctl_oiddescr(SYSCTL_HANDLER_ARGS) 1039{ 1040 struct sysctl_oid *oid; 1041 int error; 1042 1043 SYSCTL_XLOCK(); 1044 error = sysctl_find_oid(arg1, arg2, &oid, NULL, req); 1045 if (error) 1046 goto out; 1047 1048 if (oid->oid_descr == NULL) { 1049 error = ENOENT; 1050 goto out; 1051 } 1052 error = SYSCTL_OUT(req, oid->oid_descr, strlen(oid->oid_descr) + 1); 1053 out: 1054 SYSCTL_XUNLOCK(); 1055 return (error); 1056} 1057 1058static SYSCTL_NODE(_sysctl, 5, oiddescr, CTLFLAG_RD|CTLFLAG_CAPRD, 1059 sysctl_sysctl_oiddescr, ""); 1060 1061/* 1062 * Default "handler" functions. 1063 */ 1064 1065/* 1066 * Handle an int, signed or unsigned. 1067 * Two cases: 1068 * a variable: point arg1 at it. 1069 * a constant: pass it in arg2. 1070 */ 1071 1072int 1073sysctl_handle_int(SYSCTL_HANDLER_ARGS) 1074{ 1075 int tmpout, error = 0; 1076 1077 /* 1078 * Attempt to get a coherent snapshot by making a copy of the data. 1079 */ 1080 if (arg1) 1081 tmpout = *(int *)arg1; 1082 else 1083 tmpout = arg2; 1084 error = SYSCTL_OUT(req, &tmpout, sizeof(int)); 1085 1086 if (error || !req->newptr) 1087 return (error); 1088 1089 if (!arg1) 1090 error = EPERM; 1091 else 1092 error = SYSCTL_IN(req, arg1, sizeof(int)); 1093 return (error); 1094} 1095 1096/* 1097 * Based on on sysctl_handle_int() convert milliseconds into ticks. 1098 * Note: this is used by TCP. 1099 */ 1100 1101int 1102sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS) 1103{ 1104 int error, s, tt; 1105 1106 tt = *(int *)arg1; 1107 s = (int)((int64_t)tt * 1000 / hz); 1108 1109 error = sysctl_handle_int(oidp, &s, 0, req); 1110 if (error || !req->newptr) 1111 return (error); 1112 1113 tt = (int)((int64_t)s * hz / 1000); 1114 if (tt < 1) 1115 return (EINVAL); 1116 1117 *(int *)arg1 = tt; 1118 return (0); 1119} 1120 1121 1122/* 1123 * Handle a long, signed or unsigned. 1124 * Two cases: 1125 * a variable: point arg1 at it. 1126 * a constant: pass it in arg2. 1127 */ 1128 1129int 1130sysctl_handle_long(SYSCTL_HANDLER_ARGS) 1131{ 1132 int error = 0; 1133 long tmplong; 1134#ifdef SCTL_MASK32 1135 int tmpint; 1136#endif 1137 1138 /* 1139 * Attempt to get a coherent snapshot by making a copy of the data. 1140 */ 1141 if (arg1) 1142 tmplong = *(long *)arg1; 1143 else 1144 tmplong = arg2; 1145#ifdef SCTL_MASK32 1146 if (req->flags & SCTL_MASK32) { 1147 tmpint = tmplong; 1148 error = SYSCTL_OUT(req, &tmpint, sizeof(int)); 1149 } else 1150#endif 1151 error = SYSCTL_OUT(req, &tmplong, sizeof(long)); 1152 1153 if (error || !req->newptr) 1154 return (error); 1155 1156 if (!arg1) 1157 error = EPERM; 1158#ifdef SCTL_MASK32 1159 else if (req->flags & SCTL_MASK32) { 1160 error = SYSCTL_IN(req, &tmpint, sizeof(int)); 1161 *(long *)arg1 = (long)tmpint; 1162 } 1163#endif 1164 else 1165 error = SYSCTL_IN(req, arg1, sizeof(long)); 1166 return (error); 1167} 1168 1169/* 1170 * Handle a 64 bit int, signed or unsigned. 1171 * Two cases: 1172 * a variable: point arg1 at it. 1173 * a constant: pass it in arg2. 1174 */ 1175int 1176sysctl_handle_64(SYSCTL_HANDLER_ARGS) 1177{ 1178 int error = 0; 1179 uint64_t tmpout; 1180 1181 /* 1182 * Attempt to get a coherent snapshot by making a copy of the data. 1183 */ 1184 if (arg1) 1185 tmpout = *(uint64_t *)arg1; 1186 else 1187 tmpout = arg2; 1188 error = SYSCTL_OUT(req, &tmpout, sizeof(uint64_t)); 1189 1190 if (error || !req->newptr) 1191 return (error); 1192 1193 if (!arg1) 1194 error = EPERM; 1195 else 1196 error = SYSCTL_IN(req, arg1, sizeof(uint64_t)); 1197 return (error); 1198} 1199 1200/* 1201 * Handle our generic '\0' terminated 'C' string. 1202 * Two cases: 1203 * a variable string: point arg1 at it, arg2 is max length. 1204 * a constant string: point arg1 at it, arg2 is zero. 1205 */ 1206 1207int 1208sysctl_handle_string(SYSCTL_HANDLER_ARGS) 1209{ 1210 size_t outlen; 1211 int error = 0; 1212 1213 /* check for zero-length buffer */ 1214 if (arg2 == 0) 1215 return (ENOMEM); 1216 1217 if (req->oldptr != NULL) { 1218 char *tmparg; 1219 1220 /* try to make a coherent snapshot of the string */ 1221 tmparg = malloc(arg2, M_SYSCTLTMP, M_WAITOK); 1222 memcpy(tmparg, arg1, arg2); 1223 1224 outlen = strnlen(tmparg, arg2 - 1) + 1; 1225 error = SYSCTL_OUT(req, tmparg, outlen); 1226 1227 free(tmparg, M_SYSCTLTMP); 1228 } else { 1229 outlen = strnlen((char *)arg1, arg2 - 1) + 1; 1230 error = SYSCTL_OUT(req, NULL, outlen); 1231 } 1232 if (error || !req->newptr) 1233 return (error); 1234 1235 if ((req->newlen - req->newidx) >= arg2) { 1236 error = EINVAL; 1237 } else { 1238 arg2 = (req->newlen - req->newidx); 1239 error = SYSCTL_IN(req, arg1, arg2); 1240 ((char *)arg1)[arg2] = '\0'; 1241 } 1242 return (error); 1243} 1244 1245/* 1246 * Handle any kind of opaque data. 1247 * arg1 points to it, arg2 is the size. 1248 */ 1249 1250int 1251sysctl_handle_opaque(SYSCTL_HANDLER_ARGS) 1252{ 1253 int error, tries; 1254 u_int generation; 1255 struct sysctl_req req2; 1256 1257 /* 1258 * Attempt to get a coherent snapshot, by using the thread 1259 * pre-emption counter updated from within mi_switch() to 1260 * determine if we were pre-empted during a bcopy() or 1261 * copyout(). Make 3 attempts at doing this before giving up. 1262 * If we encounter an error, stop immediately. 1263 */ 1264 tries = 0; 1265 req2 = *req; 1266retry: 1267 generation = curthread->td_generation; 1268 error = SYSCTL_OUT(req, arg1, arg2); 1269 if (error) 1270 return (error); 1271 tries++; 1272 if (generation != curthread->td_generation && tries < 3) { 1273 *req = req2; 1274 goto retry; 1275 } 1276 1277 error = SYSCTL_IN(req, arg1, arg2); 1278 1279 return (error); 1280} 1281 1282/* 1283 * Transfer functions to/from kernel space. 1284 * XXX: rather untested at this point 1285 */ 1286static int 1287sysctl_old_kernel(struct sysctl_req *req, const void *p, size_t l) 1288{ 1289 size_t i = 0; 1290 1291 if (req->oldptr) { 1292 i = l; 1293 if (req->oldlen <= req->oldidx) 1294 i = 0; 1295 else 1296 if (i > req->oldlen - req->oldidx) 1297 i = req->oldlen - req->oldidx; 1298 if (i > 0) 1299 bcopy(p, (char *)req->oldptr + req->oldidx, i); 1300 } 1301 req->oldidx += l; 1302 if (req->oldptr && i != l) 1303 return (ENOMEM); 1304 return (0); 1305} 1306 1307static int 1308sysctl_new_kernel(struct sysctl_req *req, void *p, size_t l) 1309{ 1310 if (!req->newptr) 1311 return (0); 1312 if (req->newlen - req->newidx < l) 1313 return (EINVAL); 1314 bcopy((char *)req->newptr + req->newidx, p, l); 1315 req->newidx += l; 1316 return (0); 1317} 1318 1319int 1320kernel_sysctl(struct thread *td, int *name, u_int namelen, void *old, 1321 size_t *oldlenp, void *new, size_t newlen, size_t *retval, int flags) 1322{ 1323 int error = 0; 1324 struct sysctl_req req; 1325 1326 bzero(&req, sizeof req); 1327 1328 req.td = td; 1329 req.flags = flags; 1330 1331 if (oldlenp) { 1332 req.oldlen = *oldlenp; 1333 } 1334 req.validlen = req.oldlen; 1335 1336 if (old) { 1337 req.oldptr= old; 1338 } 1339 1340 if (new != NULL) { 1341 req.newlen = newlen; 1342 req.newptr = new; 1343 } 1344 1345 req.oldfunc = sysctl_old_kernel; 1346 req.newfunc = sysctl_new_kernel; 1347 req.lock = REQ_UNWIRED; 1348 1349 SYSCTL_XLOCK(); 1350 error = sysctl_root(0, name, namelen, &req); 1351 SYSCTL_XUNLOCK(); 1352 1353 if (req.lock == REQ_WIRED && req.validlen > 0) 1354 vsunlock(req.oldptr, req.validlen); 1355 1356 if (error && error != ENOMEM) 1357 return (error); 1358 1359 if (retval) { 1360 if (req.oldptr && req.oldidx > req.validlen) 1361 *retval = req.validlen; 1362 else 1363 *retval = req.oldidx; 1364 } 1365 return (error); 1366} 1367 1368int 1369kernel_sysctlbyname(struct thread *td, char *name, void *old, size_t *oldlenp, 1370 void *new, size_t newlen, size_t *retval, int flags) 1371{ 1372 int oid[CTL_MAXNAME]; 1373 size_t oidlen, plen; 1374 int error; 1375 1376 oid[0] = 0; /* sysctl internal magic */ 1377 oid[1] = 3; /* name2oid */ 1378 oidlen = sizeof(oid); 1379 1380 error = kernel_sysctl(td, oid, 2, oid, &oidlen, 1381 (void *)name, strlen(name), &plen, flags); 1382 if (error) 1383 return (error); 1384 1385 error = kernel_sysctl(td, oid, plen / sizeof(int), old, oldlenp, 1386 new, newlen, retval, flags); 1387 return (error); 1388} 1389 1390/* 1391 * Transfer function to/from user space. 1392 */ 1393static int 1394sysctl_old_user(struct sysctl_req *req, const void *p, size_t l) 1395{ 1396 size_t i, len, origidx; 1397 int error; 1398 1399 origidx = req->oldidx; 1400 req->oldidx += l; 1401 if (req->oldptr == NULL) 1402 return (0); 1403 /* 1404 * If we have not wired the user supplied buffer and we are currently 1405 * holding locks, drop a witness warning, as it's possible that 1406 * write operations to the user page can sleep. 1407 */ 1408 if (req->lock != REQ_WIRED) 1409 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, 1410 "sysctl_old_user()"); 1411 i = l; 1412 len = req->validlen; 1413 if (len <= origidx) 1414 i = 0; 1415 else { 1416 if (i > len - origidx) 1417 i = len - origidx; 1418 if (req->lock == REQ_WIRED) { 1419 error = copyout_nofault(p, (char *)req->oldptr + 1420 origidx, i); 1421 } else 1422 error = copyout(p, (char *)req->oldptr + origidx, i); 1423 if (error != 0) 1424 return (error); 1425 } 1426 if (i < l) 1427 return (ENOMEM); 1428 return (0); 1429} 1430 1431static int 1432sysctl_new_user(struct sysctl_req *req, void *p, size_t l) 1433{ 1434 int error; 1435 1436 if (!req->newptr) 1437 return (0); 1438 if (req->newlen - req->newidx < l) 1439 return (EINVAL); 1440 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, 1441 "sysctl_new_user()"); 1442 error = copyin((char *)req->newptr + req->newidx, p, l); 1443 req->newidx += l; 1444 return (error); 1445} 1446 1447/* 1448 * Wire the user space destination buffer. If set to a value greater than 1449 * zero, the len parameter limits the maximum amount of wired memory. 1450 */ 1451int 1452sysctl_wire_old_buffer(struct sysctl_req *req, size_t len) 1453{ 1454 int ret; 1455 size_t wiredlen; 1456 1457 wiredlen = (len > 0 && len < req->oldlen) ? len : req->oldlen; 1458 ret = 0; 1459 if (req->lock != REQ_WIRED && req->oldptr && 1460 req->oldfunc == sysctl_old_user) { 1461 if (wiredlen != 0) { 1462 ret = vslock(req->oldptr, wiredlen); 1463 if (ret != 0) { 1464 if (ret != ENOMEM) 1465 return (ret); 1466 wiredlen = 0; 1467 } 1468 } 1469 req->lock = REQ_WIRED; 1470 req->validlen = wiredlen; 1471 } 1472 return (0); 1473} 1474 1475int 1476sysctl_find_oid(int *name, u_int namelen, struct sysctl_oid **noid, 1477 int *nindx, struct sysctl_req *req) 1478{ 1479 struct sysctl_oid_list *lsp; 1480 struct sysctl_oid *oid; 1481 int indx; 1482 1483 SYSCTL_ASSERT_XLOCKED(); 1484 lsp = &sysctl__children; 1485 indx = 0; 1486 while (indx < CTL_MAXNAME) { 1487 SLIST_FOREACH(oid, lsp, oid_link) { 1488 if (oid->oid_number == name[indx]) 1489 break; 1490 } 1491 if (oid == NULL) 1492 return (ENOENT); 1493 1494 indx++; 1495 if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) { 1496 if (oid->oid_handler != NULL || indx == namelen) { 1497 *noid = oid; 1498 if (nindx != NULL) 1499 *nindx = indx; 1500 KASSERT((oid->oid_kind & CTLFLAG_DYING) == 0, 1501 ("%s found DYING node %p", __func__, oid)); 1502 return (0); 1503 } 1504 lsp = SYSCTL_CHILDREN(oid); 1505 } else if (indx == namelen) { 1506 *noid = oid; 1507 if (nindx != NULL) 1508 *nindx = indx; 1509 KASSERT((oid->oid_kind & CTLFLAG_DYING) == 0, 1510 ("%s found DYING node %p", __func__, oid)); 1511 return (0); 1512 } else { 1513 return (ENOTDIR); 1514 } 1515 } 1516 return (ENOENT); 1517} 1518 1519/* 1520 * Traverse our tree, and find the right node, execute whatever it points 1521 * to, and return the resulting error code. 1522 */ 1523 1524static int 1525sysctl_root(SYSCTL_HANDLER_ARGS) 1526{ 1527 struct sysctl_oid *oid; 1528 int error, indx, lvl; 1529 1530 SYSCTL_ASSERT_XLOCKED(); 1531 1532 error = sysctl_find_oid(arg1, arg2, &oid, &indx, req); 1533 if (error) 1534 return (error); 1535 1536 if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) { 1537 /* 1538 * You can't call a sysctl when it's a node, but has 1539 * no handler. Inform the user that it's a node. 1540 * The indx may or may not be the same as namelen. 1541 */ 1542 if (oid->oid_handler == NULL) 1543 return (EISDIR); 1544 } 1545 1546 /* Is this sysctl writable? */ 1547 if (req->newptr && !(oid->oid_kind & CTLFLAG_WR)) 1548 return (EPERM); 1549 1550 KASSERT(req->td != NULL, ("sysctl_root(): req->td == NULL")); 1551 1552#ifdef CAPABILITY_MODE 1553 /* 1554 * If the process is in capability mode, then don't permit reading or 1555 * writing unless specifically granted for the node. 1556 */ 1557 if (IN_CAPABILITY_MODE(req->td)) { 1558 if (req->oldptr && !(oid->oid_kind & CTLFLAG_CAPRD)) 1559 return (EPERM); 1560 if (req->newptr && !(oid->oid_kind & CTLFLAG_CAPWR)) 1561 return (EPERM); 1562 } 1563#endif 1564 1565 /* Is this sysctl sensitive to securelevels? */ 1566 if (req->newptr && (oid->oid_kind & CTLFLAG_SECURE)) { 1567 lvl = (oid->oid_kind & CTLMASK_SECURE) >> CTLSHIFT_SECURE; 1568 error = securelevel_gt(req->td->td_ucred, lvl); 1569 if (error) 1570 return (error); 1571 } 1572 1573 /* Is this sysctl writable by only privileged users? */ 1574 if (req->newptr && !(oid->oid_kind & CTLFLAG_ANYBODY)) { 1575 int priv; 1576 1577 if (oid->oid_kind & CTLFLAG_PRISON) 1578 priv = PRIV_SYSCTL_WRITEJAIL; 1579#ifdef VIMAGE 1580 else if ((oid->oid_kind & CTLFLAG_VNET) && 1581 prison_owns_vnet(req->td->td_ucred)) 1582 priv = PRIV_SYSCTL_WRITEJAIL; 1583#endif 1584 else 1585 priv = PRIV_SYSCTL_WRITE; 1586 error = priv_check(req->td, priv); 1587 if (error) 1588 return (error); 1589 } 1590 1591 if (!oid->oid_handler) 1592 return (EINVAL); 1593 1594 if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) { 1595 arg1 = (int *)arg1 + indx; 1596 arg2 -= indx; 1597 } else { 1598 arg1 = oid->oid_arg1; 1599 arg2 = oid->oid_arg2; 1600 } 1601#ifdef MAC 1602 error = mac_system_check_sysctl(req->td->td_ucred, oid, arg1, arg2, 1603 req); 1604 if (error != 0) 1605 return (error); 1606#endif 1607#ifdef VIMAGE 1608 if ((oid->oid_kind & CTLFLAG_VNET) && arg1 != NULL) 1609 arg1 = (void *)(curvnet->vnet_data_base + (uintptr_t)arg1); 1610#endif 1611 error = sysctl_root_handler_locked(oid, arg1, arg2, req); 1612 1613 KFAIL_POINT_ERROR(_debug_fail_point, sysctl_running, error); 1614 1615 return (error); 1616} 1617 1618#ifndef _SYS_SYSPROTO_H_ 1619struct sysctl_args { 1620 int *name; 1621 u_int namelen; 1622 void *old; 1623 size_t *oldlenp; 1624 void *new; 1625 size_t newlen; 1626}; 1627#endif 1628int 1629sys___sysctl(struct thread *td, struct sysctl_args *uap) 1630{ 1631 int error, i, name[CTL_MAXNAME]; 1632 size_t j; 1633 1634 if (uap->namelen > CTL_MAXNAME || uap->namelen < 2) 1635 return (EINVAL); 1636 1637 error = copyin(uap->name, &name, uap->namelen * sizeof(int)); 1638 if (error) 1639 return (error); 1640 1641 error = userland_sysctl(td, name, uap->namelen, 1642 uap->old, uap->oldlenp, 0, 1643 uap->new, uap->newlen, &j, 0); 1644 if (error && error != ENOMEM) 1645 return (error); 1646 if (uap->oldlenp) { 1647 i = copyout(&j, uap->oldlenp, sizeof(j)); 1648 if (i) 1649 return (i); 1650 } 1651 return (error); 1652} 1653 1654/* 1655 * This is used from various compatibility syscalls too. That's why name 1656 * must be in kernel space. 1657 */ 1658int 1659userland_sysctl(struct thread *td, int *name, u_int namelen, void *old, 1660 size_t *oldlenp, int inkernel, void *new, size_t newlen, size_t *retval, 1661 int flags) 1662{ 1663 int error = 0, memlocked; 1664 struct sysctl_req req; 1665 1666 bzero(&req, sizeof req); 1667 1668 req.td = td; 1669 req.flags = flags; 1670 1671 if (oldlenp) { 1672 if (inkernel) { 1673 req.oldlen = *oldlenp; 1674 } else { 1675 error = copyin(oldlenp, &req.oldlen, sizeof(*oldlenp)); 1676 if (error) 1677 return (error); 1678 } 1679 } 1680 req.validlen = req.oldlen; 1681 1682 if (old) { 1683 if (!useracc(old, req.oldlen, VM_PROT_WRITE)) 1684 return (EFAULT); 1685 req.oldptr= old; 1686 } 1687 1688 if (new != NULL) { 1689 if (!useracc(new, newlen, VM_PROT_READ)) 1690 return (EFAULT); 1691 req.newlen = newlen; 1692 req.newptr = new; 1693 } 1694 1695 req.oldfunc = sysctl_old_user; 1696 req.newfunc = sysctl_new_user; 1697 req.lock = REQ_UNWIRED; 1698 1699#ifdef KTRACE 1700 if (KTRPOINT(curthread, KTR_SYSCTL)) 1701 ktrsysctl(name, namelen); 1702#endif 1703 1704 if (req.oldlen > PAGE_SIZE) { 1705 memlocked = 1; 1706 sx_xlock(&sysctlmemlock); 1707 } else 1708 memlocked = 0; 1709 CURVNET_SET(TD_TO_VNET(td)); 1710 1711 for (;;) { 1712 req.oldidx = 0; 1713 req.newidx = 0; 1714 SYSCTL_XLOCK(); 1715 error = sysctl_root(0, name, namelen, &req); 1716 SYSCTL_XUNLOCK(); 1717 if (error != EAGAIN) 1718 break; 1719 kern_yield(PRI_USER); 1720 } 1721 1722 CURVNET_RESTORE(); 1723 1724 if (req.lock == REQ_WIRED && req.validlen > 0) 1725 vsunlock(req.oldptr, req.validlen); 1726 if (memlocked) 1727 sx_xunlock(&sysctlmemlock); 1728 1729 if (error && error != ENOMEM) 1730 return (error); 1731 1732 if (retval) { 1733 if (req.oldptr && req.oldidx > req.validlen) 1734 *retval = req.validlen; 1735 else 1736 *retval = req.oldidx; 1737 } 1738 return (error); 1739} 1740 1741/* 1742 * Drain into a sysctl struct. The user buffer should be wired if a page 1743 * fault would cause issue. 1744 */ 1745static int 1746sbuf_sysctl_drain(void *arg, const char *data, int len) 1747{ 1748 struct sysctl_req *req = arg; 1749 int error; 1750 1751 error = SYSCTL_OUT(req, data, len); 1752 KASSERT(error >= 0, ("Got unexpected negative value %d", error)); 1753 return (error == 0 ? len : -error); 1754} 1755 1756struct sbuf * 1757sbuf_new_for_sysctl(struct sbuf *s, char *buf, int length, 1758 struct sysctl_req *req) 1759{ 1760 1761 s = sbuf_new(s, buf, length, SBUF_FIXEDLEN); 1762 sbuf_set_drain(s, sbuf_sysctl_drain, req); 1763 return (s); 1764} 1765