kern_sysctl.c revision 267993
1/*- 2 * Copyright (c) 1982, 1986, 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Mike Karels at Berkeley Software Design, Inc. 7 * 8 * Quite extensively rewritten by Poul-Henning Kamp of the FreeBSD 9 * project, to make these variables more userfriendly. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 * 35 * @(#)kern_sysctl.c 8.4 (Berkeley) 4/14/94 36 */ 37 38#include <sys/cdefs.h> 39__FBSDID("$FreeBSD: head/sys/kern/kern_sysctl.c 267993 2014-06-28 03:59:04Z hselasky $"); 40 41#include "opt_capsicum.h" 42#include "opt_compat.h" 43#include "opt_ktrace.h" 44 45#include <sys/param.h> 46#include <sys/fail.h> 47#include <sys/systm.h> 48#include <sys/capsicum.h> 49#include <sys/kernel.h> 50#include <sys/sysctl.h> 51#include <sys/malloc.h> 52#include <sys/priv.h> 53#include <sys/proc.h> 54#include <sys/jail.h> 55#include <sys/lock.h> 56#include <sys/mutex.h> 57#include <sys/sbuf.h> 58#include <sys/sx.h> 59#include <sys/sysproto.h> 60#include <sys/uio.h> 61#ifdef KTRACE 62#include <sys/ktrace.h> 63#endif 64 65#include <net/vnet.h> 66 67#include <security/mac/mac_framework.h> 68 69#include <vm/vm.h> 70#include <vm/vm_extern.h> 71 72static MALLOC_DEFINE(M_SYSCTL, "sysctl", "sysctl internal magic"); 73static MALLOC_DEFINE(M_SYSCTLOID, "sysctloid", "sysctl dynamic oids"); 74static MALLOC_DEFINE(M_SYSCTLTMP, "sysctltmp", "sysctl temp output buffer"); 75 76/* 77 * The sysctllock protects the MIB tree. It also protects sysctl 78 * contexts used with dynamic sysctls. The sysctl_register_oid() and 79 * sysctl_unregister_oid() routines require the sysctllock to already 80 * be held, so the sysctl_lock() and sysctl_unlock() routines are 81 * provided for the few places in the kernel which need to use that 82 * API rather than using the dynamic API. Use of the dynamic API is 83 * strongly encouraged for most code. 84 * 85 * The sysctlmemlock is used to limit the amount of user memory wired for 86 * sysctl requests. This is implemented by serializing any userland 87 * sysctl requests larger than a single page via an exclusive lock. 88 */ 89static struct sx sysctllock; 90static struct sx sysctlmemlock; 91 92#define SYSCTL_XLOCK() sx_xlock(&sysctllock) 93#define SYSCTL_XUNLOCK() sx_xunlock(&sysctllock) 94#define SYSCTL_ASSERT_XLOCKED() sx_assert(&sysctllock, SA_XLOCKED) 95#define SYSCTL_INIT() sx_init(&sysctllock, "sysctl lock") 96#define SYSCTL_SLEEP(ch, wmesg, timo) \ 97 sx_sleep(ch, &sysctllock, 0, wmesg, timo) 98 99static int sysctl_root(SYSCTL_HANDLER_ARGS); 100 101/* Root list */ 102struct sysctl_oid_list sysctl__children = SLIST_HEAD_INITIALIZER(&sysctl__children); 103 104static int sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del, 105 int recurse); 106static int sysctl_old_kernel(struct sysctl_req *, const void *, size_t); 107static int sysctl_new_kernel(struct sysctl_req *, void *, size_t); 108 109static struct sysctl_oid * 110sysctl_find_oidname(const char *name, struct sysctl_oid_list *list) 111{ 112 struct sysctl_oid *oidp; 113 114 SYSCTL_ASSERT_XLOCKED(); 115 SLIST_FOREACH(oidp, list, oid_link) { 116 if (strcmp(oidp->oid_name, name) == 0) { 117 return (oidp); 118 } 119 } 120 return (NULL); 121} 122 123/* 124 * Initialization of the MIB tree. 125 * 126 * Order by number in each list. 127 */ 128void 129sysctl_lock(void) 130{ 131 132 SYSCTL_XLOCK(); 133} 134 135void 136sysctl_unlock(void) 137{ 138 139 SYSCTL_XUNLOCK(); 140} 141 142static int 143sysctl_root_handler_locked(struct sysctl_oid *oid, void *arg1, intptr_t arg2, 144 struct sysctl_req *req) 145{ 146 int error; 147 148 oid->oid_running++; 149 SYSCTL_XUNLOCK(); 150 151 if (!(oid->oid_kind & CTLFLAG_MPSAFE)) 152 mtx_lock(&Giant); 153 error = oid->oid_handler(oid, arg1, arg2, req); 154 if (!(oid->oid_kind & CTLFLAG_MPSAFE)) 155 mtx_unlock(&Giant); 156 157 SYSCTL_XLOCK(); 158 oid->oid_running--; 159 if (oid->oid_running == 0 && (oid->oid_kind & CTLFLAG_DYING) != 0) 160 wakeup(&oid->oid_running); 161 162 return (error); 163} 164 165static void 166sysctl_load_tunable_by_oid_locked(struct sysctl_oid *oidp) 167{ 168 struct sysctl_req req; 169 struct sysctl_oid *curr; 170 char *penv; 171 char path[64]; 172 ssize_t rem = sizeof(path); 173 ssize_t len; 174 int val_int; 175 long val_long; 176 int64_t val_64; 177 int error; 178 179 path[--rem] = 0; 180 181 for (curr = oidp; curr != NULL; curr = SYSCTL_PARENT(curr)) { 182 len = strlen(curr->oid_name); 183 rem -= len; 184 if (curr != oidp) 185 rem -= 1; 186 if (rem < 0) { 187 printf("OID path exceeds %d bytes\n", (int)sizeof(path)); 188 return; 189 } 190 memcpy(path + rem, curr->oid_name, len); 191 if (curr != oidp) 192 path[rem + len] = '.'; 193 } 194 195 penv = getenv(path + rem); 196 if (penv == NULL) 197 return; 198 199 memset(&req, 0, sizeof(req)); 200 201 req.td = curthread; 202 req.oldfunc = sysctl_old_kernel; 203 req.newfunc = sysctl_new_kernel; 204 req.lock = REQ_UNWIRED; 205 206 switch (oidp->oid_kind & CTLTYPE) { 207 case CTLTYPE_INT: 208 val_int = strtoq(penv, NULL, 0); 209 req.newlen = sizeof(val_int); 210 req.newptr = &val_int; 211 break; 212 case CTLTYPE_UINT: 213 val_int = strtouq(penv, NULL, 0); 214 req.newlen = sizeof(val_int); 215 req.newptr = &val_int; 216 break; 217 case CTLTYPE_LONG: 218 val_long = strtoq(penv, NULL, 0); 219 req.newlen = sizeof(val_long); 220 req.newptr = &val_long; 221 break; 222 case CTLTYPE_ULONG: 223 val_long = strtouq(penv, NULL, 0); 224 req.newlen = sizeof(val_long); 225 req.newptr = &val_long; 226 break; 227 case CTLTYPE_S64: 228 val_64 = strtoq(penv, NULL, 0); 229 req.newlen = sizeof(val_64); 230 req.newptr = &val_64; 231 break; 232 case CTLTYPE_U64: 233 val_64 = strtouq(penv, NULL, 0); 234 req.newlen = sizeof(val_64); 235 req.newptr = &val_64; 236 break; 237 case CTLTYPE_STRING: 238 req.newlen = strlen(penv); 239 req.newptr = penv; 240 break; 241 default: 242 freeenv(penv); 243 return; 244 } 245 error = sysctl_root_handler_locked(oidp, oidp->oid_arg1, 246 oidp->oid_arg2, &req); 247 if (error != 0) { 248 printf("Setting sysctl '%s' to '%s' failed: %d\n", 249 path, penv, error); 250 } 251 freeenv(penv); 252} 253 254void 255sysctl_register_oid(struct sysctl_oid *oidp) 256{ 257 struct sysctl_oid_list *parent = oidp->oid_parent; 258 struct sysctl_oid *p; 259 struct sysctl_oid *q; 260 261 /* 262 * First check if another oid with the same name already 263 * exists in the parent's list. 264 */ 265 SYSCTL_ASSERT_XLOCKED(); 266 p = sysctl_find_oidname(oidp->oid_name, parent); 267 if (p != NULL) { 268 if ((p->oid_kind & CTLTYPE) == CTLTYPE_NODE) { 269 p->oid_refcnt++; 270 return; 271 } else { 272 printf("can't re-use a leaf (%s)!\n", p->oid_name); 273 return; 274 } 275 } 276 /* 277 * If this oid has a number OID_AUTO, give it a number which 278 * is greater than any current oid. 279 * NOTE: DO NOT change the starting value here, change it in 280 * <sys/sysctl.h>, and make sure it is at least 256 to 281 * accomodate e.g. net.inet.raw as a static sysctl node. 282 */ 283 if (oidp->oid_number == OID_AUTO) { 284 static int newoid = CTL_AUTO_START; 285 286 oidp->oid_number = newoid++; 287 if (newoid == 0x7fffffff) 288 panic("out of oids"); 289 } 290#if 0 291 else if (oidp->oid_number >= CTL_AUTO_START) { 292 /* do not panic; this happens when unregistering sysctl sets */ 293 printf("static sysctl oid too high: %d", oidp->oid_number); 294 } 295#endif 296 297 /* 298 * Insert the oid into the parent's list in order. 299 */ 300 q = NULL; 301 SLIST_FOREACH(p, parent, oid_link) { 302 if (oidp->oid_number < p->oid_number) 303 break; 304 q = p; 305 } 306 if (q) 307 SLIST_INSERT_AFTER(q, oidp, oid_link); 308 else 309 SLIST_INSERT_HEAD(parent, oidp, oid_link); 310 311 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE && 312#ifdef VIMAGE 313 (oidp->oid_kind & CTLFLAG_VNET) == 0 && 314#endif 315 (oidp->oid_kind & CTLFLAG_TUN) != 0 && 316 (oidp->oid_kind & CTLFLAG_NOFETCH) == 0) { 317 sysctl_load_tunable_by_oid_locked(oidp); 318 } 319} 320 321void 322sysctl_unregister_oid(struct sysctl_oid *oidp) 323{ 324 struct sysctl_oid *p; 325 int error; 326 327 SYSCTL_ASSERT_XLOCKED(); 328 error = ENOENT; 329 if (oidp->oid_number == OID_AUTO) { 330 error = EINVAL; 331 } else { 332 SLIST_FOREACH(p, oidp->oid_parent, oid_link) { 333 if (p == oidp) { 334 SLIST_REMOVE(oidp->oid_parent, oidp, 335 sysctl_oid, oid_link); 336 error = 0; 337 break; 338 } 339 } 340 } 341 342 /* 343 * This can happen when a module fails to register and is 344 * being unloaded afterwards. It should not be a panic() 345 * for normal use. 346 */ 347 if (error) 348 printf("%s: failed to unregister sysctl\n", __func__); 349} 350 351/* Initialize a new context to keep track of dynamically added sysctls. */ 352int 353sysctl_ctx_init(struct sysctl_ctx_list *c) 354{ 355 356 if (c == NULL) { 357 return (EINVAL); 358 } 359 360 /* 361 * No locking here, the caller is responsible for not adding 362 * new nodes to a context until after this function has 363 * returned. 364 */ 365 TAILQ_INIT(c); 366 return (0); 367} 368 369/* Free the context, and destroy all dynamic oids registered in this context */ 370int 371sysctl_ctx_free(struct sysctl_ctx_list *clist) 372{ 373 struct sysctl_ctx_entry *e, *e1; 374 int error; 375 376 error = 0; 377 /* 378 * First perform a "dry run" to check if it's ok to remove oids. 379 * XXX FIXME 380 * XXX This algorithm is a hack. But I don't know any 381 * XXX better solution for now... 382 */ 383 SYSCTL_XLOCK(); 384 TAILQ_FOREACH(e, clist, link) { 385 error = sysctl_remove_oid_locked(e->entry, 0, 0); 386 if (error) 387 break; 388 } 389 /* 390 * Restore deregistered entries, either from the end, 391 * or from the place where error occured. 392 * e contains the entry that was not unregistered 393 */ 394 if (error) 395 e1 = TAILQ_PREV(e, sysctl_ctx_list, link); 396 else 397 e1 = TAILQ_LAST(clist, sysctl_ctx_list); 398 while (e1 != NULL) { 399 sysctl_register_oid(e1->entry); 400 e1 = TAILQ_PREV(e1, sysctl_ctx_list, link); 401 } 402 if (error) { 403 SYSCTL_XUNLOCK(); 404 return(EBUSY); 405 } 406 /* Now really delete the entries */ 407 e = TAILQ_FIRST(clist); 408 while (e != NULL) { 409 e1 = TAILQ_NEXT(e, link); 410 error = sysctl_remove_oid_locked(e->entry, 1, 0); 411 if (error) 412 panic("sysctl_remove_oid: corrupt tree, entry: %s", 413 e->entry->oid_name); 414 free(e, M_SYSCTLOID); 415 e = e1; 416 } 417 SYSCTL_XUNLOCK(); 418 return (error); 419} 420 421/* Add an entry to the context */ 422struct sysctl_ctx_entry * 423sysctl_ctx_entry_add(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp) 424{ 425 struct sysctl_ctx_entry *e; 426 427 SYSCTL_ASSERT_XLOCKED(); 428 if (clist == NULL || oidp == NULL) 429 return(NULL); 430 e = malloc(sizeof(struct sysctl_ctx_entry), M_SYSCTLOID, M_WAITOK); 431 e->entry = oidp; 432 TAILQ_INSERT_HEAD(clist, e, link); 433 return (e); 434} 435 436/* Find an entry in the context */ 437struct sysctl_ctx_entry * 438sysctl_ctx_entry_find(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp) 439{ 440 struct sysctl_ctx_entry *e; 441 442 SYSCTL_ASSERT_XLOCKED(); 443 if (clist == NULL || oidp == NULL) 444 return(NULL); 445 TAILQ_FOREACH(e, clist, link) { 446 if(e->entry == oidp) 447 return(e); 448 } 449 return (e); 450} 451 452/* 453 * Delete an entry from the context. 454 * NOTE: this function doesn't free oidp! You have to remove it 455 * with sysctl_remove_oid(). 456 */ 457int 458sysctl_ctx_entry_del(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp) 459{ 460 struct sysctl_ctx_entry *e; 461 462 if (clist == NULL || oidp == NULL) 463 return (EINVAL); 464 SYSCTL_XLOCK(); 465 e = sysctl_ctx_entry_find(clist, oidp); 466 if (e != NULL) { 467 TAILQ_REMOVE(clist, e, link); 468 SYSCTL_XUNLOCK(); 469 free(e, M_SYSCTLOID); 470 return (0); 471 } else { 472 SYSCTL_XUNLOCK(); 473 return (ENOENT); 474 } 475} 476 477/* 478 * Remove dynamically created sysctl trees. 479 * oidp - top of the tree to be removed 480 * del - if 0 - just deregister, otherwise free up entries as well 481 * recurse - if != 0 traverse the subtree to be deleted 482 */ 483int 484sysctl_remove_oid(struct sysctl_oid *oidp, int del, int recurse) 485{ 486 int error; 487 488 SYSCTL_XLOCK(); 489 error = sysctl_remove_oid_locked(oidp, del, recurse); 490 SYSCTL_XUNLOCK(); 491 return (error); 492} 493 494int 495sysctl_remove_name(struct sysctl_oid *parent, const char *name, 496 int del, int recurse) 497{ 498 struct sysctl_oid *p, *tmp; 499 int error; 500 501 error = ENOENT; 502 SYSCTL_XLOCK(); 503 SLIST_FOREACH_SAFE(p, SYSCTL_CHILDREN(parent), oid_link, tmp) { 504 if (strcmp(p->oid_name, name) == 0) { 505 error = sysctl_remove_oid_locked(p, del, recurse); 506 break; 507 } 508 } 509 SYSCTL_XUNLOCK(); 510 511 return (error); 512} 513 514 515static int 516sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del, int recurse) 517{ 518 struct sysctl_oid *p, *tmp; 519 int error; 520 521 SYSCTL_ASSERT_XLOCKED(); 522 if (oidp == NULL) 523 return(EINVAL); 524 if ((oidp->oid_kind & CTLFLAG_DYN) == 0) { 525 printf("can't remove non-dynamic nodes!\n"); 526 return (EINVAL); 527 } 528 /* 529 * WARNING: normal method to do this should be through 530 * sysctl_ctx_free(). Use recursing as the last resort 531 * method to purge your sysctl tree of leftovers... 532 * However, if some other code still references these nodes, 533 * it will panic. 534 */ 535 if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) { 536 if (oidp->oid_refcnt == 1) { 537 SLIST_FOREACH_SAFE(p, 538 SYSCTL_CHILDREN(oidp), oid_link, tmp) { 539 if (!recurse) { 540 printf("Warning: failed attempt to " 541 "remove oid %s with child %s\n", 542 oidp->oid_name, p->oid_name); 543 return (ENOTEMPTY); 544 } 545 error = sysctl_remove_oid_locked(p, del, 546 recurse); 547 if (error) 548 return (error); 549 } 550 } 551 } 552 if (oidp->oid_refcnt > 1 ) { 553 oidp->oid_refcnt--; 554 } else { 555 if (oidp->oid_refcnt == 0) { 556 printf("Warning: bad oid_refcnt=%u (%s)!\n", 557 oidp->oid_refcnt, oidp->oid_name); 558 return (EINVAL); 559 } 560 sysctl_unregister_oid(oidp); 561 if (del) { 562 /* 563 * Wait for all threads running the handler to drain. 564 * This preserves the previous behavior when the 565 * sysctl lock was held across a handler invocation, 566 * and is necessary for module unload correctness. 567 */ 568 while (oidp->oid_running > 0) { 569 oidp->oid_kind |= CTLFLAG_DYING; 570 SYSCTL_SLEEP(&oidp->oid_running, "oidrm", 0); 571 } 572 if (oidp->oid_descr) 573 free(__DECONST(char *, oidp->oid_descr), 574 M_SYSCTLOID); 575 free(__DECONST(char *, oidp->oid_name), M_SYSCTLOID); 576 free(oidp, M_SYSCTLOID); 577 } 578 } 579 return (0); 580} 581/* 582 * Create new sysctls at run time. 583 * clist may point to a valid context initialized with sysctl_ctx_init(). 584 */ 585struct sysctl_oid * 586sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent, 587 int number, const char *name, int kind, void *arg1, intptr_t arg2, 588 int (*handler)(SYSCTL_HANDLER_ARGS), const char *fmt, const char *descr) 589{ 590 struct sysctl_oid *oidp; 591 592 /* You have to hook up somewhere.. */ 593 if (parent == NULL) 594 return(NULL); 595 /* Check if the node already exists, otherwise create it */ 596 SYSCTL_XLOCK(); 597 oidp = sysctl_find_oidname(name, parent); 598 if (oidp != NULL) { 599 if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) { 600 oidp->oid_refcnt++; 601 /* Update the context */ 602 if (clist != NULL) 603 sysctl_ctx_entry_add(clist, oidp); 604 SYSCTL_XUNLOCK(); 605 return (oidp); 606 } else { 607 SYSCTL_XUNLOCK(); 608 printf("can't re-use a leaf (%s)!\n", name); 609 return (NULL); 610 } 611 } 612 oidp = malloc(sizeof(struct sysctl_oid), M_SYSCTLOID, M_WAITOK|M_ZERO); 613 oidp->oid_parent = parent; 614 SLIST_INIT(&oidp->oid_children); 615 oidp->oid_number = number; 616 oidp->oid_refcnt = 1; 617 oidp->oid_name = strdup(name, M_SYSCTLOID); 618 oidp->oid_handler = handler; 619 oidp->oid_kind = CTLFLAG_DYN | kind; 620 oidp->oid_arg1 = arg1; 621 oidp->oid_arg2 = arg2; 622 oidp->oid_fmt = fmt; 623 if (descr != NULL) 624 oidp->oid_descr = strdup(descr, M_SYSCTLOID); 625 /* Update the context, if used */ 626 if (clist != NULL) 627 sysctl_ctx_entry_add(clist, oidp); 628 /* Register this oid */ 629 sysctl_register_oid(oidp); 630 SYSCTL_XUNLOCK(); 631 return (oidp); 632} 633 634/* 635 * Rename an existing oid. 636 */ 637void 638sysctl_rename_oid(struct sysctl_oid *oidp, const char *name) 639{ 640 char *newname; 641 char *oldname; 642 643 newname = strdup(name, M_SYSCTLOID); 644 SYSCTL_XLOCK(); 645 oldname = __DECONST(char *, oidp->oid_name); 646 oidp->oid_name = newname; 647 SYSCTL_XUNLOCK(); 648 free(oldname, M_SYSCTLOID); 649} 650 651/* 652 * Reparent an existing oid. 653 */ 654int 655sysctl_move_oid(struct sysctl_oid *oid, struct sysctl_oid_list *parent) 656{ 657 struct sysctl_oid *oidp; 658 659 SYSCTL_XLOCK(); 660 if (oid->oid_parent == parent) { 661 SYSCTL_XUNLOCK(); 662 return (0); 663 } 664 oidp = sysctl_find_oidname(oid->oid_name, parent); 665 if (oidp != NULL) { 666 SYSCTL_XUNLOCK(); 667 return (EEXIST); 668 } 669 sysctl_unregister_oid(oid); 670 oid->oid_parent = parent; 671 oid->oid_number = OID_AUTO; 672 sysctl_register_oid(oid); 673 SYSCTL_XUNLOCK(); 674 return (0); 675} 676 677/* 678 * Register the kernel's oids on startup. 679 */ 680SET_DECLARE(sysctl_set, struct sysctl_oid); 681 682static void 683sysctl_register_all(void *arg) 684{ 685 struct sysctl_oid **oidp; 686 687 sx_init(&sysctlmemlock, "sysctl mem"); 688 SYSCTL_INIT(); 689 SYSCTL_XLOCK(); 690 SET_FOREACH(oidp, sysctl_set) 691 sysctl_register_oid(*oidp); 692 SYSCTL_XUNLOCK(); 693} 694SYSINIT(sysctl, SI_SUB_KMEM, SI_ORDER_FIRST, sysctl_register_all, 0); 695 696/* 697 * "Staff-functions" 698 * 699 * These functions implement a presently undocumented interface 700 * used by the sysctl program to walk the tree, and get the type 701 * so it can print the value. 702 * This interface is under work and consideration, and should probably 703 * be killed with a big axe by the first person who can find the time. 704 * (be aware though, that the proper interface isn't as obvious as it 705 * may seem, there are various conflicting requirements. 706 * 707 * {0,0} printf the entire MIB-tree. 708 * {0,1,...} return the name of the "..." OID. 709 * {0,2,...} return the next OID. 710 * {0,3} return the OID of the name in "new" 711 * {0,4,...} return the kind & format info for the "..." OID. 712 * {0,5,...} return the description the "..." OID. 713 */ 714 715#ifdef SYSCTL_DEBUG 716static void 717sysctl_sysctl_debug_dump_node(struct sysctl_oid_list *l, int i) 718{ 719 int k; 720 struct sysctl_oid *oidp; 721 722 SYSCTL_ASSERT_XLOCKED(); 723 SLIST_FOREACH(oidp, l, oid_link) { 724 725 for (k=0; k<i; k++) 726 printf(" "); 727 728 printf("%d %s ", oidp->oid_number, oidp->oid_name); 729 730 printf("%c%c", 731 oidp->oid_kind & CTLFLAG_RD ? 'R':' ', 732 oidp->oid_kind & CTLFLAG_WR ? 'W':' '); 733 734 if (oidp->oid_handler) 735 printf(" *Handler"); 736 737 switch (oidp->oid_kind & CTLTYPE) { 738 case CTLTYPE_NODE: 739 printf(" Node\n"); 740 if (!oidp->oid_handler) { 741 sysctl_sysctl_debug_dump_node( 742 SYSCTL_CHILDREN(oidp), i + 2); 743 } 744 break; 745 case CTLTYPE_INT: printf(" Int\n"); break; 746 case CTLTYPE_UINT: printf(" u_int\n"); break; 747 case CTLTYPE_LONG: printf(" Long\n"); break; 748 case CTLTYPE_ULONG: printf(" u_long\n"); break; 749 case CTLTYPE_STRING: printf(" String\n"); break; 750 case CTLTYPE_U64: printf(" uint64_t\n"); break; 751 case CTLTYPE_S64: printf(" int64_t\n"); break; 752 case CTLTYPE_OPAQUE: printf(" Opaque/struct\n"); break; 753 default: printf("\n"); 754 } 755 756 } 757} 758 759static int 760sysctl_sysctl_debug(SYSCTL_HANDLER_ARGS) 761{ 762 int error; 763 764 error = priv_check(req->td, PRIV_SYSCTL_DEBUG); 765 if (error) 766 return (error); 767 SYSCTL_XLOCK(); 768 sysctl_sysctl_debug_dump_node(&sysctl__children, 0); 769 SYSCTL_XUNLOCK(); 770 return (ENOENT); 771} 772 773SYSCTL_PROC(_sysctl, 0, debug, CTLTYPE_STRING|CTLFLAG_RD, 774 0, 0, sysctl_sysctl_debug, "-", ""); 775#endif 776 777static int 778sysctl_sysctl_name(SYSCTL_HANDLER_ARGS) 779{ 780 int *name = (int *) arg1; 781 u_int namelen = arg2; 782 int error = 0; 783 struct sysctl_oid *oid; 784 struct sysctl_oid_list *lsp = &sysctl__children, *lsp2; 785 char buf[10]; 786 787 SYSCTL_XLOCK(); 788 while (namelen) { 789 if (!lsp) { 790 snprintf(buf,sizeof(buf),"%d",*name); 791 if (req->oldidx) 792 error = SYSCTL_OUT(req, ".", 1); 793 if (!error) 794 error = SYSCTL_OUT(req, buf, strlen(buf)); 795 if (error) 796 goto out; 797 namelen--; 798 name++; 799 continue; 800 } 801 lsp2 = 0; 802 SLIST_FOREACH(oid, lsp, oid_link) { 803 if (oid->oid_number != *name) 804 continue; 805 806 if (req->oldidx) 807 error = SYSCTL_OUT(req, ".", 1); 808 if (!error) 809 error = SYSCTL_OUT(req, oid->oid_name, 810 strlen(oid->oid_name)); 811 if (error) 812 goto out; 813 814 namelen--; 815 name++; 816 817 if ((oid->oid_kind & CTLTYPE) != CTLTYPE_NODE) 818 break; 819 820 if (oid->oid_handler) 821 break; 822 823 lsp2 = SYSCTL_CHILDREN(oid); 824 break; 825 } 826 lsp = lsp2; 827 } 828 error = SYSCTL_OUT(req, "", 1); 829 out: 830 SYSCTL_XUNLOCK(); 831 return (error); 832} 833 834/* 835 * XXXRW/JA: Shouldn't return name data for nodes that we don't permit in 836 * capability mode. 837 */ 838static SYSCTL_NODE(_sysctl, 1, name, CTLFLAG_RD | CTLFLAG_CAPRD, 839 sysctl_sysctl_name, ""); 840 841static int 842sysctl_sysctl_next_ls(struct sysctl_oid_list *lsp, int *name, u_int namelen, 843 int *next, int *len, int level, struct sysctl_oid **oidpp) 844{ 845 struct sysctl_oid *oidp; 846 847 SYSCTL_ASSERT_XLOCKED(); 848 *len = level; 849 SLIST_FOREACH(oidp, lsp, oid_link) { 850 *next = oidp->oid_number; 851 *oidpp = oidp; 852 853 if (oidp->oid_kind & CTLFLAG_SKIP) 854 continue; 855 856 if (!namelen) { 857 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) 858 return (0); 859 if (oidp->oid_handler) 860 /* We really should call the handler here...*/ 861 return (0); 862 lsp = SYSCTL_CHILDREN(oidp); 863 if (!sysctl_sysctl_next_ls(lsp, 0, 0, next+1, 864 len, level+1, oidpp)) 865 return (0); 866 goto emptynode; 867 } 868 869 if (oidp->oid_number < *name) 870 continue; 871 872 if (oidp->oid_number > *name) { 873 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) 874 return (0); 875 if (oidp->oid_handler) 876 return (0); 877 lsp = SYSCTL_CHILDREN(oidp); 878 if (!sysctl_sysctl_next_ls(lsp, name+1, namelen-1, 879 next+1, len, level+1, oidpp)) 880 return (0); 881 goto next; 882 } 883 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) 884 continue; 885 886 if (oidp->oid_handler) 887 continue; 888 889 lsp = SYSCTL_CHILDREN(oidp); 890 if (!sysctl_sysctl_next_ls(lsp, name+1, namelen-1, next+1, 891 len, level+1, oidpp)) 892 return (0); 893 next: 894 namelen = 1; 895 emptynode: 896 *len = level; 897 } 898 return (1); 899} 900 901static int 902sysctl_sysctl_next(SYSCTL_HANDLER_ARGS) 903{ 904 int *name = (int *) arg1; 905 u_int namelen = arg2; 906 int i, j, error; 907 struct sysctl_oid *oid; 908 struct sysctl_oid_list *lsp = &sysctl__children; 909 int newoid[CTL_MAXNAME]; 910 911 SYSCTL_XLOCK(); 912 i = sysctl_sysctl_next_ls(lsp, name, namelen, newoid, &j, 1, &oid); 913 SYSCTL_XUNLOCK(); 914 if (i) 915 return (ENOENT); 916 error = SYSCTL_OUT(req, newoid, j * sizeof (int)); 917 return (error); 918} 919 920/* 921 * XXXRW/JA: Shouldn't return next data for nodes that we don't permit in 922 * capability mode. 923 */ 924static SYSCTL_NODE(_sysctl, 2, next, CTLFLAG_RD | CTLFLAG_CAPRD, 925 sysctl_sysctl_next, ""); 926 927static int 928name2oid(char *name, int *oid, int *len, struct sysctl_oid **oidpp) 929{ 930 struct sysctl_oid *oidp; 931 struct sysctl_oid_list *lsp = &sysctl__children; 932 char *p; 933 934 SYSCTL_ASSERT_XLOCKED(); 935 936 for (*len = 0; *len < CTL_MAXNAME;) { 937 p = strsep(&name, "."); 938 939 oidp = SLIST_FIRST(lsp); 940 for (;; oidp = SLIST_NEXT(oidp, oid_link)) { 941 if (oidp == NULL) 942 return (ENOENT); 943 if (strcmp(p, oidp->oid_name) == 0) 944 break; 945 } 946 *oid++ = oidp->oid_number; 947 (*len)++; 948 949 if (name == NULL || *name == '\0') { 950 if (oidpp) 951 *oidpp = oidp; 952 return (0); 953 } 954 955 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) 956 break; 957 958 if (oidp->oid_handler) 959 break; 960 961 lsp = SYSCTL_CHILDREN(oidp); 962 } 963 return (ENOENT); 964} 965 966static int 967sysctl_sysctl_name2oid(SYSCTL_HANDLER_ARGS) 968{ 969 char *p; 970 int error, oid[CTL_MAXNAME], len = 0; 971 struct sysctl_oid *op = 0; 972 973 if (!req->newlen) 974 return (ENOENT); 975 if (req->newlen >= MAXPATHLEN) /* XXX arbitrary, undocumented */ 976 return (ENAMETOOLONG); 977 978 p = malloc(req->newlen+1, M_SYSCTL, M_WAITOK); 979 980 error = SYSCTL_IN(req, p, req->newlen); 981 if (error) { 982 free(p, M_SYSCTL); 983 return (error); 984 } 985 986 p [req->newlen] = '\0'; 987 988 SYSCTL_XLOCK(); 989 error = name2oid(p, oid, &len, &op); 990 SYSCTL_XUNLOCK(); 991 992 free(p, M_SYSCTL); 993 994 if (error) 995 return (error); 996 997 error = SYSCTL_OUT(req, oid, len * sizeof *oid); 998 return (error); 999} 1000 1001/* 1002 * XXXRW/JA: Shouldn't return name2oid data for nodes that we don't permit in 1003 * capability mode. 1004 */ 1005SYSCTL_PROC(_sysctl, 3, name2oid, 1006 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE 1007 | CTLFLAG_CAPRW, 0, 0, sysctl_sysctl_name2oid, "I", ""); 1008 1009static int 1010sysctl_sysctl_oidfmt(SYSCTL_HANDLER_ARGS) 1011{ 1012 struct sysctl_oid *oid; 1013 int error; 1014 1015 SYSCTL_XLOCK(); 1016 error = sysctl_find_oid(arg1, arg2, &oid, NULL, req); 1017 if (error) 1018 goto out; 1019 1020 if (oid->oid_fmt == NULL) { 1021 error = ENOENT; 1022 goto out; 1023 } 1024 error = SYSCTL_OUT(req, &oid->oid_kind, sizeof(oid->oid_kind)); 1025 if (error) 1026 goto out; 1027 error = SYSCTL_OUT(req, oid->oid_fmt, strlen(oid->oid_fmt) + 1); 1028 out: 1029 SYSCTL_XUNLOCK(); 1030 return (error); 1031} 1032 1033 1034static SYSCTL_NODE(_sysctl, 4, oidfmt, CTLFLAG_RD|CTLFLAG_MPSAFE|CTLFLAG_CAPRD, 1035 sysctl_sysctl_oidfmt, ""); 1036 1037static int 1038sysctl_sysctl_oiddescr(SYSCTL_HANDLER_ARGS) 1039{ 1040 struct sysctl_oid *oid; 1041 int error; 1042 1043 SYSCTL_XLOCK(); 1044 error = sysctl_find_oid(arg1, arg2, &oid, NULL, req); 1045 if (error) 1046 goto out; 1047 1048 if (oid->oid_descr == NULL) { 1049 error = ENOENT; 1050 goto out; 1051 } 1052 error = SYSCTL_OUT(req, oid->oid_descr, strlen(oid->oid_descr) + 1); 1053 out: 1054 SYSCTL_XUNLOCK(); 1055 return (error); 1056} 1057 1058static SYSCTL_NODE(_sysctl, 5, oiddescr, CTLFLAG_RD|CTLFLAG_CAPRD, 1059 sysctl_sysctl_oiddescr, ""); 1060 1061/* 1062 * Default "handler" functions. 1063 */ 1064 1065/* 1066 * Handle an int, signed or unsigned. 1067 * Two cases: 1068 * a variable: point arg1 at it. 1069 * a constant: pass it in arg2. 1070 */ 1071 1072int 1073sysctl_handle_int(SYSCTL_HANDLER_ARGS) 1074{ 1075 int tmpout, error = 0; 1076 1077 /* 1078 * Attempt to get a coherent snapshot by making a copy of the data. 1079 */ 1080 if (arg1) 1081 tmpout = *(int *)arg1; 1082 else 1083 tmpout = arg2; 1084 error = SYSCTL_OUT(req, &tmpout, sizeof(int)); 1085 1086 if (error || !req->newptr) 1087 return (error); 1088 1089 if (!arg1) 1090 error = EPERM; 1091 else 1092 error = SYSCTL_IN(req, arg1, sizeof(int)); 1093 return (error); 1094} 1095 1096/* 1097 * Based on on sysctl_handle_int() convert milliseconds into ticks. 1098 * Note: this is used by TCP. 1099 */ 1100 1101int 1102sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS) 1103{ 1104 int error, s, tt; 1105 1106 tt = *(int *)arg1; 1107 s = (int)((int64_t)tt * 1000 / hz); 1108 1109 error = sysctl_handle_int(oidp, &s, 0, req); 1110 if (error || !req->newptr) 1111 return (error); 1112 1113 tt = (int)((int64_t)s * hz / 1000); 1114 if (tt < 1) 1115 return (EINVAL); 1116 1117 *(int *)arg1 = tt; 1118 return (0); 1119} 1120 1121 1122/* 1123 * Handle a long, signed or unsigned. 1124 * Two cases: 1125 * a variable: point arg1 at it. 1126 * a constant: pass it in arg2. 1127 */ 1128 1129int 1130sysctl_handle_long(SYSCTL_HANDLER_ARGS) 1131{ 1132 int error = 0; 1133 long tmplong; 1134#ifdef SCTL_MASK32 1135 int tmpint; 1136#endif 1137 1138 /* 1139 * Attempt to get a coherent snapshot by making a copy of the data. 1140 */ 1141 if (arg1) 1142 tmplong = *(long *)arg1; 1143 else 1144 tmplong = arg2; 1145#ifdef SCTL_MASK32 1146 if (req->flags & SCTL_MASK32) { 1147 tmpint = tmplong; 1148 error = SYSCTL_OUT(req, &tmpint, sizeof(int)); 1149 } else 1150#endif 1151 error = SYSCTL_OUT(req, &tmplong, sizeof(long)); 1152 1153 if (error || !req->newptr) 1154 return (error); 1155 1156 if (!arg1) 1157 error = EPERM; 1158#ifdef SCTL_MASK32 1159 else if (req->flags & SCTL_MASK32) { 1160 error = SYSCTL_IN(req, &tmpint, sizeof(int)); 1161 *(long *)arg1 = (long)tmpint; 1162 } 1163#endif 1164 else 1165 error = SYSCTL_IN(req, arg1, sizeof(long)); 1166 return (error); 1167} 1168 1169/* 1170 * Handle a 64 bit int, signed or unsigned. 1171 * Two cases: 1172 * a variable: point arg1 at it. 1173 * a constant: pass it in arg2. 1174 */ 1175int 1176sysctl_handle_64(SYSCTL_HANDLER_ARGS) 1177{ 1178 int error = 0; 1179 uint64_t tmpout; 1180 1181 /* 1182 * Attempt to get a coherent snapshot by making a copy of the data. 1183 */ 1184 if (arg1) 1185 tmpout = *(uint64_t *)arg1; 1186 else 1187 tmpout = arg2; 1188 error = SYSCTL_OUT(req, &tmpout, sizeof(uint64_t)); 1189 1190 if (error || !req->newptr) 1191 return (error); 1192 1193 if (!arg1) 1194 error = EPERM; 1195 else 1196 error = SYSCTL_IN(req, arg1, sizeof(uint64_t)); 1197 return (error); 1198} 1199 1200/* 1201 * Handle our generic '\0' terminated 'C' string. 1202 * Two cases: 1203 * a variable string: point arg1 at it, arg2 is max length. 1204 * a constant string: point arg1 at it, arg2 is zero. 1205 */ 1206 1207int 1208sysctl_handle_string(SYSCTL_HANDLER_ARGS) 1209{ 1210 size_t outlen; 1211 int error = 0; 1212 1213 /* 1214 * A zero-length buffer indicates a fixed size read-only 1215 * string: 1216 */ 1217 if (arg2 == 0) 1218 arg2 = strlen((char *)arg1) + 1; 1219 1220 if (req->oldptr != NULL) { 1221 char *tmparg; 1222 1223 /* try to make a coherent snapshot of the string */ 1224 tmparg = malloc(arg2, M_SYSCTLTMP, M_WAITOK); 1225 memcpy(tmparg, arg1, arg2); 1226 1227 outlen = strnlen(tmparg, arg2 - 1) + 1; 1228 error = SYSCTL_OUT(req, tmparg, outlen); 1229 1230 free(tmparg, M_SYSCTLTMP); 1231 } else { 1232 outlen = strnlen((char *)arg1, arg2 - 1) + 1; 1233 error = SYSCTL_OUT(req, NULL, outlen); 1234 } 1235 if (error || !req->newptr) 1236 return (error); 1237 1238 if ((req->newlen - req->newidx) >= arg2) { 1239 error = EINVAL; 1240 } else { 1241 arg2 = (req->newlen - req->newidx); 1242 error = SYSCTL_IN(req, arg1, arg2); 1243 ((char *)arg1)[arg2] = '\0'; 1244 } 1245 return (error); 1246} 1247 1248/* 1249 * Handle any kind of opaque data. 1250 * arg1 points to it, arg2 is the size. 1251 */ 1252 1253int 1254sysctl_handle_opaque(SYSCTL_HANDLER_ARGS) 1255{ 1256 int error, tries; 1257 u_int generation; 1258 struct sysctl_req req2; 1259 1260 /* 1261 * Attempt to get a coherent snapshot, by using the thread 1262 * pre-emption counter updated from within mi_switch() to 1263 * determine if we were pre-empted during a bcopy() or 1264 * copyout(). Make 3 attempts at doing this before giving up. 1265 * If we encounter an error, stop immediately. 1266 */ 1267 tries = 0; 1268 req2 = *req; 1269retry: 1270 generation = curthread->td_generation; 1271 error = SYSCTL_OUT(req, arg1, arg2); 1272 if (error) 1273 return (error); 1274 tries++; 1275 if (generation != curthread->td_generation && tries < 3) { 1276 *req = req2; 1277 goto retry; 1278 } 1279 1280 error = SYSCTL_IN(req, arg1, arg2); 1281 1282 return (error); 1283} 1284 1285/* 1286 * Transfer functions to/from kernel space. 1287 * XXX: rather untested at this point 1288 */ 1289static int 1290sysctl_old_kernel(struct sysctl_req *req, const void *p, size_t l) 1291{ 1292 size_t i = 0; 1293 1294 if (req->oldptr) { 1295 i = l; 1296 if (req->oldlen <= req->oldidx) 1297 i = 0; 1298 else 1299 if (i > req->oldlen - req->oldidx) 1300 i = req->oldlen - req->oldidx; 1301 if (i > 0) 1302 bcopy(p, (char *)req->oldptr + req->oldidx, i); 1303 } 1304 req->oldidx += l; 1305 if (req->oldptr && i != l) 1306 return (ENOMEM); 1307 return (0); 1308} 1309 1310static int 1311sysctl_new_kernel(struct sysctl_req *req, void *p, size_t l) 1312{ 1313 if (!req->newptr) 1314 return (0); 1315 if (req->newlen - req->newidx < l) 1316 return (EINVAL); 1317 bcopy((char *)req->newptr + req->newidx, p, l); 1318 req->newidx += l; 1319 return (0); 1320} 1321 1322int 1323kernel_sysctl(struct thread *td, int *name, u_int namelen, void *old, 1324 size_t *oldlenp, void *new, size_t newlen, size_t *retval, int flags) 1325{ 1326 int error = 0; 1327 struct sysctl_req req; 1328 1329 bzero(&req, sizeof req); 1330 1331 req.td = td; 1332 req.flags = flags; 1333 1334 if (oldlenp) { 1335 req.oldlen = *oldlenp; 1336 } 1337 req.validlen = req.oldlen; 1338 1339 if (old) { 1340 req.oldptr= old; 1341 } 1342 1343 if (new != NULL) { 1344 req.newlen = newlen; 1345 req.newptr = new; 1346 } 1347 1348 req.oldfunc = sysctl_old_kernel; 1349 req.newfunc = sysctl_new_kernel; 1350 req.lock = REQ_UNWIRED; 1351 1352 SYSCTL_XLOCK(); 1353 error = sysctl_root(0, name, namelen, &req); 1354 SYSCTL_XUNLOCK(); 1355 1356 if (req.lock == REQ_WIRED && req.validlen > 0) 1357 vsunlock(req.oldptr, req.validlen); 1358 1359 if (error && error != ENOMEM) 1360 return (error); 1361 1362 if (retval) { 1363 if (req.oldptr && req.oldidx > req.validlen) 1364 *retval = req.validlen; 1365 else 1366 *retval = req.oldidx; 1367 } 1368 return (error); 1369} 1370 1371int 1372kernel_sysctlbyname(struct thread *td, char *name, void *old, size_t *oldlenp, 1373 void *new, size_t newlen, size_t *retval, int flags) 1374{ 1375 int oid[CTL_MAXNAME]; 1376 size_t oidlen, plen; 1377 int error; 1378 1379 oid[0] = 0; /* sysctl internal magic */ 1380 oid[1] = 3; /* name2oid */ 1381 oidlen = sizeof(oid); 1382 1383 error = kernel_sysctl(td, oid, 2, oid, &oidlen, 1384 (void *)name, strlen(name), &plen, flags); 1385 if (error) 1386 return (error); 1387 1388 error = kernel_sysctl(td, oid, plen / sizeof(int), old, oldlenp, 1389 new, newlen, retval, flags); 1390 return (error); 1391} 1392 1393/* 1394 * Transfer function to/from user space. 1395 */ 1396static int 1397sysctl_old_user(struct sysctl_req *req, const void *p, size_t l) 1398{ 1399 size_t i, len, origidx; 1400 int error; 1401 1402 origidx = req->oldidx; 1403 req->oldidx += l; 1404 if (req->oldptr == NULL) 1405 return (0); 1406 /* 1407 * If we have not wired the user supplied buffer and we are currently 1408 * holding locks, drop a witness warning, as it's possible that 1409 * write operations to the user page can sleep. 1410 */ 1411 if (req->lock != REQ_WIRED) 1412 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, 1413 "sysctl_old_user()"); 1414 i = l; 1415 len = req->validlen; 1416 if (len <= origidx) 1417 i = 0; 1418 else { 1419 if (i > len - origidx) 1420 i = len - origidx; 1421 if (req->lock == REQ_WIRED) { 1422 error = copyout_nofault(p, (char *)req->oldptr + 1423 origidx, i); 1424 } else 1425 error = copyout(p, (char *)req->oldptr + origidx, i); 1426 if (error != 0) 1427 return (error); 1428 } 1429 if (i < l) 1430 return (ENOMEM); 1431 return (0); 1432} 1433 1434static int 1435sysctl_new_user(struct sysctl_req *req, void *p, size_t l) 1436{ 1437 int error; 1438 1439 if (!req->newptr) 1440 return (0); 1441 if (req->newlen - req->newidx < l) 1442 return (EINVAL); 1443 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, 1444 "sysctl_new_user()"); 1445 error = copyin((char *)req->newptr + req->newidx, p, l); 1446 req->newidx += l; 1447 return (error); 1448} 1449 1450/* 1451 * Wire the user space destination buffer. If set to a value greater than 1452 * zero, the len parameter limits the maximum amount of wired memory. 1453 */ 1454int 1455sysctl_wire_old_buffer(struct sysctl_req *req, size_t len) 1456{ 1457 int ret; 1458 size_t wiredlen; 1459 1460 wiredlen = (len > 0 && len < req->oldlen) ? len : req->oldlen; 1461 ret = 0; 1462 if (req->lock != REQ_WIRED && req->oldptr && 1463 req->oldfunc == sysctl_old_user) { 1464 if (wiredlen != 0) { 1465 ret = vslock(req->oldptr, wiredlen); 1466 if (ret != 0) { 1467 if (ret != ENOMEM) 1468 return (ret); 1469 wiredlen = 0; 1470 } 1471 } 1472 req->lock = REQ_WIRED; 1473 req->validlen = wiredlen; 1474 } 1475 return (0); 1476} 1477 1478int 1479sysctl_find_oid(int *name, u_int namelen, struct sysctl_oid **noid, 1480 int *nindx, struct sysctl_req *req) 1481{ 1482 struct sysctl_oid_list *lsp; 1483 struct sysctl_oid *oid; 1484 int indx; 1485 1486 SYSCTL_ASSERT_XLOCKED(); 1487 lsp = &sysctl__children; 1488 indx = 0; 1489 while (indx < CTL_MAXNAME) { 1490 SLIST_FOREACH(oid, lsp, oid_link) { 1491 if (oid->oid_number == name[indx]) 1492 break; 1493 } 1494 if (oid == NULL) 1495 return (ENOENT); 1496 1497 indx++; 1498 if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) { 1499 if (oid->oid_handler != NULL || indx == namelen) { 1500 *noid = oid; 1501 if (nindx != NULL) 1502 *nindx = indx; 1503 KASSERT((oid->oid_kind & CTLFLAG_DYING) == 0, 1504 ("%s found DYING node %p", __func__, oid)); 1505 return (0); 1506 } 1507 lsp = SYSCTL_CHILDREN(oid); 1508 } else if (indx == namelen) { 1509 *noid = oid; 1510 if (nindx != NULL) 1511 *nindx = indx; 1512 KASSERT((oid->oid_kind & CTLFLAG_DYING) == 0, 1513 ("%s found DYING node %p", __func__, oid)); 1514 return (0); 1515 } else { 1516 return (ENOTDIR); 1517 } 1518 } 1519 return (ENOENT); 1520} 1521 1522/* 1523 * Traverse our tree, and find the right node, execute whatever it points 1524 * to, and return the resulting error code. 1525 */ 1526 1527static int 1528sysctl_root(SYSCTL_HANDLER_ARGS) 1529{ 1530 struct sysctl_oid *oid; 1531 int error, indx, lvl; 1532 1533 SYSCTL_ASSERT_XLOCKED(); 1534 1535 error = sysctl_find_oid(arg1, arg2, &oid, &indx, req); 1536 if (error) 1537 return (error); 1538 1539 if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) { 1540 /* 1541 * You can't call a sysctl when it's a node, but has 1542 * no handler. Inform the user that it's a node. 1543 * The indx may or may not be the same as namelen. 1544 */ 1545 if (oid->oid_handler == NULL) 1546 return (EISDIR); 1547 } 1548 1549 /* Is this sysctl writable? */ 1550 if (req->newptr && !(oid->oid_kind & CTLFLAG_WR)) 1551 return (EPERM); 1552 1553 KASSERT(req->td != NULL, ("sysctl_root(): req->td == NULL")); 1554 1555#ifdef CAPABILITY_MODE 1556 /* 1557 * If the process is in capability mode, then don't permit reading or 1558 * writing unless specifically granted for the node. 1559 */ 1560 if (IN_CAPABILITY_MODE(req->td)) { 1561 if (req->oldptr && !(oid->oid_kind & CTLFLAG_CAPRD)) 1562 return (EPERM); 1563 if (req->newptr && !(oid->oid_kind & CTLFLAG_CAPWR)) 1564 return (EPERM); 1565 } 1566#endif 1567 1568 /* Is this sysctl sensitive to securelevels? */ 1569 if (req->newptr && (oid->oid_kind & CTLFLAG_SECURE)) { 1570 lvl = (oid->oid_kind & CTLMASK_SECURE) >> CTLSHIFT_SECURE; 1571 error = securelevel_gt(req->td->td_ucred, lvl); 1572 if (error) 1573 return (error); 1574 } 1575 1576 /* Is this sysctl writable by only privileged users? */ 1577 if (req->newptr && !(oid->oid_kind & CTLFLAG_ANYBODY)) { 1578 int priv; 1579 1580 if (oid->oid_kind & CTLFLAG_PRISON) 1581 priv = PRIV_SYSCTL_WRITEJAIL; 1582#ifdef VIMAGE 1583 else if ((oid->oid_kind & CTLFLAG_VNET) && 1584 prison_owns_vnet(req->td->td_ucred)) 1585 priv = PRIV_SYSCTL_WRITEJAIL; 1586#endif 1587 else 1588 priv = PRIV_SYSCTL_WRITE; 1589 error = priv_check(req->td, priv); 1590 if (error) 1591 return (error); 1592 } 1593 1594 if (!oid->oid_handler) 1595 return (EINVAL); 1596 1597 if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) { 1598 arg1 = (int *)arg1 + indx; 1599 arg2 -= indx; 1600 } else { 1601 arg1 = oid->oid_arg1; 1602 arg2 = oid->oid_arg2; 1603 } 1604#ifdef MAC 1605 error = mac_system_check_sysctl(req->td->td_ucred, oid, arg1, arg2, 1606 req); 1607 if (error != 0) 1608 return (error); 1609#endif 1610#ifdef VIMAGE 1611 if ((oid->oid_kind & CTLFLAG_VNET) && arg1 != NULL) 1612 arg1 = (void *)(curvnet->vnet_data_base + (uintptr_t)arg1); 1613#endif 1614 error = sysctl_root_handler_locked(oid, arg1, arg2, req); 1615 1616 KFAIL_POINT_ERROR(_debug_fail_point, sysctl_running, error); 1617 1618 return (error); 1619} 1620 1621#ifndef _SYS_SYSPROTO_H_ 1622struct sysctl_args { 1623 int *name; 1624 u_int namelen; 1625 void *old; 1626 size_t *oldlenp; 1627 void *new; 1628 size_t newlen; 1629}; 1630#endif 1631int 1632sys___sysctl(struct thread *td, struct sysctl_args *uap) 1633{ 1634 int error, i, name[CTL_MAXNAME]; 1635 size_t j; 1636 1637 if (uap->namelen > CTL_MAXNAME || uap->namelen < 2) 1638 return (EINVAL); 1639 1640 error = copyin(uap->name, &name, uap->namelen * sizeof(int)); 1641 if (error) 1642 return (error); 1643 1644 error = userland_sysctl(td, name, uap->namelen, 1645 uap->old, uap->oldlenp, 0, 1646 uap->new, uap->newlen, &j, 0); 1647 if (error && error != ENOMEM) 1648 return (error); 1649 if (uap->oldlenp) { 1650 i = copyout(&j, uap->oldlenp, sizeof(j)); 1651 if (i) 1652 return (i); 1653 } 1654 return (error); 1655} 1656 1657/* 1658 * This is used from various compatibility syscalls too. That's why name 1659 * must be in kernel space. 1660 */ 1661int 1662userland_sysctl(struct thread *td, int *name, u_int namelen, void *old, 1663 size_t *oldlenp, int inkernel, void *new, size_t newlen, size_t *retval, 1664 int flags) 1665{ 1666 int error = 0, memlocked; 1667 struct sysctl_req req; 1668 1669 bzero(&req, sizeof req); 1670 1671 req.td = td; 1672 req.flags = flags; 1673 1674 if (oldlenp) { 1675 if (inkernel) { 1676 req.oldlen = *oldlenp; 1677 } else { 1678 error = copyin(oldlenp, &req.oldlen, sizeof(*oldlenp)); 1679 if (error) 1680 return (error); 1681 } 1682 } 1683 req.validlen = req.oldlen; 1684 1685 if (old) { 1686 if (!useracc(old, req.oldlen, VM_PROT_WRITE)) 1687 return (EFAULT); 1688 req.oldptr= old; 1689 } 1690 1691 if (new != NULL) { 1692 if (!useracc(new, newlen, VM_PROT_READ)) 1693 return (EFAULT); 1694 req.newlen = newlen; 1695 req.newptr = new; 1696 } 1697 1698 req.oldfunc = sysctl_old_user; 1699 req.newfunc = sysctl_new_user; 1700 req.lock = REQ_UNWIRED; 1701 1702#ifdef KTRACE 1703 if (KTRPOINT(curthread, KTR_SYSCTL)) 1704 ktrsysctl(name, namelen); 1705#endif 1706 1707 if (req.oldlen > PAGE_SIZE) { 1708 memlocked = 1; 1709 sx_xlock(&sysctlmemlock); 1710 } else 1711 memlocked = 0; 1712 CURVNET_SET(TD_TO_VNET(td)); 1713 1714 for (;;) { 1715 req.oldidx = 0; 1716 req.newidx = 0; 1717 SYSCTL_XLOCK(); 1718 error = sysctl_root(0, name, namelen, &req); 1719 SYSCTL_XUNLOCK(); 1720 if (error != EAGAIN) 1721 break; 1722 kern_yield(PRI_USER); 1723 } 1724 1725 CURVNET_RESTORE(); 1726 1727 if (req.lock == REQ_WIRED && req.validlen > 0) 1728 vsunlock(req.oldptr, req.validlen); 1729 if (memlocked) 1730 sx_xunlock(&sysctlmemlock); 1731 1732 if (error && error != ENOMEM) 1733 return (error); 1734 1735 if (retval) { 1736 if (req.oldptr && req.oldidx > req.validlen) 1737 *retval = req.validlen; 1738 else 1739 *retval = req.oldidx; 1740 } 1741 return (error); 1742} 1743 1744/* 1745 * Drain into a sysctl struct. The user buffer should be wired if a page 1746 * fault would cause issue. 1747 */ 1748static int 1749sbuf_sysctl_drain(void *arg, const char *data, int len) 1750{ 1751 struct sysctl_req *req = arg; 1752 int error; 1753 1754 error = SYSCTL_OUT(req, data, len); 1755 KASSERT(error >= 0, ("Got unexpected negative value %d", error)); 1756 return (error == 0 ? len : -error); 1757} 1758 1759struct sbuf * 1760sbuf_new_for_sysctl(struct sbuf *s, char *buf, int length, 1761 struct sysctl_req *req) 1762{ 1763 1764 s = sbuf_new(s, buf, length, SBUF_FIXEDLEN); 1765 sbuf_set_drain(s, sbuf_sysctl_drain, req); 1766 return (s); 1767} 1768