kern_sysctl.c revision 276341
1/*- 2 * Copyright (c) 1982, 1986, 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Mike Karels at Berkeley Software Design, Inc. 7 * 8 * Quite extensively rewritten by Poul-Henning Kamp of the FreeBSD 9 * project, to make these variables more userfriendly. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 * 35 * @(#)kern_sysctl.c 8.4 (Berkeley) 4/14/94 36 */ 37 38#include <sys/cdefs.h> 39__FBSDID("$FreeBSD: head/sys/kern/kern_sysctl.c 276341 2014-12-28 19:24:01Z mjg $"); 40 41#include "opt_capsicum.h" 42#include "opt_compat.h" 43#include "opt_ktrace.h" 44 45#include <sys/param.h> 46#include <sys/fail.h> 47#include <sys/systm.h> 48#include <sys/capsicum.h> 49#include <sys/kernel.h> 50#include <sys/sysctl.h> 51#include <sys/malloc.h> 52#include <sys/priv.h> 53#include <sys/proc.h> 54#include <sys/jail.h> 55#include <sys/lock.h> 56#include <sys/mutex.h> 57#include <sys/sbuf.h> 58#include <sys/sx.h> 59#include <sys/sysproto.h> 60#include <sys/uio.h> 61#ifdef KTRACE 62#include <sys/ktrace.h> 63#endif 64 65#include <net/vnet.h> 66 67#include <security/mac/mac_framework.h> 68 69#include <vm/vm.h> 70#include <vm/vm_extern.h> 71 72static MALLOC_DEFINE(M_SYSCTL, "sysctl", "sysctl internal magic"); 73static MALLOC_DEFINE(M_SYSCTLOID, "sysctloid", "sysctl dynamic oids"); 74static MALLOC_DEFINE(M_SYSCTLTMP, "sysctltmp", "sysctl temp output buffer"); 75 76/* 77 * The sysctllock protects the MIB tree. It also protects sysctl 78 * contexts used with dynamic sysctls. The sysctl_register_oid() and 79 * sysctl_unregister_oid() routines require the sysctllock to already 80 * be held, so the sysctl_xlock() and sysctl_xunlock() routines are 81 * provided for the few places in the kernel which need to use that 82 * API rather than using the dynamic API. Use of the dynamic API is 83 * strongly encouraged for most code. 84 * 85 * The sysctlmemlock is used to limit the amount of user memory wired for 86 * sysctl requests. This is implemented by serializing any userland 87 * sysctl requests larger than a single page via an exclusive lock. 88 */ 89static struct sx sysctllock; 90static struct sx sysctlmemlock; 91 92#define SYSCTL_XLOCK() sx_xlock(&sysctllock) 93#define SYSCTL_XUNLOCK() sx_xunlock(&sysctllock) 94#define SYSCTL_SLOCK() sx_slock(&sysctllock) 95#define SYSCTL_SUNLOCK() sx_sunlock(&sysctllock) 96#define SYSCTL_XLOCKED() sx_xlocked(&sysctllock) 97#define SYSCTL_ASSERT_LOCKED() sx_assert(&sysctllock, SA_LOCKED) 98#define SYSCTL_ASSERT_XLOCKED() sx_assert(&sysctllock, SA_XLOCKED) 99#define SYSCTL_ASSERT_SLOCKED() sx_assert(&sysctllock, SA_SLOCKED) 100#define SYSCTL_INIT() sx_init(&sysctllock, "sysctl lock") 101#define SYSCTL_SLEEP(ch, wmesg, timo) \ 102 sx_sleep(ch, &sysctllock, 0, wmesg, timo) 103 104static int sysctl_root(SYSCTL_HANDLER_ARGS); 105 106/* Root list */ 107struct sysctl_oid_list sysctl__children = SLIST_HEAD_INITIALIZER(&sysctl__children); 108 109static int sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del, 110 int recurse); 111static int sysctl_old_kernel(struct sysctl_req *, const void *, size_t); 112static int sysctl_new_kernel(struct sysctl_req *, void *, size_t); 113 114static void 115sysctl_lock(bool xlock) 116{ 117 118 if (xlock) 119 SYSCTL_XLOCK(); 120 else 121 SYSCTL_SLOCK(); 122} 123 124static bool 125sysctl_unlock(void) 126{ 127 bool xlocked; 128 129 xlocked = SYSCTL_XLOCKED(); 130 if (xlocked) 131 SYSCTL_XUNLOCK(); 132 else 133 SYSCTL_SUNLOCK(); 134 return (xlocked); 135} 136 137static struct sysctl_oid * 138sysctl_find_oidname(const char *name, struct sysctl_oid_list *list) 139{ 140 struct sysctl_oid *oidp; 141 142 SYSCTL_ASSERT_LOCKED(); 143 SLIST_FOREACH(oidp, list, oid_link) { 144 if (strcmp(oidp->oid_name, name) == 0) { 145 return (oidp); 146 } 147 } 148 return (NULL); 149} 150 151/* 152 * Initialization of the MIB tree. 153 * 154 * Order by number in each list. 155 */ 156void 157sysctl_xlock(void) 158{ 159 160 SYSCTL_XLOCK(); 161} 162 163void 164sysctl_xunlock(void) 165{ 166 167 SYSCTL_XUNLOCK(); 168} 169 170static int 171sysctl_root_handler_locked(struct sysctl_oid *oid, void *arg1, intptr_t arg2, 172 struct sysctl_req *req) 173{ 174 int error; 175 bool xlocked; 176 177 if (oid->oid_kind & CTLFLAG_DYN) 178 atomic_add_int(&oid->oid_running, 1); 179 xlocked = sysctl_unlock(); 180 181 if (!(oid->oid_kind & CTLFLAG_MPSAFE)) 182 mtx_lock(&Giant); 183 error = oid->oid_handler(oid, arg1, arg2, req); 184 if (!(oid->oid_kind & CTLFLAG_MPSAFE)) 185 mtx_unlock(&Giant); 186 187 sysctl_lock(xlocked); 188 if (oid->oid_kind & CTLFLAG_DYN) { 189 if (atomic_fetchadd_int(&oid->oid_running, -1) == 1 && 190 (oid->oid_kind & CTLFLAG_DYING) != 0) 191 wakeup(&oid->oid_running); 192 } 193 194 return (error); 195} 196 197static void 198sysctl_load_tunable_by_oid_locked(struct sysctl_oid *oidp) 199{ 200 struct sysctl_req req; 201 struct sysctl_oid *curr; 202 char *penv = NULL; 203 char path[64]; 204 ssize_t rem = sizeof(path); 205 ssize_t len; 206 int val_int; 207 long val_long; 208 int64_t val_64; 209 quad_t val_quad; 210 int error; 211 212 path[--rem] = 0; 213 214 for (curr = oidp; curr != NULL; curr = SYSCTL_PARENT(curr)) { 215 len = strlen(curr->oid_name); 216 rem -= len; 217 if (curr != oidp) 218 rem -= 1; 219 if (rem < 0) { 220 printf("OID path exceeds %d bytes\n", (int)sizeof(path)); 221 return; 222 } 223 memcpy(path + rem, curr->oid_name, len); 224 if (curr != oidp) 225 path[rem + len] = '.'; 226 } 227 228 memset(&req, 0, sizeof(req)); 229 230 req.td = curthread; 231 req.oldfunc = sysctl_old_kernel; 232 req.newfunc = sysctl_new_kernel; 233 req.lock = REQ_UNWIRED; 234 235 switch (oidp->oid_kind & CTLTYPE) { 236 case CTLTYPE_INT: 237 if (getenv_int(path + rem, &val_int) == 0) 238 return; 239 req.newlen = sizeof(val_int); 240 req.newptr = &val_int; 241 break; 242 case CTLTYPE_UINT: 243 if (getenv_uint(path + rem, (unsigned int *)&val_int) == 0) 244 return; 245 req.newlen = sizeof(val_int); 246 req.newptr = &val_int; 247 break; 248 case CTLTYPE_LONG: 249 if (getenv_long(path + rem, &val_long) == 0) 250 return; 251 req.newlen = sizeof(val_long); 252 req.newptr = &val_long; 253 break; 254 case CTLTYPE_ULONG: 255 if (getenv_ulong(path + rem, (unsigned long *)&val_long) == 0) 256 return; 257 req.newlen = sizeof(val_long); 258 req.newptr = &val_long; 259 break; 260 case CTLTYPE_S64: 261 if (getenv_quad(path + rem, &val_quad) == 0) 262 return; 263 val_64 = val_quad; 264 req.newlen = sizeof(val_64); 265 req.newptr = &val_64; 266 break; 267 case CTLTYPE_U64: 268 /* XXX there is no getenv_uquad() */ 269 if (getenv_quad(path + rem, &val_quad) == 0) 270 return; 271 val_64 = val_quad; 272 req.newlen = sizeof(val_64); 273 req.newptr = &val_64; 274 break; 275 case CTLTYPE_STRING: 276 penv = kern_getenv(path + rem); 277 if (penv == NULL) 278 return; 279 req.newlen = strlen(penv); 280 req.newptr = penv; 281 break; 282 default: 283 return; 284 } 285 error = sysctl_root_handler_locked(oidp, oidp->oid_arg1, 286 oidp->oid_arg2, &req); 287 if (error != 0) 288 printf("Setting sysctl %s failed: %d\n", path, error); 289 if (penv != NULL) 290 freeenv(penv); 291} 292 293void 294sysctl_register_oid(struct sysctl_oid *oidp) 295{ 296 struct sysctl_oid_list *parent = oidp->oid_parent; 297 struct sysctl_oid *p; 298 struct sysctl_oid *q; 299 300 /* 301 * First check if another oid with the same name already 302 * exists in the parent's list. 303 */ 304 SYSCTL_ASSERT_XLOCKED(); 305 p = sysctl_find_oidname(oidp->oid_name, parent); 306 if (p != NULL) { 307 if ((p->oid_kind & CTLTYPE) == CTLTYPE_NODE) { 308 p->oid_refcnt++; 309 return; 310 } else { 311 printf("can't re-use a leaf (%s)!\n", p->oid_name); 312 return; 313 } 314 } 315 /* 316 * If this oid has a number OID_AUTO, give it a number which 317 * is greater than any current oid. 318 * NOTE: DO NOT change the starting value here, change it in 319 * <sys/sysctl.h>, and make sure it is at least 256 to 320 * accomodate e.g. net.inet.raw as a static sysctl node. 321 */ 322 if (oidp->oid_number == OID_AUTO) { 323 static int newoid = CTL_AUTO_START; 324 325 oidp->oid_number = newoid++; 326 if (newoid == 0x7fffffff) 327 panic("out of oids"); 328 } 329#if 0 330 else if (oidp->oid_number >= CTL_AUTO_START) { 331 /* do not panic; this happens when unregistering sysctl sets */ 332 printf("static sysctl oid too high: %d", oidp->oid_number); 333 } 334#endif 335 336 /* 337 * Insert the oid into the parent's list in order. 338 */ 339 q = NULL; 340 SLIST_FOREACH(p, parent, oid_link) { 341 if (oidp->oid_number < p->oid_number) 342 break; 343 q = p; 344 } 345 if (q) 346 SLIST_INSERT_AFTER(q, oidp, oid_link); 347 else 348 SLIST_INSERT_HEAD(parent, oidp, oid_link); 349 350 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE && 351#ifdef VIMAGE 352 (oidp->oid_kind & CTLFLAG_VNET) == 0 && 353#endif 354 (oidp->oid_kind & CTLFLAG_TUN) != 0 && 355 (oidp->oid_kind & CTLFLAG_NOFETCH) == 0) { 356 sysctl_load_tunable_by_oid_locked(oidp); 357 } 358} 359 360void 361sysctl_unregister_oid(struct sysctl_oid *oidp) 362{ 363 struct sysctl_oid *p; 364 int error; 365 366 SYSCTL_ASSERT_XLOCKED(); 367 error = ENOENT; 368 if (oidp->oid_number == OID_AUTO) { 369 error = EINVAL; 370 } else { 371 SLIST_FOREACH(p, oidp->oid_parent, oid_link) { 372 if (p == oidp) { 373 SLIST_REMOVE(oidp->oid_parent, oidp, 374 sysctl_oid, oid_link); 375 error = 0; 376 break; 377 } 378 } 379 } 380 381 /* 382 * This can happen when a module fails to register and is 383 * being unloaded afterwards. It should not be a panic() 384 * for normal use. 385 */ 386 if (error) 387 printf("%s: failed to unregister sysctl\n", __func__); 388} 389 390/* Initialize a new context to keep track of dynamically added sysctls. */ 391int 392sysctl_ctx_init(struct sysctl_ctx_list *c) 393{ 394 395 if (c == NULL) { 396 return (EINVAL); 397 } 398 399 /* 400 * No locking here, the caller is responsible for not adding 401 * new nodes to a context until after this function has 402 * returned. 403 */ 404 TAILQ_INIT(c); 405 return (0); 406} 407 408/* Free the context, and destroy all dynamic oids registered in this context */ 409int 410sysctl_ctx_free(struct sysctl_ctx_list *clist) 411{ 412 struct sysctl_ctx_entry *e, *e1; 413 int error; 414 415 error = 0; 416 /* 417 * First perform a "dry run" to check if it's ok to remove oids. 418 * XXX FIXME 419 * XXX This algorithm is a hack. But I don't know any 420 * XXX better solution for now... 421 */ 422 SYSCTL_XLOCK(); 423 TAILQ_FOREACH(e, clist, link) { 424 error = sysctl_remove_oid_locked(e->entry, 0, 0); 425 if (error) 426 break; 427 } 428 /* 429 * Restore deregistered entries, either from the end, 430 * or from the place where error occured. 431 * e contains the entry that was not unregistered 432 */ 433 if (error) 434 e1 = TAILQ_PREV(e, sysctl_ctx_list, link); 435 else 436 e1 = TAILQ_LAST(clist, sysctl_ctx_list); 437 while (e1 != NULL) { 438 sysctl_register_oid(e1->entry); 439 e1 = TAILQ_PREV(e1, sysctl_ctx_list, link); 440 } 441 if (error) { 442 SYSCTL_XUNLOCK(); 443 return(EBUSY); 444 } 445 /* Now really delete the entries */ 446 e = TAILQ_FIRST(clist); 447 while (e != NULL) { 448 e1 = TAILQ_NEXT(e, link); 449 error = sysctl_remove_oid_locked(e->entry, 1, 0); 450 if (error) 451 panic("sysctl_remove_oid: corrupt tree, entry: %s", 452 e->entry->oid_name); 453 free(e, M_SYSCTLOID); 454 e = e1; 455 } 456 SYSCTL_XUNLOCK(); 457 return (error); 458} 459 460/* Add an entry to the context */ 461struct sysctl_ctx_entry * 462sysctl_ctx_entry_add(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp) 463{ 464 struct sysctl_ctx_entry *e; 465 466 SYSCTL_ASSERT_XLOCKED(); 467 if (clist == NULL || oidp == NULL) 468 return(NULL); 469 e = malloc(sizeof(struct sysctl_ctx_entry), M_SYSCTLOID, M_WAITOK); 470 e->entry = oidp; 471 TAILQ_INSERT_HEAD(clist, e, link); 472 return (e); 473} 474 475/* Find an entry in the context */ 476struct sysctl_ctx_entry * 477sysctl_ctx_entry_find(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp) 478{ 479 struct sysctl_ctx_entry *e; 480 481 SYSCTL_ASSERT_XLOCKED(); 482 if (clist == NULL || oidp == NULL) 483 return(NULL); 484 TAILQ_FOREACH(e, clist, link) { 485 if(e->entry == oidp) 486 return(e); 487 } 488 return (e); 489} 490 491/* 492 * Delete an entry from the context. 493 * NOTE: this function doesn't free oidp! You have to remove it 494 * with sysctl_remove_oid(). 495 */ 496int 497sysctl_ctx_entry_del(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp) 498{ 499 struct sysctl_ctx_entry *e; 500 501 if (clist == NULL || oidp == NULL) 502 return (EINVAL); 503 SYSCTL_XLOCK(); 504 e = sysctl_ctx_entry_find(clist, oidp); 505 if (e != NULL) { 506 TAILQ_REMOVE(clist, e, link); 507 SYSCTL_XUNLOCK(); 508 free(e, M_SYSCTLOID); 509 return (0); 510 } else { 511 SYSCTL_XUNLOCK(); 512 return (ENOENT); 513 } 514} 515 516/* 517 * Remove dynamically created sysctl trees. 518 * oidp - top of the tree to be removed 519 * del - if 0 - just deregister, otherwise free up entries as well 520 * recurse - if != 0 traverse the subtree to be deleted 521 */ 522int 523sysctl_remove_oid(struct sysctl_oid *oidp, int del, int recurse) 524{ 525 int error; 526 527 SYSCTL_XLOCK(); 528 error = sysctl_remove_oid_locked(oidp, del, recurse); 529 SYSCTL_XUNLOCK(); 530 return (error); 531} 532 533int 534sysctl_remove_name(struct sysctl_oid *parent, const char *name, 535 int del, int recurse) 536{ 537 struct sysctl_oid *p, *tmp; 538 int error; 539 540 error = ENOENT; 541 SYSCTL_XLOCK(); 542 SLIST_FOREACH_SAFE(p, SYSCTL_CHILDREN(parent), oid_link, tmp) { 543 if (strcmp(p->oid_name, name) == 0) { 544 error = sysctl_remove_oid_locked(p, del, recurse); 545 break; 546 } 547 } 548 SYSCTL_XUNLOCK(); 549 550 return (error); 551} 552 553 554static int 555sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del, int recurse) 556{ 557 struct sysctl_oid *p, *tmp; 558 int error; 559 560 SYSCTL_ASSERT_XLOCKED(); 561 if (oidp == NULL) 562 return(EINVAL); 563 if ((oidp->oid_kind & CTLFLAG_DYN) == 0) { 564 printf("can't remove non-dynamic nodes!\n"); 565 return (EINVAL); 566 } 567 /* 568 * WARNING: normal method to do this should be through 569 * sysctl_ctx_free(). Use recursing as the last resort 570 * method to purge your sysctl tree of leftovers... 571 * However, if some other code still references these nodes, 572 * it will panic. 573 */ 574 if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) { 575 if (oidp->oid_refcnt == 1) { 576 SLIST_FOREACH_SAFE(p, 577 SYSCTL_CHILDREN(oidp), oid_link, tmp) { 578 if (!recurse) { 579 printf("Warning: failed attempt to " 580 "remove oid %s with child %s\n", 581 oidp->oid_name, p->oid_name); 582 return (ENOTEMPTY); 583 } 584 error = sysctl_remove_oid_locked(p, del, 585 recurse); 586 if (error) 587 return (error); 588 } 589 } 590 } 591 if (oidp->oid_refcnt > 1 ) { 592 oidp->oid_refcnt--; 593 } else { 594 if (oidp->oid_refcnt == 0) { 595 printf("Warning: bad oid_refcnt=%u (%s)!\n", 596 oidp->oid_refcnt, oidp->oid_name); 597 return (EINVAL); 598 } 599 sysctl_unregister_oid(oidp); 600 if (del) { 601 /* 602 * Wait for all threads running the handler to drain. 603 * This preserves the previous behavior when the 604 * sysctl lock was held across a handler invocation, 605 * and is necessary for module unload correctness. 606 */ 607 while (oidp->oid_running > 0) { 608 oidp->oid_kind |= CTLFLAG_DYING; 609 SYSCTL_SLEEP(&oidp->oid_running, "oidrm", 0); 610 } 611 if (oidp->oid_descr) 612 free(__DECONST(char *, oidp->oid_descr), 613 M_SYSCTLOID); 614 free(__DECONST(char *, oidp->oid_name), M_SYSCTLOID); 615 free(oidp, M_SYSCTLOID); 616 } 617 } 618 return (0); 619} 620/* 621 * Create new sysctls at run time. 622 * clist may point to a valid context initialized with sysctl_ctx_init(). 623 */ 624struct sysctl_oid * 625sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent, 626 int number, const char *name, int kind, void *arg1, intptr_t arg2, 627 int (*handler)(SYSCTL_HANDLER_ARGS), const char *fmt, const char *descr) 628{ 629 struct sysctl_oid *oidp; 630 631 /* You have to hook up somewhere.. */ 632 if (parent == NULL) 633 return(NULL); 634 /* Check if the node already exists, otherwise create it */ 635 SYSCTL_XLOCK(); 636 oidp = sysctl_find_oidname(name, parent); 637 if (oidp != NULL) { 638 if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) { 639 oidp->oid_refcnt++; 640 /* Update the context */ 641 if (clist != NULL) 642 sysctl_ctx_entry_add(clist, oidp); 643 SYSCTL_XUNLOCK(); 644 return (oidp); 645 } else { 646 SYSCTL_XUNLOCK(); 647 printf("can't re-use a leaf (%s)!\n", name); 648 return (NULL); 649 } 650 } 651 oidp = malloc(sizeof(struct sysctl_oid), M_SYSCTLOID, M_WAITOK|M_ZERO); 652 oidp->oid_parent = parent; 653 SLIST_INIT(&oidp->oid_children); 654 oidp->oid_number = number; 655 oidp->oid_refcnt = 1; 656 oidp->oid_name = strdup(name, M_SYSCTLOID); 657 oidp->oid_handler = handler; 658 oidp->oid_kind = CTLFLAG_DYN | kind; 659 oidp->oid_arg1 = arg1; 660 oidp->oid_arg2 = arg2; 661 oidp->oid_fmt = fmt; 662 if (descr != NULL) 663 oidp->oid_descr = strdup(descr, M_SYSCTLOID); 664 /* Update the context, if used */ 665 if (clist != NULL) 666 sysctl_ctx_entry_add(clist, oidp); 667 /* Register this oid */ 668 sysctl_register_oid(oidp); 669 SYSCTL_XUNLOCK(); 670 return (oidp); 671} 672 673/* 674 * Rename an existing oid. 675 */ 676void 677sysctl_rename_oid(struct sysctl_oid *oidp, const char *name) 678{ 679 char *newname; 680 char *oldname; 681 682 newname = strdup(name, M_SYSCTLOID); 683 SYSCTL_XLOCK(); 684 oldname = __DECONST(char *, oidp->oid_name); 685 oidp->oid_name = newname; 686 SYSCTL_XUNLOCK(); 687 free(oldname, M_SYSCTLOID); 688} 689 690/* 691 * Reparent an existing oid. 692 */ 693int 694sysctl_move_oid(struct sysctl_oid *oid, struct sysctl_oid_list *parent) 695{ 696 struct sysctl_oid *oidp; 697 698 SYSCTL_XLOCK(); 699 if (oid->oid_parent == parent) { 700 SYSCTL_XUNLOCK(); 701 return (0); 702 } 703 oidp = sysctl_find_oidname(oid->oid_name, parent); 704 if (oidp != NULL) { 705 SYSCTL_XUNLOCK(); 706 return (EEXIST); 707 } 708 sysctl_unregister_oid(oid); 709 oid->oid_parent = parent; 710 oid->oid_number = OID_AUTO; 711 sysctl_register_oid(oid); 712 SYSCTL_XUNLOCK(); 713 return (0); 714} 715 716/* 717 * Register the kernel's oids on startup. 718 */ 719SET_DECLARE(sysctl_set, struct sysctl_oid); 720 721static void 722sysctl_register_all(void *arg) 723{ 724 struct sysctl_oid **oidp; 725 726 sx_init(&sysctlmemlock, "sysctl mem"); 727 SYSCTL_INIT(); 728 SYSCTL_XLOCK(); 729 SET_FOREACH(oidp, sysctl_set) 730 sysctl_register_oid(*oidp); 731 SYSCTL_XUNLOCK(); 732} 733SYSINIT(sysctl, SI_SUB_KMEM, SI_ORDER_FIRST, sysctl_register_all, 0); 734 735/* 736 * "Staff-functions" 737 * 738 * These functions implement a presently undocumented interface 739 * used by the sysctl program to walk the tree, and get the type 740 * so it can print the value. 741 * This interface is under work and consideration, and should probably 742 * be killed with a big axe by the first person who can find the time. 743 * (be aware though, that the proper interface isn't as obvious as it 744 * may seem, there are various conflicting requirements. 745 * 746 * {0,0} printf the entire MIB-tree. 747 * {0,1,...} return the name of the "..." OID. 748 * {0,2,...} return the next OID. 749 * {0,3} return the OID of the name in "new" 750 * {0,4,...} return the kind & format info for the "..." OID. 751 * {0,5,...} return the description the "..." OID. 752 */ 753 754#ifdef SYSCTL_DEBUG 755static void 756sysctl_sysctl_debug_dump_node(struct sysctl_oid_list *l, int i) 757{ 758 int k; 759 struct sysctl_oid *oidp; 760 761 SYSCTL_ASSERT_LOCKED(); 762 SLIST_FOREACH(oidp, l, oid_link) { 763 764 for (k=0; k<i; k++) 765 printf(" "); 766 767 printf("%d %s ", oidp->oid_number, oidp->oid_name); 768 769 printf("%c%c", 770 oidp->oid_kind & CTLFLAG_RD ? 'R':' ', 771 oidp->oid_kind & CTLFLAG_WR ? 'W':' '); 772 773 if (oidp->oid_handler) 774 printf(" *Handler"); 775 776 switch (oidp->oid_kind & CTLTYPE) { 777 case CTLTYPE_NODE: 778 printf(" Node\n"); 779 if (!oidp->oid_handler) { 780 sysctl_sysctl_debug_dump_node( 781 SYSCTL_CHILDREN(oidp), i + 2); 782 } 783 break; 784 case CTLTYPE_INT: printf(" Int\n"); break; 785 case CTLTYPE_UINT: printf(" u_int\n"); break; 786 case CTLTYPE_LONG: printf(" Long\n"); break; 787 case CTLTYPE_ULONG: printf(" u_long\n"); break; 788 case CTLTYPE_STRING: printf(" String\n"); break; 789 case CTLTYPE_U64: printf(" uint64_t\n"); break; 790 case CTLTYPE_S64: printf(" int64_t\n"); break; 791 case CTLTYPE_OPAQUE: printf(" Opaque/struct\n"); break; 792 default: printf("\n"); 793 } 794 795 } 796} 797 798static int 799sysctl_sysctl_debug(SYSCTL_HANDLER_ARGS) 800{ 801 int error; 802 803 error = priv_check(req->td, PRIV_SYSCTL_DEBUG); 804 if (error) 805 return (error); 806 SYSCTL_SLOCK(); 807 sysctl_sysctl_debug_dump_node(&sysctl__children, 0); 808 SYSCTL_SUNLOCK(); 809 return (ENOENT); 810} 811 812SYSCTL_PROC(_sysctl, 0, debug, CTLTYPE_STRING|CTLFLAG_RD|CTLFLAG_MPSAFE, 813 0, 0, sysctl_sysctl_debug, "-", ""); 814#endif 815 816static int 817sysctl_sysctl_name(SYSCTL_HANDLER_ARGS) 818{ 819 int *name = (int *) arg1; 820 u_int namelen = arg2; 821 int error = 0; 822 struct sysctl_oid *oid; 823 struct sysctl_oid_list *lsp = &sysctl__children, *lsp2; 824 char buf[10]; 825 826 SYSCTL_SLOCK(); 827 while (namelen) { 828 if (!lsp) { 829 snprintf(buf,sizeof(buf),"%d",*name); 830 if (req->oldidx) 831 error = SYSCTL_OUT(req, ".", 1); 832 if (!error) 833 error = SYSCTL_OUT(req, buf, strlen(buf)); 834 if (error) 835 goto out; 836 namelen--; 837 name++; 838 continue; 839 } 840 lsp2 = 0; 841 SLIST_FOREACH(oid, lsp, oid_link) { 842 if (oid->oid_number != *name) 843 continue; 844 845 if (req->oldidx) 846 error = SYSCTL_OUT(req, ".", 1); 847 if (!error) 848 error = SYSCTL_OUT(req, oid->oid_name, 849 strlen(oid->oid_name)); 850 if (error) 851 goto out; 852 853 namelen--; 854 name++; 855 856 if ((oid->oid_kind & CTLTYPE) != CTLTYPE_NODE) 857 break; 858 859 if (oid->oid_handler) 860 break; 861 862 lsp2 = SYSCTL_CHILDREN(oid); 863 break; 864 } 865 lsp = lsp2; 866 } 867 error = SYSCTL_OUT(req, "", 1); 868 out: 869 SYSCTL_SUNLOCK(); 870 return (error); 871} 872 873/* 874 * XXXRW/JA: Shouldn't return name data for nodes that we don't permit in 875 * capability mode. 876 */ 877static SYSCTL_NODE(_sysctl, 1, name, CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_CAPRD, 878 sysctl_sysctl_name, ""); 879 880static int 881sysctl_sysctl_next_ls(struct sysctl_oid_list *lsp, int *name, u_int namelen, 882 int *next, int *len, int level, struct sysctl_oid **oidpp) 883{ 884 struct sysctl_oid *oidp; 885 886 SYSCTL_ASSERT_LOCKED(); 887 *len = level; 888 SLIST_FOREACH(oidp, lsp, oid_link) { 889 *next = oidp->oid_number; 890 *oidpp = oidp; 891 892 if (oidp->oid_kind & CTLFLAG_SKIP) 893 continue; 894 895 if (!namelen) { 896 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) 897 return (0); 898 if (oidp->oid_handler) 899 /* We really should call the handler here...*/ 900 return (0); 901 lsp = SYSCTL_CHILDREN(oidp); 902 if (!sysctl_sysctl_next_ls(lsp, 0, 0, next+1, 903 len, level+1, oidpp)) 904 return (0); 905 goto emptynode; 906 } 907 908 if (oidp->oid_number < *name) 909 continue; 910 911 if (oidp->oid_number > *name) { 912 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) 913 return (0); 914 if (oidp->oid_handler) 915 return (0); 916 lsp = SYSCTL_CHILDREN(oidp); 917 if (!sysctl_sysctl_next_ls(lsp, name+1, namelen-1, 918 next+1, len, level+1, oidpp)) 919 return (0); 920 goto next; 921 } 922 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) 923 continue; 924 925 if (oidp->oid_handler) 926 continue; 927 928 lsp = SYSCTL_CHILDREN(oidp); 929 if (!sysctl_sysctl_next_ls(lsp, name+1, namelen-1, next+1, 930 len, level+1, oidpp)) 931 return (0); 932 next: 933 namelen = 1; 934 emptynode: 935 *len = level; 936 } 937 return (1); 938} 939 940static int 941sysctl_sysctl_next(SYSCTL_HANDLER_ARGS) 942{ 943 int *name = (int *) arg1; 944 u_int namelen = arg2; 945 int i, j, error; 946 struct sysctl_oid *oid; 947 struct sysctl_oid_list *lsp = &sysctl__children; 948 int newoid[CTL_MAXNAME]; 949 950 SYSCTL_SLOCK(); 951 i = sysctl_sysctl_next_ls(lsp, name, namelen, newoid, &j, 1, &oid); 952 SYSCTL_SUNLOCK(); 953 if (i) 954 return (ENOENT); 955 error = SYSCTL_OUT(req, newoid, j * sizeof (int)); 956 return (error); 957} 958 959/* 960 * XXXRW/JA: Shouldn't return next data for nodes that we don't permit in 961 * capability mode. 962 */ 963static SYSCTL_NODE(_sysctl, 2, next, CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_CAPRD, 964 sysctl_sysctl_next, ""); 965 966static int 967name2oid(char *name, int *oid, int *len, struct sysctl_oid **oidpp) 968{ 969 struct sysctl_oid *oidp; 970 struct sysctl_oid_list *lsp = &sysctl__children; 971 char *p; 972 973 SYSCTL_ASSERT_LOCKED(); 974 975 for (*len = 0; *len < CTL_MAXNAME;) { 976 p = strsep(&name, "."); 977 978 oidp = SLIST_FIRST(lsp); 979 for (;; oidp = SLIST_NEXT(oidp, oid_link)) { 980 if (oidp == NULL) 981 return (ENOENT); 982 if (strcmp(p, oidp->oid_name) == 0) 983 break; 984 } 985 *oid++ = oidp->oid_number; 986 (*len)++; 987 988 if (name == NULL || *name == '\0') { 989 if (oidpp) 990 *oidpp = oidp; 991 return (0); 992 } 993 994 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) 995 break; 996 997 if (oidp->oid_handler) 998 break; 999 1000 lsp = SYSCTL_CHILDREN(oidp); 1001 } 1002 return (ENOENT); 1003} 1004 1005static int 1006sysctl_sysctl_name2oid(SYSCTL_HANDLER_ARGS) 1007{ 1008 char *p; 1009 int error, oid[CTL_MAXNAME], len = 0; 1010 struct sysctl_oid *op = 0; 1011 1012 if (!req->newlen) 1013 return (ENOENT); 1014 if (req->newlen >= MAXPATHLEN) /* XXX arbitrary, undocumented */ 1015 return (ENAMETOOLONG); 1016 1017 p = malloc(req->newlen+1, M_SYSCTL, M_WAITOK); 1018 1019 error = SYSCTL_IN(req, p, req->newlen); 1020 if (error) { 1021 free(p, M_SYSCTL); 1022 return (error); 1023 } 1024 1025 p [req->newlen] = '\0'; 1026 1027 SYSCTL_SLOCK(); 1028 error = name2oid(p, oid, &len, &op); 1029 SYSCTL_SUNLOCK(); 1030 1031 free(p, M_SYSCTL); 1032 1033 if (error) 1034 return (error); 1035 1036 error = SYSCTL_OUT(req, oid, len * sizeof *oid); 1037 return (error); 1038} 1039 1040/* 1041 * XXXRW/JA: Shouldn't return name2oid data for nodes that we don't permit in 1042 * capability mode. 1043 */ 1044SYSCTL_PROC(_sysctl, 3, name2oid, 1045 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE 1046 | CTLFLAG_CAPRW, 0, 0, sysctl_sysctl_name2oid, "I", ""); 1047 1048static int 1049sysctl_sysctl_oidfmt(SYSCTL_HANDLER_ARGS) 1050{ 1051 struct sysctl_oid *oid; 1052 int error; 1053 1054 SYSCTL_SLOCK(); 1055 error = sysctl_find_oid(arg1, arg2, &oid, NULL, req); 1056 if (error) 1057 goto out; 1058 1059 if (oid->oid_fmt == NULL) { 1060 error = ENOENT; 1061 goto out; 1062 } 1063 error = SYSCTL_OUT(req, &oid->oid_kind, sizeof(oid->oid_kind)); 1064 if (error) 1065 goto out; 1066 error = SYSCTL_OUT(req, oid->oid_fmt, strlen(oid->oid_fmt) + 1); 1067 out: 1068 SYSCTL_SUNLOCK(); 1069 return (error); 1070} 1071 1072 1073static SYSCTL_NODE(_sysctl, 4, oidfmt, CTLFLAG_RD|CTLFLAG_MPSAFE|CTLFLAG_CAPRD, 1074 sysctl_sysctl_oidfmt, ""); 1075 1076static int 1077sysctl_sysctl_oiddescr(SYSCTL_HANDLER_ARGS) 1078{ 1079 struct sysctl_oid *oid; 1080 int error; 1081 1082 SYSCTL_SLOCK(); 1083 error = sysctl_find_oid(arg1, arg2, &oid, NULL, req); 1084 if (error) 1085 goto out; 1086 1087 if (oid->oid_descr == NULL) { 1088 error = ENOENT; 1089 goto out; 1090 } 1091 error = SYSCTL_OUT(req, oid->oid_descr, strlen(oid->oid_descr) + 1); 1092 out: 1093 SYSCTL_SUNLOCK(); 1094 return (error); 1095} 1096 1097static SYSCTL_NODE(_sysctl, 5, oiddescr, CTLFLAG_RD|CTLFLAG_MPSAFE|CTLFLAG_CAPRD, 1098 sysctl_sysctl_oiddescr, ""); 1099 1100/* 1101 * Default "handler" functions. 1102 */ 1103 1104/* 1105 * Handle an int, signed or unsigned. 1106 * Two cases: 1107 * a variable: point arg1 at it. 1108 * a constant: pass it in arg2. 1109 */ 1110 1111int 1112sysctl_handle_int(SYSCTL_HANDLER_ARGS) 1113{ 1114 int tmpout, error = 0; 1115 1116 /* 1117 * Attempt to get a coherent snapshot by making a copy of the data. 1118 */ 1119 if (arg1) 1120 tmpout = *(int *)arg1; 1121 else 1122 tmpout = arg2; 1123 error = SYSCTL_OUT(req, &tmpout, sizeof(int)); 1124 1125 if (error || !req->newptr) 1126 return (error); 1127 1128 if (!arg1) 1129 error = EPERM; 1130 else 1131 error = SYSCTL_IN(req, arg1, sizeof(int)); 1132 return (error); 1133} 1134 1135/* 1136 * Based on on sysctl_handle_int() convert milliseconds into ticks. 1137 * Note: this is used by TCP. 1138 */ 1139 1140int 1141sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS) 1142{ 1143 int error, s, tt; 1144 1145 tt = *(int *)arg1; 1146 s = (int)((int64_t)tt * 1000 / hz); 1147 1148 error = sysctl_handle_int(oidp, &s, 0, req); 1149 if (error || !req->newptr) 1150 return (error); 1151 1152 tt = (int)((int64_t)s * hz / 1000); 1153 if (tt < 1) 1154 return (EINVAL); 1155 1156 *(int *)arg1 = tt; 1157 return (0); 1158} 1159 1160 1161/* 1162 * Handle a long, signed or unsigned. 1163 * Two cases: 1164 * a variable: point arg1 at it. 1165 * a constant: pass it in arg2. 1166 */ 1167 1168int 1169sysctl_handle_long(SYSCTL_HANDLER_ARGS) 1170{ 1171 int error = 0; 1172 long tmplong; 1173#ifdef SCTL_MASK32 1174 int tmpint; 1175#endif 1176 1177 /* 1178 * Attempt to get a coherent snapshot by making a copy of the data. 1179 */ 1180 if (arg1) 1181 tmplong = *(long *)arg1; 1182 else 1183 tmplong = arg2; 1184#ifdef SCTL_MASK32 1185 if (req->flags & SCTL_MASK32) { 1186 tmpint = tmplong; 1187 error = SYSCTL_OUT(req, &tmpint, sizeof(int)); 1188 } else 1189#endif 1190 error = SYSCTL_OUT(req, &tmplong, sizeof(long)); 1191 1192 if (error || !req->newptr) 1193 return (error); 1194 1195 if (!arg1) 1196 error = EPERM; 1197#ifdef SCTL_MASK32 1198 else if (req->flags & SCTL_MASK32) { 1199 error = SYSCTL_IN(req, &tmpint, sizeof(int)); 1200 *(long *)arg1 = (long)tmpint; 1201 } 1202#endif 1203 else 1204 error = SYSCTL_IN(req, arg1, sizeof(long)); 1205 return (error); 1206} 1207 1208/* 1209 * Handle a 64 bit int, signed or unsigned. 1210 * Two cases: 1211 * a variable: point arg1 at it. 1212 * a constant: pass it in arg2. 1213 */ 1214int 1215sysctl_handle_64(SYSCTL_HANDLER_ARGS) 1216{ 1217 int error = 0; 1218 uint64_t tmpout; 1219 1220 /* 1221 * Attempt to get a coherent snapshot by making a copy of the data. 1222 */ 1223 if (arg1) 1224 tmpout = *(uint64_t *)arg1; 1225 else 1226 tmpout = arg2; 1227 error = SYSCTL_OUT(req, &tmpout, sizeof(uint64_t)); 1228 1229 if (error || !req->newptr) 1230 return (error); 1231 1232 if (!arg1) 1233 error = EPERM; 1234 else 1235 error = SYSCTL_IN(req, arg1, sizeof(uint64_t)); 1236 return (error); 1237} 1238 1239/* 1240 * Handle our generic '\0' terminated 'C' string. 1241 * Two cases: 1242 * a variable string: point arg1 at it, arg2 is max length. 1243 * a constant string: point arg1 at it, arg2 is zero. 1244 */ 1245 1246int 1247sysctl_handle_string(SYSCTL_HANDLER_ARGS) 1248{ 1249 size_t outlen; 1250 int error = 0, ro_string = 0; 1251 1252 /* 1253 * A zero-length buffer indicates a fixed size read-only 1254 * string: 1255 */ 1256 if (arg2 == 0) { 1257 arg2 = strlen((char *)arg1) + 1; 1258 ro_string = 1; 1259 } 1260 1261 if (req->oldptr != NULL) { 1262 char *tmparg; 1263 1264 if (ro_string) { 1265 tmparg = arg1; 1266 } else { 1267 /* try to make a coherent snapshot of the string */ 1268 tmparg = malloc(arg2, M_SYSCTLTMP, M_WAITOK); 1269 memcpy(tmparg, arg1, arg2); 1270 } 1271 1272 outlen = strnlen(tmparg, arg2 - 1) + 1; 1273 error = SYSCTL_OUT(req, tmparg, outlen); 1274 1275 if (!ro_string) 1276 free(tmparg, M_SYSCTLTMP); 1277 } else { 1278 outlen = strnlen((char *)arg1, arg2 - 1) + 1; 1279 error = SYSCTL_OUT(req, NULL, outlen); 1280 } 1281 if (error || !req->newptr) 1282 return (error); 1283 1284 if ((req->newlen - req->newidx) >= arg2) { 1285 error = EINVAL; 1286 } else { 1287 arg2 = (req->newlen - req->newidx); 1288 error = SYSCTL_IN(req, arg1, arg2); 1289 ((char *)arg1)[arg2] = '\0'; 1290 } 1291 return (error); 1292} 1293 1294/* 1295 * Handle any kind of opaque data. 1296 * arg1 points to it, arg2 is the size. 1297 */ 1298 1299int 1300sysctl_handle_opaque(SYSCTL_HANDLER_ARGS) 1301{ 1302 int error, tries; 1303 u_int generation; 1304 struct sysctl_req req2; 1305 1306 /* 1307 * Attempt to get a coherent snapshot, by using the thread 1308 * pre-emption counter updated from within mi_switch() to 1309 * determine if we were pre-empted during a bcopy() or 1310 * copyout(). Make 3 attempts at doing this before giving up. 1311 * If we encounter an error, stop immediately. 1312 */ 1313 tries = 0; 1314 req2 = *req; 1315retry: 1316 generation = curthread->td_generation; 1317 error = SYSCTL_OUT(req, arg1, arg2); 1318 if (error) 1319 return (error); 1320 tries++; 1321 if (generation != curthread->td_generation && tries < 3) { 1322 *req = req2; 1323 goto retry; 1324 } 1325 1326 error = SYSCTL_IN(req, arg1, arg2); 1327 1328 return (error); 1329} 1330 1331/* 1332 * Transfer functions to/from kernel space. 1333 * XXX: rather untested at this point 1334 */ 1335static int 1336sysctl_old_kernel(struct sysctl_req *req, const void *p, size_t l) 1337{ 1338 size_t i = 0; 1339 1340 if (req->oldptr) { 1341 i = l; 1342 if (req->oldlen <= req->oldidx) 1343 i = 0; 1344 else 1345 if (i > req->oldlen - req->oldidx) 1346 i = req->oldlen - req->oldidx; 1347 if (i > 0) 1348 bcopy(p, (char *)req->oldptr + req->oldidx, i); 1349 } 1350 req->oldidx += l; 1351 if (req->oldptr && i != l) 1352 return (ENOMEM); 1353 return (0); 1354} 1355 1356static int 1357sysctl_new_kernel(struct sysctl_req *req, void *p, size_t l) 1358{ 1359 if (!req->newptr) 1360 return (0); 1361 if (req->newlen - req->newidx < l) 1362 return (EINVAL); 1363 bcopy((char *)req->newptr + req->newidx, p, l); 1364 req->newidx += l; 1365 return (0); 1366} 1367 1368int 1369kernel_sysctl(struct thread *td, int *name, u_int namelen, void *old, 1370 size_t *oldlenp, void *new, size_t newlen, size_t *retval, int flags) 1371{ 1372 int error = 0; 1373 struct sysctl_req req; 1374 1375 bzero(&req, sizeof req); 1376 1377 req.td = td; 1378 req.flags = flags; 1379 1380 if (oldlenp) { 1381 req.oldlen = *oldlenp; 1382 } 1383 req.validlen = req.oldlen; 1384 1385 if (old) { 1386 req.oldptr= old; 1387 } 1388 1389 if (new != NULL) { 1390 req.newlen = newlen; 1391 req.newptr = new; 1392 } 1393 1394 req.oldfunc = sysctl_old_kernel; 1395 req.newfunc = sysctl_new_kernel; 1396 req.lock = REQ_UNWIRED; 1397 1398 SYSCTL_SLOCK(); 1399 error = sysctl_root(0, name, namelen, &req); 1400 SYSCTL_SUNLOCK(); 1401 1402 if (req.lock == REQ_WIRED && req.validlen > 0) 1403 vsunlock(req.oldptr, req.validlen); 1404 1405 if (error && error != ENOMEM) 1406 return (error); 1407 1408 if (retval) { 1409 if (req.oldptr && req.oldidx > req.validlen) 1410 *retval = req.validlen; 1411 else 1412 *retval = req.oldidx; 1413 } 1414 return (error); 1415} 1416 1417int 1418kernel_sysctlbyname(struct thread *td, char *name, void *old, size_t *oldlenp, 1419 void *new, size_t newlen, size_t *retval, int flags) 1420{ 1421 int oid[CTL_MAXNAME]; 1422 size_t oidlen, plen; 1423 int error; 1424 1425 oid[0] = 0; /* sysctl internal magic */ 1426 oid[1] = 3; /* name2oid */ 1427 oidlen = sizeof(oid); 1428 1429 error = kernel_sysctl(td, oid, 2, oid, &oidlen, 1430 (void *)name, strlen(name), &plen, flags); 1431 if (error) 1432 return (error); 1433 1434 error = kernel_sysctl(td, oid, plen / sizeof(int), old, oldlenp, 1435 new, newlen, retval, flags); 1436 return (error); 1437} 1438 1439/* 1440 * Transfer function to/from user space. 1441 */ 1442static int 1443sysctl_old_user(struct sysctl_req *req, const void *p, size_t l) 1444{ 1445 size_t i, len, origidx; 1446 int error; 1447 1448 origidx = req->oldidx; 1449 req->oldidx += l; 1450 if (req->oldptr == NULL) 1451 return (0); 1452 /* 1453 * If we have not wired the user supplied buffer and we are currently 1454 * holding locks, drop a witness warning, as it's possible that 1455 * write operations to the user page can sleep. 1456 */ 1457 if (req->lock != REQ_WIRED) 1458 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, 1459 "sysctl_old_user()"); 1460 i = l; 1461 len = req->validlen; 1462 if (len <= origidx) 1463 i = 0; 1464 else { 1465 if (i > len - origidx) 1466 i = len - origidx; 1467 if (req->lock == REQ_WIRED) { 1468 error = copyout_nofault(p, (char *)req->oldptr + 1469 origidx, i); 1470 } else 1471 error = copyout(p, (char *)req->oldptr + origidx, i); 1472 if (error != 0) 1473 return (error); 1474 } 1475 if (i < l) 1476 return (ENOMEM); 1477 return (0); 1478} 1479 1480static int 1481sysctl_new_user(struct sysctl_req *req, void *p, size_t l) 1482{ 1483 int error; 1484 1485 if (!req->newptr) 1486 return (0); 1487 if (req->newlen - req->newidx < l) 1488 return (EINVAL); 1489 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, 1490 "sysctl_new_user()"); 1491 error = copyin((char *)req->newptr + req->newidx, p, l); 1492 req->newidx += l; 1493 return (error); 1494} 1495 1496/* 1497 * Wire the user space destination buffer. If set to a value greater than 1498 * zero, the len parameter limits the maximum amount of wired memory. 1499 */ 1500int 1501sysctl_wire_old_buffer(struct sysctl_req *req, size_t len) 1502{ 1503 int ret; 1504 size_t wiredlen; 1505 1506 wiredlen = (len > 0 && len < req->oldlen) ? len : req->oldlen; 1507 ret = 0; 1508 if (req->lock != REQ_WIRED && req->oldptr && 1509 req->oldfunc == sysctl_old_user) { 1510 if (wiredlen != 0) { 1511 ret = vslock(req->oldptr, wiredlen); 1512 if (ret != 0) { 1513 if (ret != ENOMEM) 1514 return (ret); 1515 wiredlen = 0; 1516 } 1517 } 1518 req->lock = REQ_WIRED; 1519 req->validlen = wiredlen; 1520 } 1521 return (0); 1522} 1523 1524int 1525sysctl_find_oid(int *name, u_int namelen, struct sysctl_oid **noid, 1526 int *nindx, struct sysctl_req *req) 1527{ 1528 struct sysctl_oid_list *lsp; 1529 struct sysctl_oid *oid; 1530 int indx; 1531 1532 SYSCTL_ASSERT_LOCKED(); 1533 lsp = &sysctl__children; 1534 indx = 0; 1535 while (indx < CTL_MAXNAME) { 1536 SLIST_FOREACH(oid, lsp, oid_link) { 1537 if (oid->oid_number == name[indx]) 1538 break; 1539 } 1540 if (oid == NULL) 1541 return (ENOENT); 1542 1543 indx++; 1544 if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) { 1545 if (oid->oid_handler != NULL || indx == namelen) { 1546 *noid = oid; 1547 if (nindx != NULL) 1548 *nindx = indx; 1549 KASSERT((oid->oid_kind & CTLFLAG_DYING) == 0, 1550 ("%s found DYING node %p", __func__, oid)); 1551 return (0); 1552 } 1553 lsp = SYSCTL_CHILDREN(oid); 1554 } else if (indx == namelen) { 1555 *noid = oid; 1556 if (nindx != NULL) 1557 *nindx = indx; 1558 KASSERT((oid->oid_kind & CTLFLAG_DYING) == 0, 1559 ("%s found DYING node %p", __func__, oid)); 1560 return (0); 1561 } else { 1562 return (ENOTDIR); 1563 } 1564 } 1565 return (ENOENT); 1566} 1567 1568/* 1569 * Traverse our tree, and find the right node, execute whatever it points 1570 * to, and return the resulting error code. 1571 */ 1572 1573static int 1574sysctl_root(SYSCTL_HANDLER_ARGS) 1575{ 1576 struct sysctl_oid *oid; 1577 int error, indx, lvl; 1578 1579 SYSCTL_ASSERT_SLOCKED(); 1580 1581 error = sysctl_find_oid(arg1, arg2, &oid, &indx, req); 1582 if (error) 1583 return (error); 1584 1585 if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) { 1586 /* 1587 * You can't call a sysctl when it's a node, but has 1588 * no handler. Inform the user that it's a node. 1589 * The indx may or may not be the same as namelen. 1590 */ 1591 if (oid->oid_handler == NULL) 1592 return (EISDIR); 1593 } 1594 1595 /* Is this sysctl writable? */ 1596 if (req->newptr && !(oid->oid_kind & CTLFLAG_WR)) 1597 return (EPERM); 1598 1599 KASSERT(req->td != NULL, ("sysctl_root(): req->td == NULL")); 1600 1601#ifdef CAPABILITY_MODE 1602 /* 1603 * If the process is in capability mode, then don't permit reading or 1604 * writing unless specifically granted for the node. 1605 */ 1606 if (IN_CAPABILITY_MODE(req->td)) { 1607 if (req->oldptr && !(oid->oid_kind & CTLFLAG_CAPRD)) 1608 return (EPERM); 1609 if (req->newptr && !(oid->oid_kind & CTLFLAG_CAPWR)) 1610 return (EPERM); 1611 } 1612#endif 1613 1614 /* Is this sysctl sensitive to securelevels? */ 1615 if (req->newptr && (oid->oid_kind & CTLFLAG_SECURE)) { 1616 lvl = (oid->oid_kind & CTLMASK_SECURE) >> CTLSHIFT_SECURE; 1617 error = securelevel_gt(req->td->td_ucred, lvl); 1618 if (error) 1619 return (error); 1620 } 1621 1622 /* Is this sysctl writable by only privileged users? */ 1623 if (req->newptr && !(oid->oid_kind & CTLFLAG_ANYBODY)) { 1624 int priv; 1625 1626 if (oid->oid_kind & CTLFLAG_PRISON) 1627 priv = PRIV_SYSCTL_WRITEJAIL; 1628#ifdef VIMAGE 1629 else if ((oid->oid_kind & CTLFLAG_VNET) && 1630 prison_owns_vnet(req->td->td_ucred)) 1631 priv = PRIV_SYSCTL_WRITEJAIL; 1632#endif 1633 else 1634 priv = PRIV_SYSCTL_WRITE; 1635 error = priv_check(req->td, priv); 1636 if (error) 1637 return (error); 1638 } 1639 1640 if (!oid->oid_handler) 1641 return (EINVAL); 1642 1643 if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) { 1644 arg1 = (int *)arg1 + indx; 1645 arg2 -= indx; 1646 } else { 1647 arg1 = oid->oid_arg1; 1648 arg2 = oid->oid_arg2; 1649 } 1650#ifdef MAC 1651 error = mac_system_check_sysctl(req->td->td_ucred, oid, arg1, arg2, 1652 req); 1653 if (error != 0) 1654 return (error); 1655#endif 1656#ifdef VIMAGE 1657 if ((oid->oid_kind & CTLFLAG_VNET) && arg1 != NULL) 1658 arg1 = (void *)(curvnet->vnet_data_base + (uintptr_t)arg1); 1659#endif 1660 error = sysctl_root_handler_locked(oid, arg1, arg2, req); 1661 1662 KFAIL_POINT_ERROR(_debug_fail_point, sysctl_running, error); 1663 1664 return (error); 1665} 1666 1667#ifndef _SYS_SYSPROTO_H_ 1668struct sysctl_args { 1669 int *name; 1670 u_int namelen; 1671 void *old; 1672 size_t *oldlenp; 1673 void *new; 1674 size_t newlen; 1675}; 1676#endif 1677int 1678sys___sysctl(struct thread *td, struct sysctl_args *uap) 1679{ 1680 int error, i, name[CTL_MAXNAME]; 1681 size_t j; 1682 1683 if (uap->namelen > CTL_MAXNAME || uap->namelen < 2) 1684 return (EINVAL); 1685 1686 error = copyin(uap->name, &name, uap->namelen * sizeof(int)); 1687 if (error) 1688 return (error); 1689 1690 error = userland_sysctl(td, name, uap->namelen, 1691 uap->old, uap->oldlenp, 0, 1692 uap->new, uap->newlen, &j, 0); 1693 if (error && error != ENOMEM) 1694 return (error); 1695 if (uap->oldlenp) { 1696 i = copyout(&j, uap->oldlenp, sizeof(j)); 1697 if (i) 1698 return (i); 1699 } 1700 return (error); 1701} 1702 1703/* 1704 * This is used from various compatibility syscalls too. That's why name 1705 * must be in kernel space. 1706 */ 1707int 1708userland_sysctl(struct thread *td, int *name, u_int namelen, void *old, 1709 size_t *oldlenp, int inkernel, void *new, size_t newlen, size_t *retval, 1710 int flags) 1711{ 1712 int error = 0, memlocked; 1713 struct sysctl_req req; 1714 1715 bzero(&req, sizeof req); 1716 1717 req.td = td; 1718 req.flags = flags; 1719 1720 if (oldlenp) { 1721 if (inkernel) { 1722 req.oldlen = *oldlenp; 1723 } else { 1724 error = copyin(oldlenp, &req.oldlen, sizeof(*oldlenp)); 1725 if (error) 1726 return (error); 1727 } 1728 } 1729 req.validlen = req.oldlen; 1730 1731 if (old) { 1732 if (!useracc(old, req.oldlen, VM_PROT_WRITE)) 1733 return (EFAULT); 1734 req.oldptr= old; 1735 } 1736 1737 if (new != NULL) { 1738 if (!useracc(new, newlen, VM_PROT_READ)) 1739 return (EFAULT); 1740 req.newlen = newlen; 1741 req.newptr = new; 1742 } 1743 1744 req.oldfunc = sysctl_old_user; 1745 req.newfunc = sysctl_new_user; 1746 req.lock = REQ_UNWIRED; 1747 1748#ifdef KTRACE 1749 if (KTRPOINT(curthread, KTR_SYSCTL)) 1750 ktrsysctl(name, namelen); 1751#endif 1752 1753 if (req.oldlen > PAGE_SIZE) { 1754 memlocked = 1; 1755 sx_xlock(&sysctlmemlock); 1756 } else 1757 memlocked = 0; 1758 CURVNET_SET(TD_TO_VNET(td)); 1759 1760 for (;;) { 1761 req.oldidx = 0; 1762 req.newidx = 0; 1763 SYSCTL_SLOCK(); 1764 error = sysctl_root(0, name, namelen, &req); 1765 SYSCTL_SUNLOCK(); 1766 if (error != EAGAIN) 1767 break; 1768 kern_yield(PRI_USER); 1769 } 1770 1771 CURVNET_RESTORE(); 1772 1773 if (req.lock == REQ_WIRED && req.validlen > 0) 1774 vsunlock(req.oldptr, req.validlen); 1775 if (memlocked) 1776 sx_xunlock(&sysctlmemlock); 1777 1778 if (error && error != ENOMEM) 1779 return (error); 1780 1781 if (retval) { 1782 if (req.oldptr && req.oldidx > req.validlen) 1783 *retval = req.validlen; 1784 else 1785 *retval = req.oldidx; 1786 } 1787 return (error); 1788} 1789 1790/* 1791 * Drain into a sysctl struct. The user buffer should be wired if a page 1792 * fault would cause issue. 1793 */ 1794static int 1795sbuf_sysctl_drain(void *arg, const char *data, int len) 1796{ 1797 struct sysctl_req *req = arg; 1798 int error; 1799 1800 error = SYSCTL_OUT(req, data, len); 1801 KASSERT(error >= 0, ("Got unexpected negative value %d", error)); 1802 return (error == 0 ? len : -error); 1803} 1804 1805struct sbuf * 1806sbuf_new_for_sysctl(struct sbuf *s, char *buf, int length, 1807 struct sysctl_req *req) 1808{ 1809 1810 s = sbuf_new(s, buf, length, SBUF_FIXEDLEN); 1811 sbuf_set_drain(s, sbuf_sysctl_drain, req); 1812 return (s); 1813} 1814