181 182/* 183 * Global lists of subsystem constructor and destructors for vnets. They are 184 * registered via VNET_SYSINIT() and VNET_SYSUNINIT(). Both lists are 185 * protected by the vnet_sysinit_sxlock global lock. 186 */ 187static TAILQ_HEAD(vnet_sysinit_head, vnet_sysinit) vnet_constructors = 188 TAILQ_HEAD_INITIALIZER(vnet_constructors); 189static TAILQ_HEAD(vnet_sysuninit_head, vnet_sysinit) vnet_destructors = 190 TAILQ_HEAD_INITIALIZER(vnet_destructors); 191 192struct sx vnet_sysinit_sxlock; 193 194#define VNET_SYSINIT_WLOCK() sx_xlock(&vnet_sysinit_sxlock); 195#define VNET_SYSINIT_WUNLOCK() sx_xunlock(&vnet_sysinit_sxlock); 196#define VNET_SYSINIT_RLOCK() sx_slock(&vnet_sysinit_sxlock); 197#define VNET_SYSINIT_RUNLOCK() sx_sunlock(&vnet_sysinit_sxlock); 198 199struct vnet_data_free { 200 uintptr_t vnd_start; 201 int vnd_len; 202 TAILQ_ENTRY(vnet_data_free) vnd_link; 203}; 204 205MALLOC_DEFINE(M_VNET_DATA_FREE, "vnet_data_free", "VNET resource accounting"); 206static TAILQ_HEAD(, vnet_data_free) vnet_data_free_head = 207 TAILQ_HEAD_INITIALIZER(vnet_data_free_head); 208static struct sx vnet_data_free_lock; 209 210SDT_PROVIDER_DEFINE(vnet); 211SDT_PROBE_DEFINE1(vnet, functions, vnet_alloc, entry, entry, "int"); 212SDT_PROBE_DEFINE2(vnet, functions, vnet_alloc, alloc, alloc, "int", 213 "struct vnet *"); 214SDT_PROBE_DEFINE2(vnet, functions, vnet_alloc, return, return, 215 "int", "struct vnet *"); 216SDT_PROBE_DEFINE2(vnet, functions, vnet_destroy, entry, entry, 217 "int", "struct vnet *"); 218SDT_PROBE_DEFINE1(vnet, functions, vnet_destroy, return, entry, 219 "int"); 220 221#ifdef DDB 222static void db_show_vnet_print_vs(struct vnet_sysinit *, int); 223#endif 224 225/* 226 * Allocate a virtual network stack. 227 */ 228struct vnet * 229vnet_alloc(void) 230{ 231 struct vnet *vnet; 232 233 SDT_PROBE1(vnet, functions, vnet_alloc, entry, __LINE__); 234 vnet = malloc(sizeof(struct vnet), M_VNET, M_WAITOK | M_ZERO); 235 vnet->vnet_magic_n = VNET_MAGIC_N; 236 SDT_PROBE2(vnet, functions, vnet_alloc, alloc, __LINE__, vnet); 237 238 /* 239 * Allocate storage for virtualized global variables and copy in 240 * initial values form our 'master' copy. 241 */ 242 vnet->vnet_data_mem = malloc(VNET_SIZE, M_VNET_DATA, M_WAITOK); 243 memcpy(vnet->vnet_data_mem, (void *)VNET_START, VNET_BYTES); 244 245 /* 246 * All use of vnet-specific data will immediately subtract VNET_START 247 * from the base memory pointer, so pre-calculate that now to avoid 248 * it on each use. 249 */ 250 vnet->vnet_data_base = (uintptr_t)vnet->vnet_data_mem - VNET_START; 251 252 /* Initialize / attach vnet module instances. */ 253 CURVNET_SET_QUIET(vnet); 254 vnet_sysinit(); 255 CURVNET_RESTORE(); 256 257 VNET_LIST_WLOCK(); 258 LIST_INSERT_HEAD(&vnet_head, vnet, vnet_le); 259 VNET_LIST_WUNLOCK(); 260 261 SDT_PROBE2(vnet, functions, vnet_alloc, return, __LINE__, vnet); 262 return (vnet); 263} 264 265/* 266 * Destroy a virtual network stack. 267 */ 268void 269vnet_destroy(struct vnet *vnet) 270{ 271 struct ifnet *ifp, *nifp; 272 273 SDT_PROBE2(vnet, functions, vnet_destroy, entry, __LINE__, vnet); 274 KASSERT(vnet->vnet_sockcnt == 0, 275 ("%s: vnet still has sockets", __func__)); 276 277 VNET_LIST_WLOCK(); 278 LIST_REMOVE(vnet, vnet_le); 279 VNET_LIST_WUNLOCK(); 280 281 CURVNET_SET_QUIET(vnet); 282 283 /* Return all inherited interfaces to their parent vnets. */ 284 TAILQ_FOREACH_SAFE(ifp, &V_ifnet, if_link, nifp) { 285 if (ifp->if_home_vnet != ifp->if_vnet) 286 if_vmove(ifp, ifp->if_home_vnet); 287 } 288 289 vnet_sysuninit(); 290 CURVNET_RESTORE(); 291 292 /* 293 * Release storage for the virtual network stack instance. 294 */ 295 free(vnet->vnet_data_mem, M_VNET_DATA); 296 vnet->vnet_data_mem = NULL; 297 vnet->vnet_data_base = 0; 298 vnet->vnet_magic_n = 0xdeadbeef; 299 free(vnet, M_VNET); 300 SDT_PROBE1(vnet, functions, vnet_destroy, return, __LINE__); 301} 302 303/* 304 * Boot time initialization and allocation of virtual network stacks. 305 */ 306static void 307vnet_init_prelink(void *arg) 308{ 309 310 rw_init(&vnet_rwlock, "vnet_rwlock"); 311 sx_init(&vnet_sxlock, "vnet_sxlock"); 312 sx_init(&vnet_sysinit_sxlock, "vnet_sysinit_sxlock"); 313 LIST_INIT(&vnet_head); 314} 315SYSINIT(vnet_init_prelink, SI_SUB_VNET_PRELINK, SI_ORDER_FIRST, 316 vnet_init_prelink, NULL); 317 318static void 319vnet0_init(void *arg) 320{ 321 322 /* Warn people before take off - in case we crash early. */ 323 printf("WARNING: VIMAGE (virtualized network stack) is a highly " 324 "experimental feature.\n"); 325 326 /* 327 * We MUST clear curvnet in vi_init_done() before going SMP, 328 * otherwise CURVNET_SET() macros would scream about unnecessary 329 * curvnet recursions. 330 */ 331 curvnet = prison0.pr_vnet = vnet0 = vnet_alloc(); 332} 333SYSINIT(vnet0_init, SI_SUB_VNET, SI_ORDER_FIRST, vnet0_init, NULL); 334 335static void 336vnet_init_done(void *unused) 337{ 338 339 curvnet = NULL; 340} 341 342SYSINIT(vnet_init_done, SI_SUB_VNET_DONE, SI_ORDER_FIRST, vnet_init_done, 343 NULL); 344 345/* 346 * Once on boot, initialize the modspace freelist to entirely cover modspace. 347 */ 348static void 349vnet_data_startup(void *dummy __unused) 350{ 351 struct vnet_data_free *df; 352 353 df = malloc(sizeof(*df), M_VNET_DATA_FREE, M_WAITOK | M_ZERO); 354 df->vnd_start = (uintptr_t)&VNET_NAME(modspace); 355 df->vnd_len = VNET_MODMIN; 356 TAILQ_INSERT_HEAD(&vnet_data_free_head, df, vnd_link); 357 sx_init(&vnet_data_free_lock, "vnet_data alloc lock"); 358} 359SYSINIT(vnet_data, SI_SUB_KLD, SI_ORDER_FIRST, vnet_data_startup, 0); 360 361/* 362 * When a module is loaded and requires storage for a virtualized global 363 * variable, allocate space from the modspace free list. This interface 364 * should be used only by the kernel linker. 365 */ 366void * 367vnet_data_alloc(int size) 368{ 369 struct vnet_data_free *df; 370 void *s; 371 372 s = NULL; 373 size = roundup2(size, sizeof(void *)); 374 sx_xlock(&vnet_data_free_lock); 375 TAILQ_FOREACH(df, &vnet_data_free_head, vnd_link) { 376 if (df->vnd_len < size) 377 continue; 378 if (df->vnd_len == size) { 379 s = (void *)df->vnd_start; 380 TAILQ_REMOVE(&vnet_data_free_head, df, vnd_link); 381 free(df, M_VNET_DATA_FREE); 382 break; 383 } 384 s = (void *)df->vnd_start; 385 df->vnd_len -= size; 386 df->vnd_start = df->vnd_start + size; 387 break; 388 } 389 sx_xunlock(&vnet_data_free_lock); 390 391 return (s); 392} 393 394/* 395 * Free space for a virtualized global variable on module unload. 396 */ 397void 398vnet_data_free(void *start_arg, int size) 399{ 400 struct vnet_data_free *df; 401 struct vnet_data_free *dn; 402 uintptr_t start; 403 uintptr_t end; 404 405 size = roundup2(size, sizeof(void *)); 406 start = (uintptr_t)start_arg; 407 end = start + size; 408 /* 409 * Free a region of space and merge it with as many neighbors as 410 * possible. Keeping the list sorted simplifies this operation. 411 */ 412 sx_xlock(&vnet_data_free_lock); 413 TAILQ_FOREACH(df, &vnet_data_free_head, vnd_link) { 414 if (df->vnd_start > end) 415 break; 416 /* 417 * If we expand at the end of an entry we may have to merge 418 * it with the one following it as well. 419 */ 420 if (df->vnd_start + df->vnd_len == start) { 421 df->vnd_len += size; 422 dn = TAILQ_NEXT(df, vnd_link); 423 if (df->vnd_start + df->vnd_len == dn->vnd_start) { 424 df->vnd_len += dn->vnd_len; 425 TAILQ_REMOVE(&vnet_data_free_head, dn, 426 vnd_link); 427 free(dn, M_VNET_DATA_FREE); 428 } 429 sx_xunlock(&vnet_data_free_lock); 430 return; 431 } 432 if (df->vnd_start == end) { 433 df->vnd_start = start; 434 df->vnd_len += size; 435 sx_xunlock(&vnet_data_free_lock); 436 return; 437 } 438 } 439 dn = malloc(sizeof(*df), M_VNET_DATA_FREE, M_WAITOK | M_ZERO); 440 dn->vnd_start = start; 441 dn->vnd_len = size; 442 if (df) 443 TAILQ_INSERT_BEFORE(df, dn, vnd_link); 444 else 445 TAILQ_INSERT_TAIL(&vnet_data_free_head, dn, vnd_link); 446 sx_xunlock(&vnet_data_free_lock); 447} 448 449/* 450 * When a new virtualized global variable has been allocated, propagate its 451 * initial value to each already-allocated virtual network stack instance. 452 */ 453void 454vnet_data_copy(void *start, int size) 455{ 456 struct vnet *vnet; 457 458 VNET_LIST_RLOCK(); 459 LIST_FOREACH(vnet, &vnet_head, vnet_le) 460 memcpy((void *)((uintptr_t)vnet->vnet_data_base + 461 (uintptr_t)start), start, size); 462 VNET_LIST_RUNLOCK(); 463} 464 465/* 466 * Variants on sysctl_handle_foo that know how to handle virtualized global 467 * variables: if 'arg1' is a pointer, then we transform it to the local vnet 468 * offset. 469 */ 470int 471vnet_sysctl_handle_int(SYSCTL_HANDLER_ARGS) 472{ 473 474 if (arg1 != NULL) 475 arg1 = (void *)(curvnet->vnet_data_base + (uintptr_t)arg1); 476 return (sysctl_handle_int(oidp, arg1, arg2, req)); 477} 478 479int 480vnet_sysctl_handle_opaque(SYSCTL_HANDLER_ARGS) 481{ 482 483 if (arg1 != NULL) 484 arg1 = (void *)(curvnet->vnet_data_base + (uintptr_t)arg1); 485 return (sysctl_handle_opaque(oidp, arg1, arg2, req)); 486} 487 488int 489vnet_sysctl_handle_string(SYSCTL_HANDLER_ARGS) 490{ 491 492 if (arg1 != NULL) 493 arg1 = (void *)(curvnet->vnet_data_base + (uintptr_t)arg1); 494 return (sysctl_handle_string(oidp, arg1, arg2, req)); 495} 496 497int 498vnet_sysctl_handle_uint(SYSCTL_HANDLER_ARGS) 499{ 500 501 if (arg1 != NULL) 502 arg1 = (void *)(curvnet->vnet_data_base + (uintptr_t)arg1); 503 return (sysctl_handle_int(oidp, arg1, arg2, req)); 504} 505 506/* 507 * Support for special SYSINIT handlers registered via VNET_SYSINIT() 508 * and VNET_SYSUNINIT(). 509 */ 510void 511vnet_register_sysinit(void *arg) 512{ 513 struct vnet_sysinit *vs, *vs2; 514 struct vnet *vnet; 515 516 vs = arg; 517 KASSERT(vs->subsystem > SI_SUB_VNET, ("vnet sysinit too early")); 518 519 /* Add the constructor to the global list of vnet constructors. */ 520 VNET_SYSINIT_WLOCK(); 521 TAILQ_FOREACH(vs2, &vnet_constructors, link) { 522 if (vs2->subsystem > vs->subsystem) 523 break; 524 if (vs2->subsystem == vs->subsystem && vs2->order > vs->order) 525 break; 526 } 527 if (vs2 != NULL) 528 TAILQ_INSERT_BEFORE(vs2, vs, link); 529 else 530 TAILQ_INSERT_TAIL(&vnet_constructors, vs, link); 531 532 /* 533 * Invoke the constructor on all the existing vnets when it is 534 * registered. 535 */ 536 VNET_FOREACH(vnet) { 537 CURVNET_SET_QUIET(vnet); 538 vs->func(vs->arg); 539 CURVNET_RESTORE(); 540 } 541 VNET_SYSINIT_WUNLOCK(); 542} 543 544void 545vnet_deregister_sysinit(void *arg) 546{ 547 struct vnet_sysinit *vs; 548 549 vs = arg; 550 551 /* Remove the constructor from the global list of vnet constructors. */ 552 VNET_SYSINIT_WLOCK(); 553 TAILQ_REMOVE(&vnet_constructors, vs, link); 554 VNET_SYSINIT_WUNLOCK(); 555} 556 557void 558vnet_register_sysuninit(void *arg) 559{ 560 struct vnet_sysinit *vs, *vs2; 561 562 vs = arg; 563 564 /* Add the destructor to the global list of vnet destructors. */ 565 VNET_SYSINIT_WLOCK(); 566 TAILQ_FOREACH(vs2, &vnet_destructors, link) { 567 if (vs2->subsystem > vs->subsystem) 568 break; 569 if (vs2->subsystem == vs->subsystem && vs2->order > vs->order) 570 break; 571 } 572 if (vs2 != NULL) 573 TAILQ_INSERT_BEFORE(vs2, vs, link); 574 else 575 TAILQ_INSERT_TAIL(&vnet_destructors, vs, link); 576 VNET_SYSINIT_WUNLOCK(); 577} 578 579void 580vnet_deregister_sysuninit(void *arg) 581{ 582 struct vnet_sysinit *vs; 583 struct vnet *vnet; 584 585 vs = arg; 586 587 /* 588 * Invoke the destructor on all the existing vnets when it is 589 * deregistered. 590 */ 591 VNET_SYSINIT_WLOCK(); 592 VNET_FOREACH(vnet) { 593 CURVNET_SET_QUIET(vnet); 594 vs->func(vs->arg); 595 CURVNET_RESTORE(); 596 } 597 598 /* Remove the destructor from the global list of vnet destructors. */ 599 TAILQ_REMOVE(&vnet_destructors, vs, link); 600 VNET_SYSINIT_WUNLOCK(); 601} 602 603/* 604 * Invoke all registered vnet constructors on the current vnet. Used during 605 * vnet construction. The caller is responsible for ensuring the new vnet is 606 * the current vnet and that the vnet_sysinit_sxlock lock is locked. 607 */ 608void 609vnet_sysinit(void) 610{ 611 struct vnet_sysinit *vs; 612 613 VNET_SYSINIT_RLOCK(); 614 TAILQ_FOREACH(vs, &vnet_constructors, link) { 615 vs->func(vs->arg); 616 } 617 VNET_SYSINIT_RUNLOCK(); 618} 619 620/* 621 * Invoke all registered vnet destructors on the current vnet. Used during 622 * vnet destruction. The caller is responsible for ensuring the dying vnet 623 * the current vnet and that the vnet_sysinit_sxlock lock is locked. 624 */ 625void 626vnet_sysuninit(void) 627{ 628 struct vnet_sysinit *vs; 629 630 VNET_SYSINIT_RLOCK(); 631 TAILQ_FOREACH_REVERSE(vs, &vnet_destructors, vnet_sysuninit_head, 632 link) { 633 vs->func(vs->arg); 634 } 635 VNET_SYSINIT_RUNLOCK(); 636} 637 638/* 639 * EVENTHANDLER(9) extensions. 640 */ 641/* 642 * Invoke the eventhandler function originally registered with the possibly 643 * registered argument for all virtual network stack instances. 644 * 645 * This iterator can only be used for eventhandlers that do not take any 646 * additional arguments, as we do ignore the variadic arguments from the 647 * EVENTHANDLER_INVOKE() call. 648 */ 649void 650vnet_global_eventhandler_iterator_func(void *arg, ...) 651{ 652 VNET_ITERATOR_DECL(vnet_iter); 653 struct eventhandler_entry_vimage *v_ee; 654 655 /* 656 * There is a bug here in that we should actually cast things to 657 * (struct eventhandler_entry_ ## name *) but that's not easily 658 * possible in here so just re-using the variadic version we 659 * defined for the generic vimage case. 660 */ 661 v_ee = arg; 662 VNET_LIST_RLOCK(); 663 VNET_FOREACH(vnet_iter) { 664 CURVNET_SET(vnet_iter); 665 ((vimage_iterator_func_t)v_ee->func)(v_ee->ee_arg); 666 CURVNET_RESTORE(); 667 } 668 VNET_LIST_RUNLOCK(); 669} 670 671#ifdef VNET_DEBUG 672struct vnet_recursion { 673 SLIST_ENTRY(vnet_recursion) vnr_le; 674 const char *prev_fn; 675 const char *where_fn; 676 int where_line; 677 struct vnet *old_vnet; 678 struct vnet *new_vnet; 679}; 680 681static SLIST_HEAD(, vnet_recursion) vnet_recursions = 682 SLIST_HEAD_INITIALIZER(vnet_recursions); 683 684static void 685vnet_print_recursion(struct vnet_recursion *vnr, int brief) 686{ 687 688 if (!brief) 689 printf("CURVNET_SET() recursion in "); 690 printf("%s() line %d, prev in %s()", vnr->where_fn, vnr->where_line, 691 vnr->prev_fn); 692 if (brief) 693 printf(", "); 694 else 695 printf("\n "); 696 printf("%p -> %p\n", vnr->old_vnet, vnr->new_vnet); 697} 698 699void 700vnet_log_recursion(struct vnet *old_vnet, const char *old_fn, int line) 701{ 702 struct vnet_recursion *vnr; 703 704 /* Skip already logged recursion events. */ 705 SLIST_FOREACH(vnr, &vnet_recursions, vnr_le) 706 if (vnr->prev_fn == old_fn && 707 vnr->where_fn == curthread->td_vnet_lpush && 708 vnr->where_line == line && 709 (vnr->old_vnet == vnr->new_vnet) == (curvnet == old_vnet)) 710 return; 711 712 vnr = malloc(sizeof(*vnr), M_VNET, M_NOWAIT | M_ZERO); 713 if (vnr == NULL) 714 panic("%s: malloc failed", __func__); 715 vnr->prev_fn = old_fn; 716 vnr->where_fn = curthread->td_vnet_lpush; 717 vnr->where_line = line; 718 vnr->old_vnet = old_vnet; 719 vnr->new_vnet = curvnet; 720 721 SLIST_INSERT_HEAD(&vnet_recursions, vnr, vnr_le); 722 723 vnet_print_recursion(vnr, 0); 724#ifdef KDB 725 kdb_backtrace(); 726#endif 727} 728#endif /* VNET_DEBUG */ 729 730/* 731 * DDB(4). 732 */ 733#ifdef DDB 734DB_SHOW_COMMAND(vnets, db_show_vnets) 735{ 736 VNET_ITERATOR_DECL(vnet_iter); 737 738 VNET_FOREACH(vnet_iter) { 739 db_printf("vnet = %p\n", vnet_iter); 740 db_printf(" vnet_magic_n = 0x%x (%s, orig 0x%x)\n", 741 vnet_iter->vnet_magic_n, 742 (vnet_iter->vnet_magic_n == VNET_MAGIC_N) ? 743 "ok" : "mismatch", VNET_MAGIC_N); 744 db_printf(" vnet_ifcnt = %u\n", vnet_iter->vnet_ifcnt); 745 db_printf(" vnet_sockcnt = %u\n", vnet_iter->vnet_sockcnt); 746 db_printf(" vnet_data_mem = %p\n", vnet_iter->vnet_data_mem); 747 db_printf(" vnet_data_base = 0x%jx\n", 748 (uintmax_t)vnet_iter->vnet_data_base); 749 db_printf("\n"); 750 if (db_pager_quit) 751 break; 752 } 753} 754 755static void 756db_show_vnet_print_vs(struct vnet_sysinit *vs, int ddb) 757{ 758 const char *vsname, *funcname; 759 c_db_sym_t sym; 760 db_expr_t offset; 761 762#define xprint(...) \ 763 if (ddb) \ 764 db_printf(__VA_ARGS__); \ 765 else \ 766 printf(__VA_ARGS__) 767 768 if (vs == NULL) { 769 xprint("%s: no vnet_sysinit * given\n", __func__); 770 return; 771 } 772 773 sym = db_search_symbol((vm_offset_t)vs, DB_STGY_ANY, &offset); 774 db_symbol_values(sym, &vsname, NULL); 775 sym = db_search_symbol((vm_offset_t)vs->func, DB_STGY_PROC, &offset); 776 db_symbol_values(sym, &funcname, NULL); 777 xprint("%s(%p)\n", (vsname != NULL) ? vsname : "", vs); 778 xprint(" 0x%08x 0x%08x\n", vs->subsystem, vs->order); 779 xprint(" %p(%s)(%p)\n", 780 vs->func, (funcname != NULL) ? funcname : "", vs->arg); 781#undef xprint 782} 783 784DB_SHOW_COMMAND(vnet_sysinit, db_show_vnet_sysinit) 785{ 786 struct vnet_sysinit *vs; 787 788 db_printf("VNET_SYSINIT vs Name(Ptr)\n"); 789 db_printf(" Subsystem Order\n"); 790 db_printf(" Function(Name)(Arg)\n"); 791 TAILQ_FOREACH(vs, &vnet_constructors, link) { 792 db_show_vnet_print_vs(vs, 1); 793 if (db_pager_quit) 794 break; 795 } 796} 797 798DB_SHOW_COMMAND(vnet_sysuninit, db_show_vnet_sysuninit) 799{ 800 struct vnet_sysinit *vs; 801 802 db_printf("VNET_SYSUNINIT vs Name(Ptr)\n"); 803 db_printf(" Subsystem Order\n"); 804 db_printf(" Function(Name)(Arg)\n"); 805 TAILQ_FOREACH_REVERSE(vs, &vnet_destructors, vnet_sysuninit_head, 806 link) { 807 db_show_vnet_print_vs(vs, 1); 808 if (db_pager_quit) 809 break; 810 } 811} 812 813#ifdef VNET_DEBUG 814DB_SHOW_COMMAND(vnetrcrs, db_show_vnetrcrs) 815{ 816 struct vnet_recursion *vnr; 817 818 SLIST_FOREACH(vnr, &vnet_recursions, vnr_le) 819 vnet_print_recursion(vnr, 1); 820} 821#endif 822#endif /* DDB */
| 181 182/* 183 * Global lists of subsystem constructor and destructors for vnets. They are 184 * registered via VNET_SYSINIT() and VNET_SYSUNINIT(). Both lists are 185 * protected by the vnet_sysinit_sxlock global lock. 186 */ 187static TAILQ_HEAD(vnet_sysinit_head, vnet_sysinit) vnet_constructors = 188 TAILQ_HEAD_INITIALIZER(vnet_constructors); 189static TAILQ_HEAD(vnet_sysuninit_head, vnet_sysinit) vnet_destructors = 190 TAILQ_HEAD_INITIALIZER(vnet_destructors); 191 192struct sx vnet_sysinit_sxlock; 193 194#define VNET_SYSINIT_WLOCK() sx_xlock(&vnet_sysinit_sxlock); 195#define VNET_SYSINIT_WUNLOCK() sx_xunlock(&vnet_sysinit_sxlock); 196#define VNET_SYSINIT_RLOCK() sx_slock(&vnet_sysinit_sxlock); 197#define VNET_SYSINIT_RUNLOCK() sx_sunlock(&vnet_sysinit_sxlock); 198 199struct vnet_data_free { 200 uintptr_t vnd_start; 201 int vnd_len; 202 TAILQ_ENTRY(vnet_data_free) vnd_link; 203}; 204 205MALLOC_DEFINE(M_VNET_DATA_FREE, "vnet_data_free", "VNET resource accounting"); 206static TAILQ_HEAD(, vnet_data_free) vnet_data_free_head = 207 TAILQ_HEAD_INITIALIZER(vnet_data_free_head); 208static struct sx vnet_data_free_lock; 209 210SDT_PROVIDER_DEFINE(vnet); 211SDT_PROBE_DEFINE1(vnet, functions, vnet_alloc, entry, entry, "int"); 212SDT_PROBE_DEFINE2(vnet, functions, vnet_alloc, alloc, alloc, "int", 213 "struct vnet *"); 214SDT_PROBE_DEFINE2(vnet, functions, vnet_alloc, return, return, 215 "int", "struct vnet *"); 216SDT_PROBE_DEFINE2(vnet, functions, vnet_destroy, entry, entry, 217 "int", "struct vnet *"); 218SDT_PROBE_DEFINE1(vnet, functions, vnet_destroy, return, entry, 219 "int"); 220 221#ifdef DDB 222static void db_show_vnet_print_vs(struct vnet_sysinit *, int); 223#endif 224 225/* 226 * Allocate a virtual network stack. 227 */ 228struct vnet * 229vnet_alloc(void) 230{ 231 struct vnet *vnet; 232 233 SDT_PROBE1(vnet, functions, vnet_alloc, entry, __LINE__); 234 vnet = malloc(sizeof(struct vnet), M_VNET, M_WAITOK | M_ZERO); 235 vnet->vnet_magic_n = VNET_MAGIC_N; 236 SDT_PROBE2(vnet, functions, vnet_alloc, alloc, __LINE__, vnet); 237 238 /* 239 * Allocate storage for virtualized global variables and copy in 240 * initial values form our 'master' copy. 241 */ 242 vnet->vnet_data_mem = malloc(VNET_SIZE, M_VNET_DATA, M_WAITOK); 243 memcpy(vnet->vnet_data_mem, (void *)VNET_START, VNET_BYTES); 244 245 /* 246 * All use of vnet-specific data will immediately subtract VNET_START 247 * from the base memory pointer, so pre-calculate that now to avoid 248 * it on each use. 249 */ 250 vnet->vnet_data_base = (uintptr_t)vnet->vnet_data_mem - VNET_START; 251 252 /* Initialize / attach vnet module instances. */ 253 CURVNET_SET_QUIET(vnet); 254 vnet_sysinit(); 255 CURVNET_RESTORE(); 256 257 VNET_LIST_WLOCK(); 258 LIST_INSERT_HEAD(&vnet_head, vnet, vnet_le); 259 VNET_LIST_WUNLOCK(); 260 261 SDT_PROBE2(vnet, functions, vnet_alloc, return, __LINE__, vnet); 262 return (vnet); 263} 264 265/* 266 * Destroy a virtual network stack. 267 */ 268void 269vnet_destroy(struct vnet *vnet) 270{ 271 struct ifnet *ifp, *nifp; 272 273 SDT_PROBE2(vnet, functions, vnet_destroy, entry, __LINE__, vnet); 274 KASSERT(vnet->vnet_sockcnt == 0, 275 ("%s: vnet still has sockets", __func__)); 276 277 VNET_LIST_WLOCK(); 278 LIST_REMOVE(vnet, vnet_le); 279 VNET_LIST_WUNLOCK(); 280 281 CURVNET_SET_QUIET(vnet); 282 283 /* Return all inherited interfaces to their parent vnets. */ 284 TAILQ_FOREACH_SAFE(ifp, &V_ifnet, if_link, nifp) { 285 if (ifp->if_home_vnet != ifp->if_vnet) 286 if_vmove(ifp, ifp->if_home_vnet); 287 } 288 289 vnet_sysuninit(); 290 CURVNET_RESTORE(); 291 292 /* 293 * Release storage for the virtual network stack instance. 294 */ 295 free(vnet->vnet_data_mem, M_VNET_DATA); 296 vnet->vnet_data_mem = NULL; 297 vnet->vnet_data_base = 0; 298 vnet->vnet_magic_n = 0xdeadbeef; 299 free(vnet, M_VNET); 300 SDT_PROBE1(vnet, functions, vnet_destroy, return, __LINE__); 301} 302 303/* 304 * Boot time initialization and allocation of virtual network stacks. 305 */ 306static void 307vnet_init_prelink(void *arg) 308{ 309 310 rw_init(&vnet_rwlock, "vnet_rwlock"); 311 sx_init(&vnet_sxlock, "vnet_sxlock"); 312 sx_init(&vnet_sysinit_sxlock, "vnet_sysinit_sxlock"); 313 LIST_INIT(&vnet_head); 314} 315SYSINIT(vnet_init_prelink, SI_SUB_VNET_PRELINK, SI_ORDER_FIRST, 316 vnet_init_prelink, NULL); 317 318static void 319vnet0_init(void *arg) 320{ 321 322 /* Warn people before take off - in case we crash early. */ 323 printf("WARNING: VIMAGE (virtualized network stack) is a highly " 324 "experimental feature.\n"); 325 326 /* 327 * We MUST clear curvnet in vi_init_done() before going SMP, 328 * otherwise CURVNET_SET() macros would scream about unnecessary 329 * curvnet recursions. 330 */ 331 curvnet = prison0.pr_vnet = vnet0 = vnet_alloc(); 332} 333SYSINIT(vnet0_init, SI_SUB_VNET, SI_ORDER_FIRST, vnet0_init, NULL); 334 335static void 336vnet_init_done(void *unused) 337{ 338 339 curvnet = NULL; 340} 341 342SYSINIT(vnet_init_done, SI_SUB_VNET_DONE, SI_ORDER_FIRST, vnet_init_done, 343 NULL); 344 345/* 346 * Once on boot, initialize the modspace freelist to entirely cover modspace. 347 */ 348static void 349vnet_data_startup(void *dummy __unused) 350{ 351 struct vnet_data_free *df; 352 353 df = malloc(sizeof(*df), M_VNET_DATA_FREE, M_WAITOK | M_ZERO); 354 df->vnd_start = (uintptr_t)&VNET_NAME(modspace); 355 df->vnd_len = VNET_MODMIN; 356 TAILQ_INSERT_HEAD(&vnet_data_free_head, df, vnd_link); 357 sx_init(&vnet_data_free_lock, "vnet_data alloc lock"); 358} 359SYSINIT(vnet_data, SI_SUB_KLD, SI_ORDER_FIRST, vnet_data_startup, 0); 360 361/* 362 * When a module is loaded and requires storage for a virtualized global 363 * variable, allocate space from the modspace free list. This interface 364 * should be used only by the kernel linker. 365 */ 366void * 367vnet_data_alloc(int size) 368{ 369 struct vnet_data_free *df; 370 void *s; 371 372 s = NULL; 373 size = roundup2(size, sizeof(void *)); 374 sx_xlock(&vnet_data_free_lock); 375 TAILQ_FOREACH(df, &vnet_data_free_head, vnd_link) { 376 if (df->vnd_len < size) 377 continue; 378 if (df->vnd_len == size) { 379 s = (void *)df->vnd_start; 380 TAILQ_REMOVE(&vnet_data_free_head, df, vnd_link); 381 free(df, M_VNET_DATA_FREE); 382 break; 383 } 384 s = (void *)df->vnd_start; 385 df->vnd_len -= size; 386 df->vnd_start = df->vnd_start + size; 387 break; 388 } 389 sx_xunlock(&vnet_data_free_lock); 390 391 return (s); 392} 393 394/* 395 * Free space for a virtualized global variable on module unload. 396 */ 397void 398vnet_data_free(void *start_arg, int size) 399{ 400 struct vnet_data_free *df; 401 struct vnet_data_free *dn; 402 uintptr_t start; 403 uintptr_t end; 404 405 size = roundup2(size, sizeof(void *)); 406 start = (uintptr_t)start_arg; 407 end = start + size; 408 /* 409 * Free a region of space and merge it with as many neighbors as 410 * possible. Keeping the list sorted simplifies this operation. 411 */ 412 sx_xlock(&vnet_data_free_lock); 413 TAILQ_FOREACH(df, &vnet_data_free_head, vnd_link) { 414 if (df->vnd_start > end) 415 break; 416 /* 417 * If we expand at the end of an entry we may have to merge 418 * it with the one following it as well. 419 */ 420 if (df->vnd_start + df->vnd_len == start) { 421 df->vnd_len += size; 422 dn = TAILQ_NEXT(df, vnd_link); 423 if (df->vnd_start + df->vnd_len == dn->vnd_start) { 424 df->vnd_len += dn->vnd_len; 425 TAILQ_REMOVE(&vnet_data_free_head, dn, 426 vnd_link); 427 free(dn, M_VNET_DATA_FREE); 428 } 429 sx_xunlock(&vnet_data_free_lock); 430 return; 431 } 432 if (df->vnd_start == end) { 433 df->vnd_start = start; 434 df->vnd_len += size; 435 sx_xunlock(&vnet_data_free_lock); 436 return; 437 } 438 } 439 dn = malloc(sizeof(*df), M_VNET_DATA_FREE, M_WAITOK | M_ZERO); 440 dn->vnd_start = start; 441 dn->vnd_len = size; 442 if (df) 443 TAILQ_INSERT_BEFORE(df, dn, vnd_link); 444 else 445 TAILQ_INSERT_TAIL(&vnet_data_free_head, dn, vnd_link); 446 sx_xunlock(&vnet_data_free_lock); 447} 448 449/* 450 * When a new virtualized global variable has been allocated, propagate its 451 * initial value to each already-allocated virtual network stack instance. 452 */ 453void 454vnet_data_copy(void *start, int size) 455{ 456 struct vnet *vnet; 457 458 VNET_LIST_RLOCK(); 459 LIST_FOREACH(vnet, &vnet_head, vnet_le) 460 memcpy((void *)((uintptr_t)vnet->vnet_data_base + 461 (uintptr_t)start), start, size); 462 VNET_LIST_RUNLOCK(); 463} 464 465/* 466 * Variants on sysctl_handle_foo that know how to handle virtualized global 467 * variables: if 'arg1' is a pointer, then we transform it to the local vnet 468 * offset. 469 */ 470int 471vnet_sysctl_handle_int(SYSCTL_HANDLER_ARGS) 472{ 473 474 if (arg1 != NULL) 475 arg1 = (void *)(curvnet->vnet_data_base + (uintptr_t)arg1); 476 return (sysctl_handle_int(oidp, arg1, arg2, req)); 477} 478 479int 480vnet_sysctl_handle_opaque(SYSCTL_HANDLER_ARGS) 481{ 482 483 if (arg1 != NULL) 484 arg1 = (void *)(curvnet->vnet_data_base + (uintptr_t)arg1); 485 return (sysctl_handle_opaque(oidp, arg1, arg2, req)); 486} 487 488int 489vnet_sysctl_handle_string(SYSCTL_HANDLER_ARGS) 490{ 491 492 if (arg1 != NULL) 493 arg1 = (void *)(curvnet->vnet_data_base + (uintptr_t)arg1); 494 return (sysctl_handle_string(oidp, arg1, arg2, req)); 495} 496 497int 498vnet_sysctl_handle_uint(SYSCTL_HANDLER_ARGS) 499{ 500 501 if (arg1 != NULL) 502 arg1 = (void *)(curvnet->vnet_data_base + (uintptr_t)arg1); 503 return (sysctl_handle_int(oidp, arg1, arg2, req)); 504} 505 506/* 507 * Support for special SYSINIT handlers registered via VNET_SYSINIT() 508 * and VNET_SYSUNINIT(). 509 */ 510void 511vnet_register_sysinit(void *arg) 512{ 513 struct vnet_sysinit *vs, *vs2; 514 struct vnet *vnet; 515 516 vs = arg; 517 KASSERT(vs->subsystem > SI_SUB_VNET, ("vnet sysinit too early")); 518 519 /* Add the constructor to the global list of vnet constructors. */ 520 VNET_SYSINIT_WLOCK(); 521 TAILQ_FOREACH(vs2, &vnet_constructors, link) { 522 if (vs2->subsystem > vs->subsystem) 523 break; 524 if (vs2->subsystem == vs->subsystem && vs2->order > vs->order) 525 break; 526 } 527 if (vs2 != NULL) 528 TAILQ_INSERT_BEFORE(vs2, vs, link); 529 else 530 TAILQ_INSERT_TAIL(&vnet_constructors, vs, link); 531 532 /* 533 * Invoke the constructor on all the existing vnets when it is 534 * registered. 535 */ 536 VNET_FOREACH(vnet) { 537 CURVNET_SET_QUIET(vnet); 538 vs->func(vs->arg); 539 CURVNET_RESTORE(); 540 } 541 VNET_SYSINIT_WUNLOCK(); 542} 543 544void 545vnet_deregister_sysinit(void *arg) 546{ 547 struct vnet_sysinit *vs; 548 549 vs = arg; 550 551 /* Remove the constructor from the global list of vnet constructors. */ 552 VNET_SYSINIT_WLOCK(); 553 TAILQ_REMOVE(&vnet_constructors, vs, link); 554 VNET_SYSINIT_WUNLOCK(); 555} 556 557void 558vnet_register_sysuninit(void *arg) 559{ 560 struct vnet_sysinit *vs, *vs2; 561 562 vs = arg; 563 564 /* Add the destructor to the global list of vnet destructors. */ 565 VNET_SYSINIT_WLOCK(); 566 TAILQ_FOREACH(vs2, &vnet_destructors, link) { 567 if (vs2->subsystem > vs->subsystem) 568 break; 569 if (vs2->subsystem == vs->subsystem && vs2->order > vs->order) 570 break; 571 } 572 if (vs2 != NULL) 573 TAILQ_INSERT_BEFORE(vs2, vs, link); 574 else 575 TAILQ_INSERT_TAIL(&vnet_destructors, vs, link); 576 VNET_SYSINIT_WUNLOCK(); 577} 578 579void 580vnet_deregister_sysuninit(void *arg) 581{ 582 struct vnet_sysinit *vs; 583 struct vnet *vnet; 584 585 vs = arg; 586 587 /* 588 * Invoke the destructor on all the existing vnets when it is 589 * deregistered. 590 */ 591 VNET_SYSINIT_WLOCK(); 592 VNET_FOREACH(vnet) { 593 CURVNET_SET_QUIET(vnet); 594 vs->func(vs->arg); 595 CURVNET_RESTORE(); 596 } 597 598 /* Remove the destructor from the global list of vnet destructors. */ 599 TAILQ_REMOVE(&vnet_destructors, vs, link); 600 VNET_SYSINIT_WUNLOCK(); 601} 602 603/* 604 * Invoke all registered vnet constructors on the current vnet. Used during 605 * vnet construction. The caller is responsible for ensuring the new vnet is 606 * the current vnet and that the vnet_sysinit_sxlock lock is locked. 607 */ 608void 609vnet_sysinit(void) 610{ 611 struct vnet_sysinit *vs; 612 613 VNET_SYSINIT_RLOCK(); 614 TAILQ_FOREACH(vs, &vnet_constructors, link) { 615 vs->func(vs->arg); 616 } 617 VNET_SYSINIT_RUNLOCK(); 618} 619 620/* 621 * Invoke all registered vnet destructors on the current vnet. Used during 622 * vnet destruction. The caller is responsible for ensuring the dying vnet 623 * the current vnet and that the vnet_sysinit_sxlock lock is locked. 624 */ 625void 626vnet_sysuninit(void) 627{ 628 struct vnet_sysinit *vs; 629 630 VNET_SYSINIT_RLOCK(); 631 TAILQ_FOREACH_REVERSE(vs, &vnet_destructors, vnet_sysuninit_head, 632 link) { 633 vs->func(vs->arg); 634 } 635 VNET_SYSINIT_RUNLOCK(); 636} 637 638/* 639 * EVENTHANDLER(9) extensions. 640 */ 641/* 642 * Invoke the eventhandler function originally registered with the possibly 643 * registered argument for all virtual network stack instances. 644 * 645 * This iterator can only be used for eventhandlers that do not take any 646 * additional arguments, as we do ignore the variadic arguments from the 647 * EVENTHANDLER_INVOKE() call. 648 */ 649void 650vnet_global_eventhandler_iterator_func(void *arg, ...) 651{ 652 VNET_ITERATOR_DECL(vnet_iter); 653 struct eventhandler_entry_vimage *v_ee; 654 655 /* 656 * There is a bug here in that we should actually cast things to 657 * (struct eventhandler_entry_ ## name *) but that's not easily 658 * possible in here so just re-using the variadic version we 659 * defined for the generic vimage case. 660 */ 661 v_ee = arg; 662 VNET_LIST_RLOCK(); 663 VNET_FOREACH(vnet_iter) { 664 CURVNET_SET(vnet_iter); 665 ((vimage_iterator_func_t)v_ee->func)(v_ee->ee_arg); 666 CURVNET_RESTORE(); 667 } 668 VNET_LIST_RUNLOCK(); 669} 670 671#ifdef VNET_DEBUG 672struct vnet_recursion { 673 SLIST_ENTRY(vnet_recursion) vnr_le; 674 const char *prev_fn; 675 const char *where_fn; 676 int where_line; 677 struct vnet *old_vnet; 678 struct vnet *new_vnet; 679}; 680 681static SLIST_HEAD(, vnet_recursion) vnet_recursions = 682 SLIST_HEAD_INITIALIZER(vnet_recursions); 683 684static void 685vnet_print_recursion(struct vnet_recursion *vnr, int brief) 686{ 687 688 if (!brief) 689 printf("CURVNET_SET() recursion in "); 690 printf("%s() line %d, prev in %s()", vnr->where_fn, vnr->where_line, 691 vnr->prev_fn); 692 if (brief) 693 printf(", "); 694 else 695 printf("\n "); 696 printf("%p -> %p\n", vnr->old_vnet, vnr->new_vnet); 697} 698 699void 700vnet_log_recursion(struct vnet *old_vnet, const char *old_fn, int line) 701{ 702 struct vnet_recursion *vnr; 703 704 /* Skip already logged recursion events. */ 705 SLIST_FOREACH(vnr, &vnet_recursions, vnr_le) 706 if (vnr->prev_fn == old_fn && 707 vnr->where_fn == curthread->td_vnet_lpush && 708 vnr->where_line == line && 709 (vnr->old_vnet == vnr->new_vnet) == (curvnet == old_vnet)) 710 return; 711 712 vnr = malloc(sizeof(*vnr), M_VNET, M_NOWAIT | M_ZERO); 713 if (vnr == NULL) 714 panic("%s: malloc failed", __func__); 715 vnr->prev_fn = old_fn; 716 vnr->where_fn = curthread->td_vnet_lpush; 717 vnr->where_line = line; 718 vnr->old_vnet = old_vnet; 719 vnr->new_vnet = curvnet; 720 721 SLIST_INSERT_HEAD(&vnet_recursions, vnr, vnr_le); 722 723 vnet_print_recursion(vnr, 0); 724#ifdef KDB 725 kdb_backtrace(); 726#endif 727} 728#endif /* VNET_DEBUG */ 729 730/* 731 * DDB(4). 732 */ 733#ifdef DDB 734DB_SHOW_COMMAND(vnets, db_show_vnets) 735{ 736 VNET_ITERATOR_DECL(vnet_iter); 737 738 VNET_FOREACH(vnet_iter) { 739 db_printf("vnet = %p\n", vnet_iter); 740 db_printf(" vnet_magic_n = 0x%x (%s, orig 0x%x)\n", 741 vnet_iter->vnet_magic_n, 742 (vnet_iter->vnet_magic_n == VNET_MAGIC_N) ? 743 "ok" : "mismatch", VNET_MAGIC_N); 744 db_printf(" vnet_ifcnt = %u\n", vnet_iter->vnet_ifcnt); 745 db_printf(" vnet_sockcnt = %u\n", vnet_iter->vnet_sockcnt); 746 db_printf(" vnet_data_mem = %p\n", vnet_iter->vnet_data_mem); 747 db_printf(" vnet_data_base = 0x%jx\n", 748 (uintmax_t)vnet_iter->vnet_data_base); 749 db_printf("\n"); 750 if (db_pager_quit) 751 break; 752 } 753} 754 755static void 756db_show_vnet_print_vs(struct vnet_sysinit *vs, int ddb) 757{ 758 const char *vsname, *funcname; 759 c_db_sym_t sym; 760 db_expr_t offset; 761 762#define xprint(...) \ 763 if (ddb) \ 764 db_printf(__VA_ARGS__); \ 765 else \ 766 printf(__VA_ARGS__) 767 768 if (vs == NULL) { 769 xprint("%s: no vnet_sysinit * given\n", __func__); 770 return; 771 } 772 773 sym = db_search_symbol((vm_offset_t)vs, DB_STGY_ANY, &offset); 774 db_symbol_values(sym, &vsname, NULL); 775 sym = db_search_symbol((vm_offset_t)vs->func, DB_STGY_PROC, &offset); 776 db_symbol_values(sym, &funcname, NULL); 777 xprint("%s(%p)\n", (vsname != NULL) ? vsname : "", vs); 778 xprint(" 0x%08x 0x%08x\n", vs->subsystem, vs->order); 779 xprint(" %p(%s)(%p)\n", 780 vs->func, (funcname != NULL) ? funcname : "", vs->arg); 781#undef xprint 782} 783 784DB_SHOW_COMMAND(vnet_sysinit, db_show_vnet_sysinit) 785{ 786 struct vnet_sysinit *vs; 787 788 db_printf("VNET_SYSINIT vs Name(Ptr)\n"); 789 db_printf(" Subsystem Order\n"); 790 db_printf(" Function(Name)(Arg)\n"); 791 TAILQ_FOREACH(vs, &vnet_constructors, link) { 792 db_show_vnet_print_vs(vs, 1); 793 if (db_pager_quit) 794 break; 795 } 796} 797 798DB_SHOW_COMMAND(vnet_sysuninit, db_show_vnet_sysuninit) 799{ 800 struct vnet_sysinit *vs; 801 802 db_printf("VNET_SYSUNINIT vs Name(Ptr)\n"); 803 db_printf(" Subsystem Order\n"); 804 db_printf(" Function(Name)(Arg)\n"); 805 TAILQ_FOREACH_REVERSE(vs, &vnet_destructors, vnet_sysuninit_head, 806 link) { 807 db_show_vnet_print_vs(vs, 1); 808 if (db_pager_quit) 809 break; 810 } 811} 812 813#ifdef VNET_DEBUG 814DB_SHOW_COMMAND(vnetrcrs, db_show_vnetrcrs) 815{ 816 struct vnet_recursion *vnr; 817 818 SLIST_FOREACH(vnr, &vnet_recursions, vnr_le) 819 vnet_print_recursion(vnr, 1); 820} 821#endif 822#endif /* DDB */
|