1#include <linux/workqueue.h> 2#include <linux/rtnetlink.h> 3#include <linux/cache.h> 4#include <linux/slab.h> 5#include <linux/list.h> 6#include <linux/delay.h> 7#include <linux/sched.h> 8#include <linux/idr.h> 9#include <linux/rculist.h> 10#include <linux/nsproxy.h> 11#include <net/net_namespace.h> 12#include <net/netns/generic.h> 13 14/* 15 * Our network namespace constructor/destructor lists 16 */ 17 18static LIST_HEAD(pernet_list); 19static struct list_head *first_device = &pernet_list; 20static DEFINE_MUTEX(net_mutex); 21 22LIST_HEAD(net_namespace_list); 23EXPORT_SYMBOL_GPL(net_namespace_list); 24 25struct net init_net; 26EXPORT_SYMBOL(init_net); 27 28#define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ 29 30static void net_generic_release(struct rcu_head *rcu) 31{ 32 struct net_generic *ng; 33 34 ng = container_of(rcu, struct net_generic, rcu); 35 kfree(ng); 36} 37 38static int net_assign_generic(struct net *net, int id, void *data) 39{ 40 struct net_generic *ng, *old_ng; 41 42 BUG_ON(!mutex_is_locked(&net_mutex)); 43 BUG_ON(id == 0); 44 45 ng = old_ng = net->gen; 46 if (old_ng->len >= id) 47 goto assign; 48 49 ng = kzalloc(sizeof(struct net_generic) + 50 id * sizeof(void *), GFP_KERNEL); 51 if (ng == NULL) 52 return -ENOMEM; 53 54 /* 55 * Some synchronisation notes: 56 * 57 * The net_generic explores the net->gen array inside rcu 58 * read section. Besides once set the net->gen->ptr[x] 59 * pointer never changes (see rules in netns/generic.h). 60 * 61 * That said, we simply duplicate this array and schedule 62 * the old copy for kfree after a grace period. 63 */ 64 65 ng->len = id; 66 memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*)); 67 68 rcu_assign_pointer(net->gen, ng); 69 call_rcu(&old_ng->rcu, net_generic_release); 70assign: 71 ng->ptr[id - 1] = data; 72 return 0; 73} 74 75static int ops_init(const struct pernet_operations *ops, struct net *net) 76{ 77 int err; 78 if (ops->id && ops->size) { 79 void *data = kzalloc(ops->size, GFP_KERNEL); 80 if (!data) 81 return -ENOMEM; 82 83 err = net_assign_generic(net, *ops->id, data); 84 if (err) { 85 kfree(data); 86 return err; 87 } 88 } 89 if (ops->init) 90 return ops->init(net); 91 return 0; 92} 93 94static void ops_free(const struct pernet_operations *ops, struct net *net) 95{ 96 if (ops->id && ops->size) { 97 int id = *ops->id; 98 kfree(net_generic(net, id)); 99 } 100} 101 102static void ops_exit_list(const struct pernet_operations *ops, 103 struct list_head *net_exit_list) 104{ 105 struct net *net; 106 if (ops->exit) { 107 list_for_each_entry(net, net_exit_list, exit_list) 108 ops->exit(net); 109 } 110 if (ops->exit_batch) 111 ops->exit_batch(net_exit_list); 112} 113 114static void ops_free_list(const struct pernet_operations *ops, 115 struct list_head *net_exit_list) 116{ 117 struct net *net; 118 if (ops->size && ops->id) { 119 list_for_each_entry(net, net_exit_list, exit_list) 120 ops_free(ops, net); 121 } 122} 123 124/* 125 * setup_net runs the initializers for the network namespace object. 126 */ 127static __net_init int setup_net(struct net *net) 128{ 129 /* Must be called with net_mutex held */ 130 const struct pernet_operations *ops, *saved_ops; 131 int error = 0; 132 LIST_HEAD(net_exit_list); 133 134 atomic_set(&net->count, 1); 135 136#ifdef NETNS_REFCNT_DEBUG 137 atomic_set(&net->use_count, 0); 138#endif 139 140 list_for_each_entry(ops, &pernet_list, list) { 141 error = ops_init(ops, net); 142 if (error < 0) 143 goto out_undo; 144 } 145out: 146 return error; 147 148out_undo: 149 /* Walk through the list backwards calling the exit functions 150 * for the pernet modules whose init functions did not fail. 151 */ 152 list_add(&net->exit_list, &net_exit_list); 153 saved_ops = ops; 154 list_for_each_entry_continue_reverse(ops, &pernet_list, list) 155 ops_exit_list(ops, &net_exit_list); 156 157 ops = saved_ops; 158 list_for_each_entry_continue_reverse(ops, &pernet_list, list) 159 ops_free_list(ops, &net_exit_list); 160 161 rcu_barrier(); 162 goto out; 163} 164 165static struct net_generic *net_alloc_generic(void) 166{ 167 struct net_generic *ng; 168 size_t generic_size = sizeof(struct net_generic) + 169 INITIAL_NET_GEN_PTRS * sizeof(void *); 170 171 ng = kzalloc(generic_size, GFP_KERNEL); 172 if (ng) 173 ng->len = INITIAL_NET_GEN_PTRS; 174 175 return ng; 176} 177 178#ifdef CONFIG_NET_NS 179static struct kmem_cache *net_cachep; 180static struct workqueue_struct *netns_wq; 181 182static struct net *net_alloc(void) 183{ 184 struct net *net = NULL; 185 struct net_generic *ng; 186 187 ng = net_alloc_generic(); 188 if (!ng) 189 goto out; 190 191 net = kmem_cache_zalloc(net_cachep, GFP_KERNEL); 192 if (!net) 193 goto out_free; 194 195 rcu_assign_pointer(net->gen, ng); 196out: 197 return net; 198 199out_free: 200 kfree(ng); 201 goto out; 202} 203 204static void net_free(struct net *net) 205{ 206#ifdef NETNS_REFCNT_DEBUG 207 if (unlikely(atomic_read(&net->use_count) != 0)) { 208 printk(KERN_EMERG "network namespace not free! Usage: %d\n", 209 atomic_read(&net->use_count)); 210 return; 211 } 212#endif 213 kfree(net->gen); 214 kmem_cache_free(net_cachep, net); 215} 216 217static struct net *net_create(void) 218{ 219 struct net *net; 220 int rv; 221 222 net = net_alloc(); 223 if (!net) 224 return ERR_PTR(-ENOMEM); 225 mutex_lock(&net_mutex); 226 rv = setup_net(net); 227 if (rv == 0) { 228 rtnl_lock(); 229 list_add_tail_rcu(&net->list, &net_namespace_list); 230 rtnl_unlock(); 231 } 232 mutex_unlock(&net_mutex); 233 if (rv < 0) { 234 net_free(net); 235 return ERR_PTR(rv); 236 } 237 return net; 238} 239 240struct net *copy_net_ns(unsigned long flags, struct net *old_net) 241{ 242 if (!(flags & CLONE_NEWNET)) 243 return get_net(old_net); 244 return net_create(); 245} 246 247static DEFINE_SPINLOCK(cleanup_list_lock); 248static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */ 249 250static void cleanup_net(struct work_struct *work) 251{ 252 const struct pernet_operations *ops; 253 struct net *net, *tmp; 254 LIST_HEAD(net_kill_list); 255 LIST_HEAD(net_exit_list); 256 257 /* Atomically snapshot the list of namespaces to cleanup */ 258 spin_lock_irq(&cleanup_list_lock); 259 list_replace_init(&cleanup_list, &net_kill_list); 260 spin_unlock_irq(&cleanup_list_lock); 261 262 mutex_lock(&net_mutex); 263 264 /* Don't let anyone else find us. */ 265 rtnl_lock(); 266 list_for_each_entry(net, &net_kill_list, cleanup_list) { 267 list_del_rcu(&net->list); 268 list_add_tail(&net->exit_list, &net_exit_list); 269 } 270 rtnl_unlock(); 271 272 /* 273 * Another CPU might be rcu-iterating the list, wait for it. 274 * This needs to be before calling the exit() notifiers, so 275 * the rcu_barrier() below isn't sufficient alone. 276 */ 277 synchronize_rcu(); 278 279 /* Run all of the network namespace exit methods */ 280 list_for_each_entry_reverse(ops, &pernet_list, list) 281 ops_exit_list(ops, &net_exit_list); 282 283 /* Free the net generic variables */ 284 list_for_each_entry_reverse(ops, &pernet_list, list) 285 ops_free_list(ops, &net_exit_list); 286 287 mutex_unlock(&net_mutex); 288 289 /* Ensure there are no outstanding rcu callbacks using this 290 * network namespace. 291 */ 292 rcu_barrier(); 293 294 /* Finally it is safe to free my network namespace structure */ 295 list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { 296 list_del_init(&net->exit_list); 297 net_free(net); 298 } 299} 300static DECLARE_WORK(net_cleanup_work, cleanup_net); 301 302void __put_net(struct net *net) 303{ 304 /* Cleanup the network namespace in process context */ 305 unsigned long flags; 306 307 spin_lock_irqsave(&cleanup_list_lock, flags); 308 list_add(&net->cleanup_list, &cleanup_list); 309 spin_unlock_irqrestore(&cleanup_list_lock, flags); 310 311 queue_work(netns_wq, &net_cleanup_work); 312} 313EXPORT_SYMBOL_GPL(__put_net); 314 315#else 316struct net *copy_net_ns(unsigned long flags, struct net *old_net) 317{ 318 if (flags & CLONE_NEWNET) 319 return ERR_PTR(-EINVAL); 320 return old_net; 321} 322#endif 323 324struct net *get_net_ns_by_pid(pid_t pid) 325{ 326 struct task_struct *tsk; 327 struct net *net; 328 329 /* Lookup the network namespace */ 330 net = ERR_PTR(-ESRCH); 331 rcu_read_lock(); 332 tsk = find_task_by_vpid(pid); 333 if (tsk) { 334 struct nsproxy *nsproxy; 335 nsproxy = task_nsproxy(tsk); 336 if (nsproxy) 337 net = get_net(nsproxy->net_ns); 338 } 339 rcu_read_unlock(); 340 return net; 341} 342EXPORT_SYMBOL_GPL(get_net_ns_by_pid); 343 344static int __init net_ns_init(void) 345{ 346 struct net_generic *ng; 347 348#ifdef CONFIG_NET_NS 349 net_cachep = kmem_cache_create("net_namespace", sizeof(struct net), 350 SMP_CACHE_BYTES, 351 SLAB_PANIC, NULL); 352 353 /* Create workqueue for cleanup */ 354 netns_wq = create_singlethread_workqueue("netns"); 355 if (!netns_wq) 356 panic("Could not create netns workq"); 357#endif 358 359 ng = net_alloc_generic(); 360 if (!ng) 361 panic("Could not allocate generic netns"); 362 363 rcu_assign_pointer(init_net.gen, ng); 364 365 mutex_lock(&net_mutex); 366 if (setup_net(&init_net)) 367 panic("Could not setup the initial network namespace"); 368 369 rtnl_lock(); 370 list_add_tail_rcu(&init_net.list, &net_namespace_list); 371 rtnl_unlock(); 372 373 mutex_unlock(&net_mutex); 374 375 return 0; 376} 377 378pure_initcall(net_ns_init); 379 380#ifdef CONFIG_NET_NS 381static int __register_pernet_operations(struct list_head *list, 382 struct pernet_operations *ops) 383{ 384 struct net *net; 385 int error; 386 LIST_HEAD(net_exit_list); 387 388 list_add_tail(&ops->list, list); 389 if (ops->init || (ops->id && ops->size)) { 390 for_each_net(net) { 391 error = ops_init(ops, net); 392 if (error) 393 goto out_undo; 394 list_add_tail(&net->exit_list, &net_exit_list); 395 } 396 } 397 return 0; 398 399out_undo: 400 /* If I have an error cleanup all namespaces I initialized */ 401 list_del(&ops->list); 402 ops_exit_list(ops, &net_exit_list); 403 ops_free_list(ops, &net_exit_list); 404 return error; 405} 406 407static void __unregister_pernet_operations(struct pernet_operations *ops) 408{ 409 struct net *net; 410 LIST_HEAD(net_exit_list); 411 412 list_del(&ops->list); 413 for_each_net(net) 414 list_add_tail(&net->exit_list, &net_exit_list); 415 ops_exit_list(ops, &net_exit_list); 416 ops_free_list(ops, &net_exit_list); 417} 418 419#else 420 421static int __register_pernet_operations(struct list_head *list, 422 struct pernet_operations *ops) 423{ 424 int err = 0; 425 err = ops_init(ops, &init_net); 426 if (err) 427 ops_free(ops, &init_net); 428 return err; 429 430} 431 432static void __unregister_pernet_operations(struct pernet_operations *ops) 433{ 434 LIST_HEAD(net_exit_list); 435 list_add(&init_net.exit_list, &net_exit_list); 436 ops_exit_list(ops, &net_exit_list); 437 ops_free_list(ops, &net_exit_list); 438} 439 440#endif /* CONFIG_NET_NS */ 441 442static DEFINE_IDA(net_generic_ids); 443 444static int register_pernet_operations(struct list_head *list, 445 struct pernet_operations *ops) 446{ 447 int error; 448 449 if (ops->id) { 450again: 451 error = ida_get_new_above(&net_generic_ids, 1, ops->id); 452 if (error < 0) { 453 if (error == -EAGAIN) { 454 ida_pre_get(&net_generic_ids, GFP_KERNEL); 455 goto again; 456 } 457 return error; 458 } 459 } 460 error = __register_pernet_operations(list, ops); 461 if (error) { 462 rcu_barrier(); 463 if (ops->id) 464 ida_remove(&net_generic_ids, *ops->id); 465 } 466 467 return error; 468} 469 470static void unregister_pernet_operations(struct pernet_operations *ops) 471{ 472 473 __unregister_pernet_operations(ops); 474 rcu_barrier(); 475 if (ops->id) 476 ida_remove(&net_generic_ids, *ops->id); 477} 478 479/** 480 * register_pernet_subsys - register a network namespace subsystem 481 * @ops: pernet operations structure for the subsystem 482 * 483 * Register a subsystem which has init and exit functions 484 * that are called when network namespaces are created and 485 * destroyed respectively. 486 * 487 * When registered all network namespace init functions are 488 * called for every existing network namespace. Allowing kernel 489 * modules to have a race free view of the set of network namespaces. 490 * 491 * When a new network namespace is created all of the init 492 * methods are called in the order in which they were registered. 493 * 494 * When a network namespace is destroyed all of the exit methods 495 * are called in the reverse of the order with which they were 496 * registered. 497 */ 498int register_pernet_subsys(struct pernet_operations *ops) 499{ 500 int error; 501 mutex_lock(&net_mutex); 502 error = register_pernet_operations(first_device, ops); 503 mutex_unlock(&net_mutex); 504 return error; 505} 506EXPORT_SYMBOL_GPL(register_pernet_subsys); 507 508/** 509 * unregister_pernet_subsys - unregister a network namespace subsystem 510 * @ops: pernet operations structure to manipulate 511 * 512 * Remove the pernet operations structure from the list to be 513 * used when network namespaces are created or destroyed. In 514 * addition run the exit method for all existing network 515 * namespaces. 516 */ 517void unregister_pernet_subsys(struct pernet_operations *ops) 518{ 519 mutex_lock(&net_mutex); 520 unregister_pernet_operations(ops); 521 mutex_unlock(&net_mutex); 522} 523EXPORT_SYMBOL_GPL(unregister_pernet_subsys); 524 525/** 526 * register_pernet_device - register a network namespace device 527 * @ops: pernet operations structure for the subsystem 528 * 529 * Register a device which has init and exit functions 530 * that are called when network namespaces are created and 531 * destroyed respectively. 532 * 533 * When registered all network namespace init functions are 534 * called for every existing network namespace. Allowing kernel 535 * modules to have a race free view of the set of network namespaces. 536 * 537 * When a new network namespace is created all of the init 538 * methods are called in the order in which they were registered. 539 * 540 * When a network namespace is destroyed all of the exit methods 541 * are called in the reverse of the order with which they were 542 * registered. 543 */ 544int register_pernet_device(struct pernet_operations *ops) 545{ 546 int error; 547 mutex_lock(&net_mutex); 548 error = register_pernet_operations(&pernet_list, ops); 549 if (!error && (first_device == &pernet_list)) 550 first_device = &ops->list; 551 mutex_unlock(&net_mutex); 552 return error; 553} 554EXPORT_SYMBOL_GPL(register_pernet_device); 555 556/** 557 * unregister_pernet_device - unregister a network namespace netdevice 558 * @ops: pernet operations structure to manipulate 559 * 560 * Remove the pernet operations structure from the list to be 561 * used when network namespaces are created or destroyed. In 562 * addition run the exit method for all existing network 563 * namespaces. 564 */ 565void unregister_pernet_device(struct pernet_operations *ops) 566{ 567 mutex_lock(&net_mutex); 568 if (&ops->list == first_device) 569 first_device = first_device->next; 570 unregister_pernet_operations(ops); 571 mutex_unlock(&net_mutex); 572} 573EXPORT_SYMBOL_GPL(unregister_pernet_device); 574