1// SPDX-License-Identifier: GPL-2.0-only
2#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
3
4#include <linux/workqueue.h>
5#include <linux/rtnetlink.h>
6#include <linux/cache.h>
7#include <linux/slab.h>
8#include <linux/list.h>
9#include <linux/delay.h>
10#include <linux/sched.h>
11#include <linux/idr.h>
12#include <linux/rculist.h>
13#include <linux/nsproxy.h>
14#include <linux/fs.h>
15#include <linux/proc_ns.h>
16#include <linux/file.h>
17#include <linux/export.h>
18#include <linux/user_namespace.h>
19#include <linux/net_namespace.h>
20#include <linux/sched/task.h>
21#include <linux/uidgid.h>
22#include <linux/cookie.h>
23#include <linux/proc_fs.h>
24
25#include <net/sock.h>
26#include <net/netlink.h>
27#include <net/net_namespace.h>
28#include <net/netns/generic.h>
29
/*
 *	Our network namespace constructor/destructor lists
 */

/*
 * List of pernet_operations. setup_net() walks it forwards to initialise a
 * namespace; cleanup_net() walks it in reverse to tear namespaces down.
 */
static LIST_HEAD(pernet_list);
/* Starts out pointing at the head of pernet_list. */
static struct list_head *first_device = &pernet_list;

/*
 * All live network namespaces. setup_net() appends to it and cleanup_net()
 * removes from it, both under net_rwsem.
 */
LIST_HEAD(net_namespace_list);
EXPORT_SYMBOL_GPL(net_namespace_list);

/* Protects net_namespace_list. Nests inside rtnl_lock() */
DECLARE_RWSEM(net_rwsem);
EXPORT_SYMBOL_GPL(net_rwsem);

#ifdef CONFIG_KEYS
/* Statically allocated key domain used for init_net (see net_ns_init()). */
static struct key_tag init_net_key_domain = { .usage = REFCOUNT_INIT(1) };
#endif

/* The initial network namespace; set up by net_ns_init(). */
struct net init_net;
EXPORT_SYMBOL(init_net);

/* Set to true by net_ns_init() once setup_net(&init_net) has succeeded. */
static bool init_net_initialized;
/*
 * pernet_ops_rwsem: protects: pernet_list, net_generic_ids,
 * init_net_initialized and first_device pointer.
 * This is internal net namespace object. Please, don't use it
 * outside.
 */
DECLARE_RWSEM(pernet_ops_rwsem);
EXPORT_SYMBOL_GPL(pernet_ops_rwsem);

/*
 * sizeof(struct net_generic) expressed in pointer-sized slots, rounded up.
 * net_assign_generic() BUG()s on any id below this value, and copies of the
 * ptr[] array start at this index.
 */
#define MIN_PERNET_OPS_ID	\
	((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *))

#define INITIAL_NET_GEN_PTRS	13 /* +1 for len +2 for rcu_head */

/* Number of ptr[] slots net_alloc_generic() sizes a new array for. */
static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;

/* Cookie generator; setup_net() draws net->net_cookie from it. */
DEFINE_COOKIE(net_cookie);
69
70static struct net_generic *net_alloc_generic(void)
71{
72	struct net_generic *ng;
73	unsigned int generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]);
74
75	ng = kzalloc(generic_size, GFP_KERNEL);
76	if (ng)
77		ng->s.len = max_gen_ptrs;
78
79	return ng;
80}
81
82static int net_assign_generic(struct net *net, unsigned int id, void *data)
83{
84	struct net_generic *ng, *old_ng;
85
86	BUG_ON(id < MIN_PERNET_OPS_ID);
87
88	old_ng = rcu_dereference_protected(net->gen,
89					   lockdep_is_held(&pernet_ops_rwsem));
90	if (old_ng->s.len > id) {
91		old_ng->ptr[id] = data;
92		return 0;
93	}
94
95	ng = net_alloc_generic();
96	if (!ng)
97		return -ENOMEM;
98
99	/*
100	 * Some synchronisation notes:
101	 *
102	 * The net_generic explores the net->gen array inside rcu
103	 * read section. Besides once set the net->gen->ptr[x]
104	 * pointer never changes (see rules in netns/generic.h).
105	 *
106	 * That said, we simply duplicate this array and schedule
107	 * the old copy for kfree after a grace period.
108	 */
109
110	memcpy(&ng->ptr[MIN_PERNET_OPS_ID], &old_ng->ptr[MIN_PERNET_OPS_ID],
111	       (old_ng->s.len - MIN_PERNET_OPS_ID) * sizeof(void *));
112	ng->ptr[id] = data;
113
114	rcu_assign_pointer(net->gen, ng);
115	kfree_rcu(old_ng, s.rcu);
116	return 0;
117}
118
119static int ops_init(const struct pernet_operations *ops, struct net *net)
120{
121	struct net_generic *ng;
122	int err = -ENOMEM;
123	void *data = NULL;
124
125	if (ops->id && ops->size) {
126		data = kzalloc(ops->size, GFP_KERNEL);
127		if (!data)
128			goto out;
129
130		err = net_assign_generic(net, *ops->id, data);
131		if (err)
132			goto cleanup;
133	}
134	err = 0;
135	if (ops->init)
136		err = ops->init(net);
137	if (!err)
138		return 0;
139
140	if (ops->id && ops->size) {
141		ng = rcu_dereference_protected(net->gen,
142					       lockdep_is_held(&pernet_ops_rwsem));
143		ng->ptr[*ops->id] = NULL;
144	}
145
146cleanup:
147	kfree(data);
148
149out:
150	return err;
151}
152
153static void ops_pre_exit_list(const struct pernet_operations *ops,
154			      struct list_head *net_exit_list)
155{
156	struct net *net;
157
158	if (ops->pre_exit) {
159		list_for_each_entry(net, net_exit_list, exit_list)
160			ops->pre_exit(net);
161	}
162}
163
164static void ops_exit_list(const struct pernet_operations *ops,
165			  struct list_head *net_exit_list)
166{
167	struct net *net;
168	if (ops->exit) {
169		list_for_each_entry(net, net_exit_list, exit_list) {
170			ops->exit(net);
171			cond_resched();
172		}
173	}
174	if (ops->exit_batch)
175		ops->exit_batch(net_exit_list);
176}
177
178static void ops_free_list(const struct pernet_operations *ops,
179			  struct list_head *net_exit_list)
180{
181	struct net *net;
182	if (ops->size && ops->id) {
183		list_for_each_entry(net, net_exit_list, exit_list)
184			kfree(net_generic(net, *ops->id));
185	}
186}
187
188/* should be called with nsid_lock held */
189static int alloc_netid(struct net *net, struct net *peer, int reqid)
190{
191	int min = 0, max = 0;
192
193	if (reqid >= 0) {
194		min = reqid;
195		max = reqid + 1;
196	}
197
198	return idr_alloc(&net->netns_ids, peer, min, max, GFP_ATOMIC);
199}
200
/* Callback for idr_for_each(). When the idr entry @net is the namespace
 * @peer being searched for, return its id so that idr_for_each() stops.
 * Returning 0 would not stop the walk, so id 0 is reported as the magic
 * value NET_ID_ZERO (-1) instead.
 */
#define NET_ID_ZERO -1
static int net_eq_idr(int id, void *net, void *peer)
{
	if (!net_eq(net, peer))
		return 0;

	return id ? id : NET_ID_ZERO;
}
213
214/* Must be called from RCU-critical section or with nsid_lock held */
215static int __peernet2id(const struct net *net, struct net *peer)
216{
217	int id = idr_for_each(&net->netns_ids, net_eq_idr, peer);
218
219	/* Magic value for id 0. */
220	if (id == NET_ID_ZERO)
221		return 0;
222	if (id > 0)
223		return id;
224
225	return NETNSA_NSID_NOT_ASSIGNED;
226}
227
228static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid,
229			      struct nlmsghdr *nlh, gfp_t gfp);
230/* This function returns the id of a peer netns. If no id is assigned, one will
231 * be allocated and returned.
232 */
233int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp)
234{
235	int id;
236
237	if (refcount_read(&net->ns.count) == 0)
238		return NETNSA_NSID_NOT_ASSIGNED;
239
240	spin_lock_bh(&net->nsid_lock);
241	id = __peernet2id(net, peer);
242	if (id >= 0) {
243		spin_unlock_bh(&net->nsid_lock);
244		return id;
245	}
246
247	/* When peer is obtained from RCU lists, we may race with
248	 * its cleanup. Check whether it's alive, and this guarantees
249	 * we never hash a peer back to net->netns_ids, after it has
250	 * just been idr_remove()'d from there in cleanup_net().
251	 */
252	if (!maybe_get_net(peer)) {
253		spin_unlock_bh(&net->nsid_lock);
254		return NETNSA_NSID_NOT_ASSIGNED;
255	}
256
257	id = alloc_netid(net, peer, -1);
258	spin_unlock_bh(&net->nsid_lock);
259
260	put_net(peer);
261	if (id < 0)
262		return NETNSA_NSID_NOT_ASSIGNED;
263
264	rtnl_net_notifyid(net, RTM_NEWNSID, id, 0, NULL, gfp);
265
266	return id;
267}
268EXPORT_SYMBOL_GPL(peernet2id_alloc);
269
/* Return the id @net has assigned to @peer, or NETNSA_NSID_NOT_ASSIGNED
 * when there is none. The lookup runs under rcu_read_lock().
 */
int peernet2id(const struct net *net, struct net *peer)
{
	int nsid;

	rcu_read_lock();
	nsid = __peernet2id(net, peer);
	rcu_read_unlock();

	return nsid;
}
EXPORT_SYMBOL(peernet2id);
282
283/* This function returns true is the peer netns has an id assigned into the
284 * current netns.
285 */
286bool peernet_has_id(const struct net *net, struct net *peer)
287{
288	return peernet2id(net, peer) >= 0;
289}
290
291struct net *get_net_ns_by_id(const struct net *net, int id)
292{
293	struct net *peer;
294
295	if (id < 0)
296		return NULL;
297
298	rcu_read_lock();
299	peer = idr_find(&net->netns_ids, id);
300	if (peer)
301		peer = maybe_get_net(peer);
302	rcu_read_unlock();
303
304	return peer;
305}
306EXPORT_SYMBOL_GPL(get_net_ns_by_id);
307
/* init code that must occur even if setup_net() is not called.
 *
 * Currently this only initialises the "net notrefcnt" reference tracker
 * directory; net_free() calls ref_tracker_dir_exit() on it unconditionally,
 * including on copy_net_ns() error paths that never reach setup_net().
 */
static __net_init void preinit_net(struct net *net)
{
	ref_tracker_dir_init(&net->notrefcnt_tracker, 128, "net notrefcnt");
}
313
/*
 * setup_net runs the initializers for the network namespace object.
 *
 * It initialises the basic fields of @net (refcounts, hash mix, cookie,
 * nsid idr and lock), runs ops_init() for every entry on pernet_list and,
 * if all of them succeed, adds @net to net_namespace_list.
 *
 * Returns 0 on success or the negative error returned by the failing
 * ops_init(); in that case the ops that were already initialised have been
 * torn down again in reverse order.
 */
static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
{
	/* Must be called with pernet_ops_rwsem held */
	const struct pernet_operations *ops, *saved_ops;
	LIST_HEAD(net_exit_list);
	LIST_HEAD(dev_kill_list);
	int error = 0;

	refcount_set(&net->ns.count, 1);
	ref_tracker_dir_init(&net->refcnt_tracker, 128, "net refcnt");

	refcount_set(&net->passive, 1);
	get_random_bytes(&net->hash_mix, sizeof(u32));
	/* gen_cookie_next() is called with preemption disabled */
	preempt_disable();
	net->net_cookie = gen_cookie_next(&net_cookie);
	preempt_enable();
	net->dev_base_seq = 1;
	net->user_ns = user_ns;
	idr_init(&net->netns_ids);
	spin_lock_init(&net->nsid_lock);
	mutex_init(&net->ipv4.ra_mutex);

	list_for_each_entry(ops, &pernet_list, list) {
		error = ops_init(ops, net);
		if (error < 0)
			goto out_undo;
	}
	/* Every initializer succeeded: make the namespace globally visible. */
	down_write(&net_rwsem);
	list_add_tail_rcu(&net->list, &net_namespace_list);
	up_write(&net_rwsem);
out:
	return error;

out_undo:
	/* Walk through the list backwards calling the exit functions
	 * for the pernet modules whose init functions did not fail.
	 */
	list_add(&net->exit_list, &net_exit_list);
	/*
	 * 'ops' is the entry whose init failed. Each reverse walk below
	 * starts from the entry before it, so the position is saved and
	 * restored before every pass.
	 */
	saved_ops = ops;
	list_for_each_entry_continue_reverse(ops, &pernet_list, list)
		ops_pre_exit_list(ops, &net_exit_list);

	synchronize_rcu();

	ops = saved_ops;
	rtnl_lock();
	list_for_each_entry_continue_reverse(ops, &pernet_list, list) {
		if (ops->exit_batch_rtnl)
			ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list);
	}
	unregister_netdevice_many(&dev_kill_list);
	rtnl_unlock();

	ops = saved_ops;
	list_for_each_entry_continue_reverse(ops, &pernet_list, list)
		ops_exit_list(ops, &net_exit_list);

	ops = saved_ops;
	list_for_each_entry_continue_reverse(ops, &pernet_list, list)
		ops_free_list(ops, &net_exit_list);

	rcu_barrier();
	goto out;
}
381
/*
 * pernet init hook: set the default values of a few net->core sysctls for a
 * new namespace. Always returns 0.
 */
static int __net_init net_defaults_init_net(struct net *net)
{
	net->core.sysctl_somaxconn = SOMAXCONN;
	/* Limits per socket sk_omem_alloc usage.
	 * TCP zerocopy regular usage needs 128 KB.
	 */
	net->core.sysctl_optmem_max = 128 * 1024;
	net->core.sysctl_txrehash = SOCK_TXREHASH_ENABLED;

	return 0;
}
393
/* pernet operations that only install the defaults above; no exit hook. */
static struct pernet_operations net_defaults_ops = {
	.init = net_defaults_init_net,
};
397
398static __init int net_defaults_init(void)
399{
400	if (register_pernet_subsys(&net_defaults_ops))
401		panic("Cannot initialize net default settings");
402
403	return 0;
404}
405
406core_initcall(net_defaults_init);
407
408#ifdef CONFIG_NET_NS
/*
 * Charge one network namespace to the current effective uid in user
 * namespace @ns. Returns inc_ucount()'s result; copy_net_ns() treats NULL
 * as -ENOSPC.
 */
static struct ucounts *inc_net_namespaces(struct user_namespace *ns)
{
	return inc_ucount(ns, current_euid(), UCOUNT_NET_NAMESPACES);
}
413
/* Undo inc_net_namespaces(): release one network-namespace ucount. */
static void dec_net_namespaces(struct ucounts *ucounts)
{
	dec_ucount(ucounts, UCOUNT_NET_NAMESPACES);
}
418
/* Slab cache for struct net; created in net_ns_init(). */
static struct kmem_cache *net_cachep __ro_after_init;
/* Workqueue on which __put_net() queues cleanup_net(); created in net_ns_init(). */
static struct workqueue_struct *netns_wq;
421
422static struct net *net_alloc(void)
423{
424	struct net *net = NULL;
425	struct net_generic *ng;
426
427	ng = net_alloc_generic();
428	if (!ng)
429		goto out;
430
431	net = kmem_cache_zalloc(net_cachep, GFP_KERNEL);
432	if (!net)
433		goto out_free;
434
435#ifdef CONFIG_KEYS
436	net->key_domain = kzalloc(sizeof(struct key_tag), GFP_KERNEL);
437	if (!net->key_domain)
438		goto out_free_2;
439	refcount_set(&net->key_domain->usage, 1);
440#endif
441
442	rcu_assign_pointer(net->gen, ng);
443out:
444	return net;
445
446#ifdef CONFIG_KEYS
447out_free_2:
448	kmem_cache_free(net_cachep, net);
449	net = NULL;
450#endif
451out_free:
452	kfree(ng);
453	goto out;
454}
455
456static void net_free(struct net *net)
457{
458	if (refcount_dec_and_test(&net->passive)) {
459		kfree(rcu_access_pointer(net->gen));
460
461		/* There should not be any trackers left there. */
462		ref_tracker_dir_exit(&net->notrefcnt_tracker);
463
464		kmem_cache_free(net_cachep, net);
465	}
466}
467
/*
 * void-pointer wrapper around net_free(): @p is a struct net pointer or
 * NULL, and NULL is silently ignored.
 */
void net_drop_ns(void *p)
{
	struct net *net = p;

	if (!net)
		return;

	net_free(net);
}
475
476struct net *copy_net_ns(unsigned long flags,
477			struct user_namespace *user_ns, struct net *old_net)
478{
479	struct ucounts *ucounts;
480	struct net *net;
481	int rv;
482
483	if (!(flags & CLONE_NEWNET))
484		return get_net(old_net);
485
486	ucounts = inc_net_namespaces(user_ns);
487	if (!ucounts)
488		return ERR_PTR(-ENOSPC);
489
490	net = net_alloc();
491	if (!net) {
492		rv = -ENOMEM;
493		goto dec_ucounts;
494	}
495
496	preinit_net(net);
497	refcount_set(&net->passive, 1);
498	net->ucounts = ucounts;
499	get_user_ns(user_ns);
500
501	rv = down_read_killable(&pernet_ops_rwsem);
502	if (rv < 0)
503		goto put_userns;
504
505	rv = setup_net(net, user_ns);
506
507	up_read(&pernet_ops_rwsem);
508
509	if (rv < 0) {
510put_userns:
511#ifdef CONFIG_KEYS
512		key_remove_domain(net->key_domain);
513#endif
514		put_user_ns(user_ns);
515		net_free(net);
516dec_ucounts:
517		dec_net_namespaces(ucounts);
518		return ERR_PTR(rv);
519	}
520	return net;
521}
522
523/**
524 * net_ns_get_ownership - get sysfs ownership data for @net
525 * @net: network namespace in question (can be NULL)
526 * @uid: kernel user ID for sysfs objects
527 * @gid: kernel group ID for sysfs objects
528 *
529 * Returns the uid/gid pair of root in the user namespace associated with the
530 * given network namespace.
531 */
532void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid)
533{
534	if (net) {
535		kuid_t ns_root_uid = make_kuid(net->user_ns, 0);
536		kgid_t ns_root_gid = make_kgid(net->user_ns, 0);
537
538		if (uid_valid(ns_root_uid))
539			*uid = ns_root_uid;
540
541		if (gid_valid(ns_root_gid))
542			*gid = ns_root_gid;
543	} else {
544		*uid = GLOBAL_ROOT_UID;
545		*gid = GLOBAL_ROOT_GID;
546	}
547}
548EXPORT_SYMBOL_GPL(net_ns_get_ownership);
549
550static void unhash_nsid(struct net *net, struct net *last)
551{
552	struct net *tmp;
553	/* This function is only called from cleanup_net() work,
554	 * and this work is the only process, that may delete
555	 * a net from net_namespace_list. So, when the below
556	 * is executing, the list may only grow. Thus, we do not
557	 * use for_each_net_rcu() or net_rwsem.
558	 */
559	for_each_net(tmp) {
560		int id;
561
562		spin_lock_bh(&tmp->nsid_lock);
563		id = __peernet2id(tmp, net);
564		if (id >= 0)
565			idr_remove(&tmp->netns_ids, id);
566		spin_unlock_bh(&tmp->nsid_lock);
567		if (id >= 0)
568			rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL,
569					  GFP_KERNEL);
570		if (tmp == last)
571			break;
572	}
573	spin_lock_bh(&net->nsid_lock);
574	idr_destroy(&net->netns_ids);
575	spin_unlock_bh(&net->nsid_lock);
576}
577
/* Namespaces queued for destruction by __put_net(); drained by cleanup_net(). */
static LLIST_HEAD(cleanup_list);

/*
 * Work function that destroys every namespace currently on cleanup_list.
 *
 * The namespaces are unlinked from net_namespace_list, their nsids are
 * removed from the surviving namespaces, and then the pernet pre_exit,
 * exit_batch_rtnl, exit and free steps are run in reverse registration
 * order before the structures themselves are released.
 */
static void cleanup_net(struct work_struct *work)
{
	const struct pernet_operations *ops;
	struct net *net, *tmp, *last;
	struct llist_node *net_kill_list;
	LIST_HEAD(net_exit_list);
	LIST_HEAD(dev_kill_list);

	/* Atomically snapshot the list of namespaces to cleanup */
	net_kill_list = llist_del_all(&cleanup_list);

	/* Held (read side) until all pernet exit/free hooks have run. */
	down_read(&pernet_ops_rwsem);

	/* Don't let anyone else find us. */
	down_write(&net_rwsem);
	llist_for_each_entry(net, net_kill_list, cleanup_list)
		list_del_rcu(&net->list);
	/* Cache last net. After we unlock rtnl, no one new net
	 * added to net_namespace_list can assign nsid pointer
	 * to a net from net_kill_list (see peernet2id_alloc()).
	 * So, we skip them in unhash_nsid().
	 *
	 * Note, that unhash_nsid() does not delete nsid links
	 * between net_kill_list's nets, as they've already
	 * deleted from net_namespace_list. But, this would be
	 * useless anyway, as netns_ids are destroyed there.
	 *
	 * NOTE(review): the lock released just below is net_rwsem, not
	 * rtnl; the "unlock rtnl" wording above looks stale - confirm.
	 */
	last = list_last_entry(&net_namespace_list, struct net, list);
	up_write(&net_rwsem);

	llist_for_each_entry(net, net_kill_list, cleanup_list) {
		unhash_nsid(net, last);
		list_add_tail(&net->exit_list, &net_exit_list);
	}

	/* Run all of the network namespace pre_exit methods */
	list_for_each_entry_reverse(ops, &pernet_list, list)
		ops_pre_exit_list(ops, &net_exit_list);

	/*
	 * Another CPU might be rcu-iterating the list, wait for it.
	 * This needs to be before calling the exit() notifiers, so
	 * the rcu_barrier() below isn't sufficient alone.
	 * Also the pre_exit() and exit() methods need this barrier.
	 */
	synchronize_rcu_expedited();

	/* Let each ops queue devices on dev_kill_list, then unregister them in one batch. */
	rtnl_lock();
	list_for_each_entry_reverse(ops, &pernet_list, list) {
		if (ops->exit_batch_rtnl)
			ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list);
	}
	unregister_netdevice_many(&dev_kill_list);
	rtnl_unlock();

	/* Run all of the network namespace exit methods */
	list_for_each_entry_reverse(ops, &pernet_list, list)
		ops_exit_list(ops, &net_exit_list);

	/* Free the net generic variables */
	list_for_each_entry_reverse(ops, &pernet_list, list)
		ops_free_list(ops, &net_exit_list);

	up_read(&pernet_ops_rwsem);

	/* Ensure there are no outstanding rcu callbacks using this
	 * network namespace.
	 */
	rcu_barrier();

	/* Finally it is safe to free my network namespace structure */
	list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
		list_del_init(&net->exit_list);
		dec_net_namespaces(net->ucounts);
#ifdef CONFIG_KEYS
		key_remove_domain(net->key_domain);
#endif
		put_user_ns(net->user_ns);
		net_free(net);
	}
}
661
/**
 * net_ns_barrier - wait until concurrent net_cleanup_work is done
 *
 * cleanup_net runs from work queue and will first remove namespaces
 * from the global list, then run net exit functions.
 *
 * Call this in module exit path to make sure that all netns
 * ->exit ops have been invoked before the function is removed.
 */
void net_ns_barrier(void)
{
	/*
	 * cleanup_net() holds pernet_ops_rwsem for reading while it runs the
	 * exit hooks, so taking and dropping the write side waits for it.
	 */
	down_write(&pernet_ops_rwsem);
	up_write(&pernet_ops_rwsem);
}
EXPORT_SYMBOL(net_ns_barrier);
677
/* Single work item that runs cleanup_net() on netns_wq. */
static DECLARE_WORK(net_cleanup_work, cleanup_net);

/*
 * Queue @net for destruction: shut down its refcnt tracker directory, add
 * it to cleanup_list and, when llist_add() returns true, queue
 * net_cleanup_work.
 */
void __put_net(struct net *net)
{
	ref_tracker_dir_exit(&net->refcnt_tracker);
	/* Cleanup the network namespace in process context */
	/*
	 * Presumably llist_add() returns true only when the list was empty
	 * before the add, so one queue_work() covers a whole batch - see the
	 * llist API to confirm.
	 */
	if (llist_add(&net->cleanup_list, &cleanup_list))
		queue_work(netns_wq, &net_cleanup_work);
}
EXPORT_SYMBOL_GPL(__put_net);
688
689/**
690 * get_net_ns - increment the refcount of the network namespace
691 * @ns: common namespace (net)
692 *
693 * Returns the net's common namespace.
694 */
695struct ns_common *get_net_ns(struct ns_common *ns)
696{
697	return &get_net(container_of(ns, struct net, ns))->ns;
698}
699EXPORT_SYMBOL_GPL(get_net_ns);
700
701struct net *get_net_ns_by_fd(int fd)
702{
703	struct fd f = fdget(fd);
704	struct net *net = ERR_PTR(-EINVAL);
705
706	if (!f.file)
707		return ERR_PTR(-EBADF);
708
709	if (proc_ns_file(f.file)) {
710		struct ns_common *ns = get_proc_ns(file_inode(f.file));
711		if (ns->ops == &netns_operations)
712			net = get_net(container_of(ns, struct net, ns));
713	}
714	fdput(f);
715
716	return net;
717}
718EXPORT_SYMBOL_GPL(get_net_ns_by_fd);
719#endif
720
721struct net *get_net_ns_by_pid(pid_t pid)
722{
723	struct task_struct *tsk;
724	struct net *net;
725
726	/* Lookup the network namespace */
727	net = ERR_PTR(-ESRCH);
728	rcu_read_lock();
729	tsk = find_task_by_vpid(pid);
730	if (tsk) {
731		struct nsproxy *nsproxy;
732		task_lock(tsk);
733		nsproxy = tsk->nsproxy;
734		if (nsproxy)
735			net = get_net(nsproxy->net_ns);
736		task_unlock(tsk);
737	}
738	rcu_read_unlock();
739	return net;
740}
741EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
742
/*
 * pernet init hook: with CONFIG_NET_NS, point the namespace at
 * netns_operations, then allocate its inode number. Returns the result of
 * ns_alloc_inum().
 */
static __net_init int net_ns_net_init(struct net *net)
{
#ifdef CONFIG_NET_NS
	net->ns.ops = &netns_operations;
#endif
	return ns_alloc_inum(&net->ns);
}
750
/* pernet exit hook: release the inode number taken by net_ns_net_init(). */
static __net_exit void net_ns_net_exit(struct net *net)
{
	ns_free_inum(&net->ns);
}
755
/* pernet operations managing the namespace inode number; registered by net_ns_init(). */
static struct pernet_operations __net_initdata net_ns_ops = {
	.init = net_ns_net_init,
	.exit = net_ns_net_exit,
};
760
/*
 * Netlink attribute policy shared by the RTM_NEWNSID / RTM_GETNSID handlers
 * and the nsid dump request validation in this file.
 */
static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
	[NETNSA_NONE]		= { .type = NLA_UNSPEC },
	[NETNSA_NSID]		= { .type = NLA_S32 },
	[NETNSA_PID]		= { .type = NLA_U32 },
	[NETNSA_FD]		= { .type = NLA_U32 },
	[NETNSA_TARGET_NSID]	= { .type = NLA_S32 },
};
768
769static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh,
770			  struct netlink_ext_ack *extack)
771{
772	struct net *net = sock_net(skb->sk);
773	struct nlattr *tb[NETNSA_MAX + 1];
774	struct nlattr *nla;
775	struct net *peer;
776	int nsid, err;
777
778	err = nlmsg_parse_deprecated(nlh, sizeof(struct rtgenmsg), tb,
779				     NETNSA_MAX, rtnl_net_policy, extack);
780	if (err < 0)
781		return err;
782	if (!tb[NETNSA_NSID]) {
783		NL_SET_ERR_MSG(extack, "nsid is missing");
784		return -EINVAL;
785	}
786	nsid = nla_get_s32(tb[NETNSA_NSID]);
787
788	if (tb[NETNSA_PID]) {
789		peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
790		nla = tb[NETNSA_PID];
791	} else if (tb[NETNSA_FD]) {
792		peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
793		nla = tb[NETNSA_FD];
794	} else {
795		NL_SET_ERR_MSG(extack, "Peer netns reference is missing");
796		return -EINVAL;
797	}
798	if (IS_ERR(peer)) {
799		NL_SET_BAD_ATTR(extack, nla);
800		NL_SET_ERR_MSG(extack, "Peer netns reference is invalid");
801		return PTR_ERR(peer);
802	}
803
804	spin_lock_bh(&net->nsid_lock);
805	if (__peernet2id(net, peer) >= 0) {
806		spin_unlock_bh(&net->nsid_lock);
807		err = -EEXIST;
808		NL_SET_BAD_ATTR(extack, nla);
809		NL_SET_ERR_MSG(extack,
810			       "Peer netns already has a nsid assigned");
811		goto out;
812	}
813
814	err = alloc_netid(net, peer, nsid);
815	spin_unlock_bh(&net->nsid_lock);
816	if (err >= 0) {
817		rtnl_net_notifyid(net, RTM_NEWNSID, err, NETLINK_CB(skb).portid,
818				  nlh, GFP_KERNEL);
819		err = 0;
820	} else if (err == -ENOSPC && nsid >= 0) {
821		err = -EEXIST;
822		NL_SET_BAD_ATTR(extack, tb[NETNSA_NSID]);
823		NL_SET_ERR_MSG(extack, "The specified nsid is already used");
824	}
825out:
826	put_net(peer);
827	return err;
828}
829
830static int rtnl_net_get_size(void)
831{
832	return NLMSG_ALIGN(sizeof(struct rtgenmsg))
833	       + nla_total_size(sizeof(s32)) /* NETNSA_NSID */
834	       + nla_total_size(sizeof(s32)) /* NETNSA_CURRENT_NSID */
835	       ;
836}
837
/* Parameters for rtnl_net_fill(). */
struct net_fill_args {
	u32 portid;	/* passed to nlmsg_put() as the port id */
	u32 seq;	/* passed to nlmsg_put() as the sequence number */
	int flags;	/* passed to nlmsg_put() as the message flags */
	int cmd;	/* passed to nlmsg_put() as the message type */
	int nsid;	/* value of the NETNSA_NSID attribute */
	bool add_ref;	/* when true, NETNSA_CURRENT_NSID is emitted too */
	int ref_nsid;	/* value of NETNSA_CURRENT_NSID (used if add_ref) */
};
847
848static int rtnl_net_fill(struct sk_buff *skb, struct net_fill_args *args)
849{
850	struct nlmsghdr *nlh;
851	struct rtgenmsg *rth;
852
853	nlh = nlmsg_put(skb, args->portid, args->seq, args->cmd, sizeof(*rth),
854			args->flags);
855	if (!nlh)
856		return -EMSGSIZE;
857
858	rth = nlmsg_data(nlh);
859	rth->rtgen_family = AF_UNSPEC;
860
861	if (nla_put_s32(skb, NETNSA_NSID, args->nsid))
862		goto nla_put_failure;
863
864	if (args->add_ref &&
865	    nla_put_s32(skb, NETNSA_CURRENT_NSID, args->ref_nsid))
866		goto nla_put_failure;
867
868	nlmsg_end(skb, nlh);
869	return 0;
870
871nla_put_failure:
872	nlmsg_cancel(skb, nlh);
873	return -EMSGSIZE;
874}
875
876static int rtnl_net_valid_getid_req(struct sk_buff *skb,
877				    const struct nlmsghdr *nlh,
878				    struct nlattr **tb,
879				    struct netlink_ext_ack *extack)
880{
881	int i, err;
882
883	if (!netlink_strict_get_check(skb))
884		return nlmsg_parse_deprecated(nlh, sizeof(struct rtgenmsg),
885					      tb, NETNSA_MAX, rtnl_net_policy,
886					      extack);
887
888	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct rtgenmsg), tb,
889					    NETNSA_MAX, rtnl_net_policy,
890					    extack);
891	if (err)
892		return err;
893
894	for (i = 0; i <= NETNSA_MAX; i++) {
895		if (!tb[i])
896			continue;
897
898		switch (i) {
899		case NETNSA_PID:
900		case NETNSA_FD:
901		case NETNSA_NSID:
902		case NETNSA_TARGET_NSID:
903			break;
904		default:
905			NL_SET_ERR_MSG(extack, "Unsupported attribute in peer netns getid request");
906			return -EINVAL;
907		}
908	}
909
910	return 0;
911}
912
/*
 * RTM_GETNSID handler: report the nsid of a peer namespace.
 *
 * The peer is named by NETNSA_PID, NETNSA_FD or NETNSA_NSID (checked in
 * that order). By default the nsid is looked up in the requesting socket's
 * namespace; with NETNSA_TARGET_NSID it is looked up in that target
 * namespace instead, and the peer's nsid in the requester's own namespace
 * is added as NETNSA_CURRENT_NSID. The answer is unicast back as an
 * RTM_NEWNSID message.
 *
 * Returns the result of rtnl_unicast() on success or a negative error.
 */
static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh,
			  struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tb[NETNSA_MAX + 1];
	struct net_fill_args fillargs = {
		.portid = NETLINK_CB(skb).portid,
		.seq = nlh->nlmsg_seq,
		.cmd = RTM_NEWNSID,
	};
	/* target defaults to the requester's netns; no reference is taken for it */
	struct net *peer, *target = net;
	struct nlattr *nla;
	struct sk_buff *msg;
	int err;

	err = rtnl_net_valid_getid_req(skb, nlh, tb, extack);
	if (err < 0)
		return err;
	if (tb[NETNSA_PID]) {
		peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
		nla = tb[NETNSA_PID];
	} else if (tb[NETNSA_FD]) {
		peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
		nla = tb[NETNSA_FD];
	} else if (tb[NETNSA_NSID]) {
		peer = get_net_ns_by_id(net, nla_get_s32(tb[NETNSA_NSID]));
		/* get_net_ns_by_id() returns NULL on failure; normalise to an ERR_PTR */
		if (!peer)
			peer = ERR_PTR(-ENOENT);
		nla = tb[NETNSA_NSID];
	} else {
		NL_SET_ERR_MSG(extack, "Peer netns reference is missing");
		return -EINVAL;
	}

	if (IS_ERR(peer)) {
		NL_SET_BAD_ATTR(extack, nla);
		NL_SET_ERR_MSG(extack, "Peer netns reference is invalid");
		return PTR_ERR(peer);
	}

	if (tb[NETNSA_TARGET_NSID]) {
		int id = nla_get_s32(tb[NETNSA_TARGET_NSID]);

		target = rtnl_get_net_ns_capable(NETLINK_CB(skb).sk, id);
		if (IS_ERR(target)) {
			NL_SET_BAD_ATTR(extack, tb[NETNSA_TARGET_NSID]);
			NL_SET_ERR_MSG(extack,
				       "Target netns reference is invalid");
			err = PTR_ERR(target);
			/* add_ref is still false here, so 'out' only drops peer */
			goto out;
		}
		/* add_ref also records that 'target' must be put at 'out' */
		fillargs.add_ref = true;
		fillargs.ref_nsid = peernet2id(net, peer);
	}

	msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
	if (!msg) {
		err = -ENOMEM;
		goto out;
	}

	fillargs.nsid = peernet2id(target, peer);
	err = rtnl_net_fill(msg, &fillargs);
	if (err < 0)
		goto err_out;

	err = rtnl_unicast(msg, net, NETLINK_CB(skb).portid);
	goto out;

err_out:
	nlmsg_free(msg);
out:
	if (fillargs.add_ref)
		put_net(target);
	put_net(peer);
	return err;
}
990
/* State shared between rtnl_net_dumpid() and its per-entry idr callback. */
struct rtnl_net_dump_cb {
	struct net *tgt_net;	/* namespace whose netns_ids idr is dumped */
	struct net *ref_net;	/* namespace used for ref_nsid lookups (set with add_ref) */
	struct sk_buff *skb;	/* dump skb the messages are appended to */
	struct net_fill_args fillargs;	/* template passed to rtnl_net_fill() */
	int idx;		/* number of idr entries visited so far */
	int s_idx;		/* entries with idx below this are skipped */
};
999
1000/* Runs in RCU-critical section. */
1001static int rtnl_net_dumpid_one(int id, void *peer, void *data)
1002{
1003	struct rtnl_net_dump_cb *net_cb = (struct rtnl_net_dump_cb *)data;
1004	int ret;
1005
1006	if (net_cb->idx < net_cb->s_idx)
1007		goto cont;
1008
1009	net_cb->fillargs.nsid = id;
1010	if (net_cb->fillargs.add_ref)
1011		net_cb->fillargs.ref_nsid = __peernet2id(net_cb->ref_net, peer);
1012	ret = rtnl_net_fill(net_cb->skb, &net_cb->fillargs);
1013	if (ret < 0)
1014		return ret;
1015
1016cont:
1017	net_cb->idx++;
1018	return 0;
1019}
1020
1021static int rtnl_valid_dump_net_req(const struct nlmsghdr *nlh, struct sock *sk,
1022				   struct rtnl_net_dump_cb *net_cb,
1023				   struct netlink_callback *cb)
1024{
1025	struct netlink_ext_ack *extack = cb->extack;
1026	struct nlattr *tb[NETNSA_MAX + 1];
1027	int err, i;
1028
1029	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct rtgenmsg), tb,
1030					    NETNSA_MAX, rtnl_net_policy,
1031					    extack);
1032	if (err < 0)
1033		return err;
1034
1035	for (i = 0; i <= NETNSA_MAX; i++) {
1036		if (!tb[i])
1037			continue;
1038
1039		if (i == NETNSA_TARGET_NSID) {
1040			struct net *net;
1041
1042			net = rtnl_get_net_ns_capable(sk, nla_get_s32(tb[i]));
1043			if (IS_ERR(net)) {
1044				NL_SET_BAD_ATTR(extack, tb[i]);
1045				NL_SET_ERR_MSG(extack,
1046					       "Invalid target network namespace id");
1047				return PTR_ERR(net);
1048			}
1049			net_cb->fillargs.add_ref = true;
1050			net_cb->ref_net = net_cb->tgt_net;
1051			net_cb->tgt_net = net;
1052		} else {
1053			NL_SET_BAD_ATTR(extack, tb[i]);
1054			NL_SET_ERR_MSG(extack,
1055				       "Unsupported attribute in dump request");
1056			return -EINVAL;
1057		}
1058	}
1059
1060	return 0;
1061}
1062
/*
 * RTM_GETNSID dump handler: emit one RTM_NEWNSID message for every entry in
 * the target namespace's netns_ids idr, resuming from cb->args[0].
 *
 * The target is the requesting socket's namespace unless a strict-check
 * request supplied NETNSA_TARGET_NSID (see rtnl_valid_dump_net_req()).
 *
 * Returns skb->len, or the negative error from request validation.
 */
static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct rtnl_net_dump_cb net_cb = {
		.tgt_net = sock_net(skb->sk),
		.skb = skb,
		.fillargs = {
			.portid = NETLINK_CB(cb->skb).portid,
			.seq = cb->nlh->nlmsg_seq,
			.flags = NLM_F_MULTI,
			.cmd = RTM_NEWNSID,
		},
		.idx = 0,
		.s_idx = cb->args[0],
	};
	int err = 0;

	if (cb->strict_check) {
		err = rtnl_valid_dump_net_req(cb->nlh, skb->sk, &net_cb, cb);
		if (err < 0)
			goto end;
	}

	/* The return value of idr_for_each() is not checked; progress is kept in net_cb.idx. */
	rcu_read_lock();
	idr_for_each(&net_cb.tgt_net->netns_ids, rtnl_net_dumpid_one, &net_cb);
	rcu_read_unlock();

	/* Remember how far we got so the next dump call can resume. */
	cb->args[0] = net_cb.idx;
end:
	/* add_ref is only set once a target reference was taken during validation */
	if (net_cb.fillargs.add_ref)
		put_net(net_cb.tgt_net);
	return err < 0 ? err : skb->len;
}
1095
1096static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid,
1097			      struct nlmsghdr *nlh, gfp_t gfp)
1098{
1099	struct net_fill_args fillargs = {
1100		.portid = portid,
1101		.seq = nlh ? nlh->nlmsg_seq : 0,
1102		.cmd = cmd,
1103		.nsid = id,
1104	};
1105	struct sk_buff *msg;
1106	int err = -ENOMEM;
1107
1108	msg = nlmsg_new(rtnl_net_get_size(), gfp);
1109	if (!msg)
1110		goto out;
1111
1112	err = rtnl_net_fill(msg, &fillargs);
1113	if (err < 0)
1114		goto err_out;
1115
1116	rtnl_notify(msg, net, portid, RTNLGRP_NSID, nlh, gfp);
1117	return;
1118
1119err_out:
1120	nlmsg_free(msg);
1121out:
1122	rtnl_set_sk_err(net, RTNLGRP_NSID, err);
1123}
1124
1125#ifdef CONFIG_NET_NS
/*
 * Cache-line layout assertions for struct netns_ipv4: each listed sysctl
 * field must belong to the named group, and each group must have the stated
 * size. Called once from net_ns_init().
 */
static void __init netns_ipv4_struct_check(void)
{
	/* TX readonly hotpath cache lines */
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
				      sysctl_tcp_early_retrans);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
				      sysctl_tcp_tso_win_divisor);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
				      sysctl_tcp_tso_rtt_log);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
				      sysctl_tcp_autocorking);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
				      sysctl_tcp_min_snd_mss);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
				      sysctl_tcp_notsent_lowat);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
				      sysctl_tcp_limit_output_bytes);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
				      sysctl_tcp_min_rtt_wlen);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
				      sysctl_tcp_wmem);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
				      sysctl_ip_fwd_use_pmtu);
	CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_tx, 33);

	/* TXRX readonly hotpath cache lines */
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_txrx,
				      sysctl_tcp_moderate_rcvbuf);
	CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_txrx, 1);

	/* RX readonly hotpath cache line */
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
				      sysctl_ip_early_demux);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
				      sysctl_tcp_early_demux);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
				      sysctl_tcp_reordering);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
				      sysctl_tcp_rmem);
	CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_rx, 18);
}
1167#endif
1168
/*
 * Boot-time initialisation of the network namespace core.
 *
 * With CONFIG_NET_NS this first calls netns_ipv4_struct_check() and creates
 * the "net_namespace" slab cache and the "netns" workqueue.  It then
 * allocates the generic pointer array of init_net, sets init_net up while
 * holding pernet_ops_rwsem for writing, registers net_ns_ops and finally
 * installs the RTM_NEWNSID/RTM_GETNSID rtnetlink handlers.
 *
 * Every failure that is checked here ends in panic().
 */
void __init net_ns_init(void)
{
	struct net_generic *ng;

#ifdef CONFIG_NET_NS
	netns_ipv4_struct_check();
	/* No NULL check follows; SLAB_PANIC is passed in the flags. */
	net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
					SMP_CACHE_BYTES,
					SLAB_PANIC|SLAB_ACCOUNT, NULL);

	/* Create workqueue for cleanup */
	netns_wq = create_singlethread_workqueue("netns");
	if (!netns_wq)
		panic("Could not create netns workq");
#endif

	ng = net_alloc_generic();
	if (!ng)
		panic("Could not allocate generic netns");

	/* Publish the generic array before init_net is set up below. */
	rcu_assign_pointer(init_net.gen, ng);

#ifdef CONFIG_KEYS
	init_net.key_domain = &init_net_key_domain;
#endif
	/* init_net_initialized is protected by pernet_ops_rwsem. */
	down_write(&pernet_ops_rwsem);
	preinit_net(&init_net);
	if (setup_net(&init_net, &init_user_ns))
		panic("Could not setup the initial network namespace");

	/*
	 * From here on the !CONFIG_NET_NS registration helpers in this file
	 * run ops_init() on init_net directly instead of only queueing ops.
	 */
	init_net_initialized = true;
	up_write(&pernet_ops_rwsem);

	/* Takes pernet_ops_rwsem itself, hence called after up_write(). */
	if (register_pernet_subsys(&net_ns_ops))
		panic("Could not register network namespace subsystems");

	/* RTM_NEWNSID has a doit handler only; RTM_GETNSID also has a dump. */
	rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL,
		      RTNL_FLAG_DOIT_UNLOCKED);
	rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid,
		      RTNL_FLAG_DOIT_UNLOCKED);
}
1210
/*
 * Run the teardown sequence of @ops for the namespaces queued on
 * @net_exit_list, in this order: ops_pre_exit_list(), synchronize_rcu(),
 * the optional ->exit_batch_rtnl hook under rtnl, ops_exit_list() and
 * finally ops_free_list().
 */
static void free_exit_list(struct pernet_operations *ops, struct list_head *net_exit_list)
{
	ops_pre_exit_list(ops, net_exit_list);
	/* Wait for an RCU grace period between pre-exit and exit. */
	synchronize_rcu();

	if (ops->exit_batch_rtnl) {
		LIST_HEAD(dev_kill_list);

		/*
		 * The hook is handed dev_kill_list, which is then passed to
		 * unregister_netdevice_many() before rtnl is dropped.
		 */
		rtnl_lock();
		ops->exit_batch_rtnl(net_exit_list, &dev_kill_list);
		unregister_netdevice_many(&dev_kill_list);
		rtnl_unlock();
	}
	ops_exit_list(ops, net_exit_list);

	ops_free_list(ops, net_exit_list);
}
1228
1229#ifdef CONFIG_NET_NS
1230static int __register_pernet_operations(struct list_head *list,
1231					struct pernet_operations *ops)
1232{
1233	struct net *net;
1234	int error;
1235	LIST_HEAD(net_exit_list);
1236
1237	list_add_tail(&ops->list, list);
1238	if (ops->init || (ops->id && ops->size)) {
1239		/* We held write locked pernet_ops_rwsem, and parallel
1240		 * setup_net() and cleanup_net() are not possible.
1241		 */
1242		for_each_net(net) {
1243			error = ops_init(ops, net);
1244			if (error)
1245				goto out_undo;
1246			list_add_tail(&net->exit_list, &net_exit_list);
1247		}
1248	}
1249	return 0;
1250
1251out_undo:
1252	/* If I have an error cleanup all namespaces I initialized */
1253	list_del(&ops->list);
1254	free_exit_list(ops, &net_exit_list);
1255	return error;
1256}
1257
1258static void __unregister_pernet_operations(struct pernet_operations *ops)
1259{
1260	struct net *net;
1261	LIST_HEAD(net_exit_list);
1262
1263	list_del(&ops->list);
1264	/* See comment in __register_pernet_operations() */
1265	for_each_net(net)
1266		list_add_tail(&net->exit_list, &net_exit_list);
1267
1268	free_exit_list(ops, &net_exit_list);
1269}
1270
1271#else
1272
1273static int __register_pernet_operations(struct list_head *list,
1274					struct pernet_operations *ops)
1275{
1276	if (!init_net_initialized) {
1277		list_add_tail(&ops->list, list);
1278		return 0;
1279	}
1280
1281	return ops_init(ops, &init_net);
1282}
1283
1284static void __unregister_pernet_operations(struct pernet_operations *ops)
1285{
1286	if (!init_net_initialized) {
1287		list_del(&ops->list);
1288	} else {
1289		LIST_HEAD(net_exit_list);
1290		list_add(&init_net.exit_list, &net_exit_list);
1291		free_exit_list(ops, &net_exit_list);
1292	}
1293}
1294
1295#endif /* CONFIG_NET_NS */
1296
/* Allocator for the ids stored through pernet_operations->id; ids are
 * handed out starting at MIN_PERNET_OPS_ID.  Protected by pernet_ops_rwsem.
 */
static DEFINE_IDA(net_generic_ids);
1298
1299static int register_pernet_operations(struct list_head *list,
1300				      struct pernet_operations *ops)
1301{
1302	int error;
1303
1304	if (ops->id) {
1305		error = ida_alloc_min(&net_generic_ids, MIN_PERNET_OPS_ID,
1306				GFP_KERNEL);
1307		if (error < 0)
1308			return error;
1309		*ops->id = error;
1310		max_gen_ptrs = max(max_gen_ptrs, *ops->id + 1);
1311	}
1312	error = __register_pernet_operations(list, ops);
1313	if (error) {
1314		rcu_barrier();
1315		if (ops->id)
1316			ida_free(&net_generic_ids, *ops->id);
1317	}
1318
1319	return error;
1320}
1321
1322static void unregister_pernet_operations(struct pernet_operations *ops)
1323{
1324	__unregister_pernet_operations(ops);
1325	rcu_barrier();
1326	if (ops->id)
1327		ida_free(&net_generic_ids, *ops->id);
1328}
1329
1330/**
1331 *      register_pernet_subsys - register a network namespace subsystem
1332 *	@ops:  pernet operations structure for the subsystem
1333 *
1334 *	Register a subsystem which has init and exit functions
1335 *	that are called when network namespaces are created and
1336 *	destroyed respectively.
1337 *
1338 *	When registered all network namespace init functions are
1339 *	called for every existing network namespace.  Allowing kernel
1340 *	modules to have a race free view of the set of network namespaces.
1341 *
1342 *	When a new network namespace is created all of the init
1343 *	methods are called in the order in which they were registered.
1344 *
1345 *	When a network namespace is destroyed all of the exit methods
1346 *	are called in the reverse of the order with which they were
1347 *	registered.
1348 */
1349int register_pernet_subsys(struct pernet_operations *ops)
1350{
1351	int error;
1352	down_write(&pernet_ops_rwsem);
1353	error =  register_pernet_operations(first_device, ops);
1354	up_write(&pernet_ops_rwsem);
1355	return error;
1356}
1357EXPORT_SYMBOL_GPL(register_pernet_subsys);
1358
/**
 *      unregister_pernet_subsys - unregister a network namespace subsystem
 *	@ops: pernet operations structure to manipulate
 *
 *	Remove the pernet operations structure from the list that is
 *	used when network namespaces are created or destroyed.  In
 *	addition, run the exit method for all existing network
 *	namespaces.
 *
 *	The whole operation runs with pernet_ops_rwsem held for writing.
 */
void unregister_pernet_subsys(struct pernet_operations *ops)
{
	down_write(&pernet_ops_rwsem);
	unregister_pernet_operations(ops);
	up_write(&pernet_ops_rwsem);
}
EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
1375
1376/**
1377 *      register_pernet_device - register a network namespace device
1378 *	@ops:  pernet operations structure for the subsystem
1379 *
1380 *	Register a device which has init and exit functions
1381 *	that are called when network namespaces are created and
1382 *	destroyed respectively.
1383 *
1384 *	When registered all network namespace init functions are
1385 *	called for every existing network namespace.  Allowing kernel
1386 *	modules to have a race free view of the set of network namespaces.
1387 *
1388 *	When a new network namespace is created all of the init
1389 *	methods are called in the order in which they were registered.
1390 *
1391 *	When a network namespace is destroyed all of the exit methods
1392 *	are called in the reverse of the order with which they were
1393 *	registered.
1394 */
1395int register_pernet_device(struct pernet_operations *ops)
1396{
1397	int error;
1398	down_write(&pernet_ops_rwsem);
1399	error = register_pernet_operations(&pernet_list, ops);
1400	if (!error && (first_device == &pernet_list))
1401		first_device = &ops->list;
1402	up_write(&pernet_ops_rwsem);
1403	return error;
1404}
1405EXPORT_SYMBOL_GPL(register_pernet_device);
1406
1407/**
1408 *      unregister_pernet_device - unregister a network namespace netdevice
1409 *	@ops: pernet operations structure to manipulate
1410 *
1411 *	Remove the pernet operations structure from the list to be
1412 *	used when network namespaces are created or destroyed.  In
1413 *	addition run the exit method for all existing network
1414 *	namespaces.
1415 */
1416void unregister_pernet_device(struct pernet_operations *ops)
1417{
1418	down_write(&pernet_ops_rwsem);
1419	if (&ops->list == first_device)
1420		first_device = first_device->next;
1421	unregister_pernet_operations(ops);
1422	up_write(&pernet_ops_rwsem);
1423}
1424EXPORT_SYMBOL_GPL(unregister_pernet_device);
1425
1426#ifdef CONFIG_NET_NS
1427static struct ns_common *netns_get(struct task_struct *task)
1428{
1429	struct net *net = NULL;
1430	struct nsproxy *nsproxy;
1431
1432	task_lock(task);
1433	nsproxy = task->nsproxy;
1434	if (nsproxy)
1435		net = get_net(nsproxy->net_ns);
1436	task_unlock(task);
1437
1438	return net ? &net->ns : NULL;
1439}
1440
/* Map an ns_common embedded in a struct net back to that struct net. */
static inline struct net *to_net_ns(struct ns_common *ns)
{
	return container_of(ns, struct net, ns);
}
1445
/* ->put hook of netns_operations: put_net() the namespace embedding @ns. */
static void netns_put(struct ns_common *ns)
{
	put_net(to_net_ns(ns));
}
1450
1451static int netns_install(struct nsset *nsset, struct ns_common *ns)
1452{
1453	struct nsproxy *nsproxy = nsset->nsproxy;
1454	struct net *net = to_net_ns(ns);
1455
1456	if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) ||
1457	    !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN))
1458		return -EPERM;
1459
1460	put_net(nsproxy->net_ns);
1461	nsproxy->net_ns = get_net(net);
1462	return 0;
1463}
1464
/* ->owner hook of netns_operations: user_ns of the namespace embedding @ns. */
static struct user_namespace *netns_owner(struct ns_common *ns)
{
	return to_net_ns(ns)->user_ns;
}
1469
/* proc namespace operations for the "net" namespace (CLONE_NEWNET), wired
 * to the netns_get/put/install/owner helpers in this file.
 */
const struct proc_ns_operations netns_operations = {
	.name		= "net",
	.type		= CLONE_NEWNET,
	.get		= netns_get,
	.put		= netns_put,
	.install	= netns_install,
	.owner		= netns_owner,
};
1478#endif
1479