// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	ip6_flowlabel.c		IPv6 flowlabel manager.
 *
 *	Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/pid_namespace.h>
#include <linux/jump_label_ratelimit.h>

#include <net/net_namespace.h>
#include <net/sock.h>

#include <net/ipv6.h>
#include <net/rawv6.h>
#include <net/transp_v6.h>

#include <linux/uaccess.h>

#define FL_MIN_LINGER	6	/* Minimal linger. Set to the 6 second value
				   specified in the old IPv6 RFC; still a
				   reasonable default. */
#define FL_MAX_LINGER	150	/* Maximal linger timeout */

/* FL hash table */

#define FL_MAX_PER_SOCK	32
#define FL_MAX_SIZE	4096
#define FL_HASH_MASK	255
#define FL_HASH(l)	(ntohl(l)&FL_HASH_MASK)

static atomic_t fl_size = ATOMIC_INIT(0);
static struct ip6_flowlabel __rcu *fl_ht[FL_HASH_MASK+1];

static void ip6_fl_gc(struct timer_list *unused);
static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc);

/* FL hash table lock: it protects only the GC */

static DEFINE_SPINLOCK(ip6_fl_lock);

/* Lock protecting the per-socket flowlabel lists */

static DEFINE_SPINLOCK(ip6_sk_fl_lock);

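/*
 * Deferred static key: lets the TX fast path (fl6_sock_lookup() in
 * include/net/ipv6.h) skip per-packet flowlabel ownership checks entirely
 * while no owned or option-carrying labels exist.  The HZ timeout batches
 * decrements so rapid create/release cycles do not thrash the
 * branch-patching machinery.
 */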
DEFINE_STATIC_KEY_DEFERRED_FALSE(ipv6_flowlabel_exclusive, HZ);
EXPORT_SYMBOL(ipv6_flowlabel_exclusive);

#define for_each_fl_rcu(hash, fl)				\
	for (fl = rcu_dereference(fl_ht[(hash)]);		\
	     fl != NULL;					\
	     fl = rcu_dereference(fl->next))
#define for_each_fl_continue_rcu(fl)				\
	for (fl = rcu_dereference(fl->next);			\
	     fl != NULL;					\
	     fl = rcu_dereference(fl->next))

#define for_each_sk_fl_rcu(np, sfl)				\
	for (sfl = rcu_dereference(np->ipv6_fl_list);	\
	     sfl != NULL;					\
	     sfl = rcu_dereference(sfl->next))

static inline struct ip6_flowlabel *__fl_lookup(struct net *net, __be32 label)
{
	struct ip6_flowlabel *fl;

	for_each_fl_rcu(FL_HASH(label), fl) {
		if (fl->label == label && net_eq(fl->fl_net, net))
			return fl;
	}
	return NULL;
}

static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label)
{
	struct ip6_flowlabel *fl;

	rcu_read_lock();
	fl = __fl_lookup(net, label);
	if (fl && !atomic_inc_not_zero(&fl->users))
		fl = NULL;
	rcu_read_unlock();
	return fl;
}

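/*
 * True for share modes that give the label an owner (exclusive,
 * per-process or per-user): these are the labels the
 * ipv6_flowlabel_exclusive static key above must account for.
 */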
static bool fl_shared_exclusive(struct ip6_flowlabel *fl)
{
	return fl->share == IPV6_FL_S_EXCL ||
	       fl->share == IPV6_FL_S_PROCESS ||
	       fl->share == IPV6_FL_S_USER;
}

static void fl_free_rcu(struct rcu_head *head)
{
	struct ip6_flowlabel *fl = container_of(head, struct ip6_flowlabel, rcu);

	if (fl->share == IPV6_FL_S_PROCESS)
		put_pid(fl->owner.pid);
	kfree(fl->opt);
	kfree(fl);
}

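/* Drop the static-branch count taken in fl_create() and free via RCU. */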
static void fl_free(struct ip6_flowlabel *fl)
{
	if (!fl)
		return;

	if (fl_shared_exclusive(fl) || fl->opt)
		static_branch_slow_dec_deferred(&ipv6_flowlabel_exclusive);

	call_rcu(&fl->rcu, fl_free_rcu);
}

static void fl_release(struct ip6_flowlabel *fl)
{
	spin_lock_bh(&ip6_fl_lock);

	fl->lastuse = jiffies;
	if (atomic_dec_and_test(&fl->users)) {
		unsigned long ttd = fl->lastuse + fl->linger;

		if (time_after(ttd, fl->expires))
			fl->expires = ttd;
		ttd = fl->expires;
		if (fl->opt && fl->share == IPV6_FL_S_EXCL) {
			struct ipv6_txoptions *opt = fl->opt;

			fl->opt = NULL;
			kfree(opt);
		}
		if (!timer_pending(&ip6_fl_gc_timer) ||
		    time_after(ip6_fl_gc_timer.expires, ttd))
			mod_timer(&ip6_fl_gc_timer, ttd);
	}
	spin_unlock_bh(&ip6_fl_lock);
}

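/*
 * Timer-driven garbage collector: walk every hash bucket, free labels
 * that have no users and whose expiry has passed, then re-arm the timer
 * for the earliest remaining expiry.
 */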
static void ip6_fl_gc(struct timer_list *unused)
{
	unsigned long now = jiffies;
	unsigned long sched = 0;
	int i;

	spin_lock(&ip6_fl_lock);

	for (i = 0; i <= FL_HASH_MASK; i++) {
		struct ip6_flowlabel *fl;
		struct ip6_flowlabel __rcu **flp;

		flp = &fl_ht[i];
		while ((fl = rcu_dereference_protected(*flp,
						       lockdep_is_held(&ip6_fl_lock))) != NULL) {
			if (atomic_read(&fl->users) == 0) {
				unsigned long ttd = fl->lastuse + fl->linger;

				if (time_after(ttd, fl->expires))
					fl->expires = ttd;
				ttd = fl->expires;
				if (time_after_eq(now, ttd)) {
					*flp = fl->next;
					fl_free(fl);
					atomic_dec(&fl_size);
					continue;
				}
				if (!sched || time_before(ttd, sched))
					sched = ttd;
			}
			flp = &fl->next;
		}
	}
	if (!sched && atomic_read(&fl_size))
		sched = now + FL_MAX_LINGER;
	if (sched)
		mod_timer(&ip6_fl_gc_timer, sched);
	spin_unlock(&ip6_fl_lock);
}

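/*
 * Per-netns teardown: unhash and free every unreferenced label that
 * belongs to the dying namespace.
 */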
static void __net_exit ip6_fl_purge(struct net *net)
{
	int i;

	spin_lock_bh(&ip6_fl_lock);
	for (i = 0; i <= FL_HASH_MASK; i++) {
		struct ip6_flowlabel *fl;
		struct ip6_flowlabel __rcu **flp;

		flp = &fl_ht[i];
		while ((fl = rcu_dereference_protected(*flp,
						       lockdep_is_held(&ip6_fl_lock))) != NULL) {
			if (net_eq(fl->fl_net, net) &&
			    atomic_read(&fl->users) == 0) {
				*flp = fl->next;
				fl_free(fl);
				atomic_dec(&fl_size);
				continue;
			}
			flp = &fl->next;
		}
	}
	spin_unlock_bh(&ip6_fl_lock);
}

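/*
 * Insert @fl into the global hash.  A zero @label asks for a random,
 * currently unused label.  Returns NULL on success; if @label already
 * exists, returns the existing entry with a reference taken so the
 * caller can recheck its sharing permissions (see ipv6_flowlabel_get()).
 */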
static struct ip6_flowlabel *fl_intern(struct net *net,
				       struct ip6_flowlabel *fl, __be32 label)
{
	struct ip6_flowlabel *lfl;

	fl->label = label & IPV6_FLOWLABEL_MASK;

	rcu_read_lock();
	spin_lock_bh(&ip6_fl_lock);
	if (label == 0) {
		for (;;) {
			fl->label = htonl(get_random_u32()) & IPV6_FLOWLABEL_MASK;
			if (fl->label) {
				lfl = __fl_lookup(net, fl->label);
				if (!lfl)
					break;
			}
		}
	} else {
		/*
		 * We dropped the ip6_fl_lock earlier, so the entry could
		 * have reappeared in the meantime; recheck for it here.
		 *
		 * OTOH there is no need to search the socket's own list
		 * first, as ipv6_flowlabel_opt() does: the socket is
		 * locked, so a new entry with the same label can only
		 * appear on another socket.
		 */
		lfl = __fl_lookup(net, fl->label);
		if (lfl) {
			atomic_inc(&lfl->users);
			spin_unlock_bh(&ip6_fl_lock);
			rcu_read_unlock();
			return lfl;
		}
	}

	fl->lastuse = jiffies;
	fl->next = fl_ht[FL_HASH(fl->label)];
	rcu_assign_pointer(fl_ht[FL_HASH(fl->label)], fl);
	atomic_inc(&fl_size);
	spin_unlock_bh(&ip6_fl_lock);
	rcu_read_unlock();
	return NULL;
}

/* Socket flowlabel lists */

struct ip6_flowlabel *__fl6_sock_lookup(struct sock *sk, __be32 label)
{
	struct ipv6_fl_socklist *sfl;
	struct ipv6_pinfo *np = inet6_sk(sk);

	label &= IPV6_FLOWLABEL_MASK;

	rcu_read_lock();
	for_each_sk_fl_rcu(np, sfl) {
		struct ip6_flowlabel *fl = sfl->fl;

		if (fl->label == label && atomic_inc_not_zero(&fl->users)) {
			fl->lastuse = jiffies;
			rcu_read_unlock();
			return fl;
		}
	}
	rcu_read_unlock();
	return NULL;
}
EXPORT_SYMBOL_GPL(__fl6_sock_lookup);

void fl6_free_socklist(struct sock *sk)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6_fl_socklist *sfl;

	if (!rcu_access_pointer(np->ipv6_fl_list))
		return;

	spin_lock_bh(&ip6_sk_fl_lock);
	while ((sfl = rcu_dereference_protected(np->ipv6_fl_list,
						lockdep_is_held(&ip6_sk_fl_lock))) != NULL) {
		np->ipv6_fl_list = sfl->next;
		spin_unlock_bh(&ip6_sk_fl_lock);

		fl_release(sfl->fl);
		kfree_rcu(sfl, rcu);

		spin_lock_bh(&ip6_sk_fl_lock);
	}
	spin_unlock_bh(&ip6_sk_fl_lock);
}

/* Service routines */

/*
 * This is the only difficult place: a flowlabel enforces identical
 * headers up to and including the routing header, but the user may
 * still supply options following the rthdr.
 */

struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space,
					 struct ip6_flowlabel *fl,
					 struct ipv6_txoptions *fopt)
{
	struct ipv6_txoptions *fl_opt = fl->opt;

	if (!fopt || fopt->opt_flen == 0)
		return fl_opt;

	if (fl_opt) {
		opt_space->hopopt = fl_opt->hopopt;
		opt_space->dst0opt = fl_opt->dst0opt;
		opt_space->srcrt = fl_opt->srcrt;
		opt_space->opt_nflen = fl_opt->opt_nflen;
	} else {
		if (fopt->opt_nflen == 0)
			return fopt;
		opt_space->hopopt = NULL;
		opt_space->dst0opt = NULL;
		opt_space->srcrt = NULL;
		opt_space->opt_nflen = 0;
	}
	opt_space->dst1opt = fopt->dst1opt;
	opt_space->opt_flen = fopt->opt_flen;
	opt_space->tot_len = fopt->tot_len;
	return opt_space;
}
EXPORT_SYMBOL_GPL(fl6_merge_options);

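/*
 * Convert a user-supplied linger/expiry value in seconds to jiffies:
 * enforce FL_MIN_LINGER, and refuse (return 0) values above FL_MAX_LINGER
 * unless the caller has CAP_NET_ADMIN.
 */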
static unsigned long check_linger(unsigned long ttl)
{
	if (ttl < FL_MIN_LINGER)
		return FL_MIN_LINGER*HZ;
	if (ttl > FL_MAX_LINGER && !capable(CAP_NET_ADMIN))
		return 0;
	return ttl*HZ;
}

static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned long expires)
{
	linger = check_linger(linger);
	if (!linger)
		return -EPERM;
	expires = check_linger(expires);
	if (!expires)
		return -EPERM;

	spin_lock_bh(&ip6_fl_lock);
	fl->lastuse = jiffies;
	if (time_before(fl->linger, linger))
		fl->linger = linger;
	if (time_before(expires, fl->linger))
		expires = fl->linger;
	if (time_before(fl->expires, fl->lastuse + expires))
		fl->expires = fl->lastuse + expires;
	spin_unlock_bh(&ip6_fl_lock);

	return 0;
}

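/*
 * Build a new label from a user request.  Any cmsg-formatted options
 * trailing the request are parsed with ip6_datagram_send_ctl(); options
 * that would follow the routing header (opt_flen != 0) are rejected,
 * since a flowlabel can only pin headers up to and including the rthdr
 * (see the comment above fl6_merge_options()).
 */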
static struct ip6_flowlabel *
fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
	  sockptr_t optval, int optlen, int *err_p)
{
	struct ip6_flowlabel *fl = NULL;
	int olen;
	int addr_type;
	int err;

	olen = optlen - CMSG_ALIGN(sizeof(*freq));
	err = -EINVAL;
	if (olen > 64 * 1024)
		goto done;

	err = -ENOMEM;
	fl = kzalloc(sizeof(*fl), GFP_KERNEL);
	if (!fl)
		goto done;

	if (olen > 0) {
		struct msghdr msg;
		struct flowi6 flowi6;
		struct ipcm6_cookie ipc6;

		err = -ENOMEM;
		fl->opt = kmalloc(sizeof(*fl->opt) + olen, GFP_KERNEL);
		if (!fl->opt)
			goto done;

		memset(fl->opt, 0, sizeof(*fl->opt));
		fl->opt->tot_len = sizeof(*fl->opt) + olen;
		err = -EFAULT;
		if (copy_from_sockptr_offset(fl->opt + 1, optval,
				CMSG_ALIGN(sizeof(*freq)), olen))
			goto done;

		msg.msg_controllen = olen;
		msg.msg_control = (void *)(fl->opt+1);
		memset(&flowi6, 0, sizeof(flowi6));

		ipc6.opt = fl->opt;
		err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, &ipc6);
		if (err)
			goto done;
		err = -EINVAL;
		if (fl->opt->opt_flen)
			goto done;
		if (fl->opt->opt_nflen == 0) {
			kfree(fl->opt);
			fl->opt = NULL;
		}
	}

	fl->fl_net = net;
	fl->expires = jiffies;
	err = fl6_renew(fl, freq->flr_linger, freq->flr_expires);
	if (err)
		goto done;
	fl->share = freq->flr_share;
	addr_type = ipv6_addr_type(&freq->flr_dst);
	if ((addr_type & IPV6_ADDR_MAPPED) ||
	    addr_type == IPV6_ADDR_ANY) {
		err = -EINVAL;
		goto done;
	}
	fl->dst = freq->flr_dst;
	atomic_set(&fl->users, 1);
	switch (fl->share) {
	case IPV6_FL_S_EXCL:
	case IPV6_FL_S_ANY:
		break;
	case IPV6_FL_S_PROCESS:
		fl->owner.pid = get_task_pid(current, PIDTYPE_PID);
		break;
	case IPV6_FL_S_USER:
		fl->owner.uid = current_euid();
		break;
	default:
		err = -EINVAL;
		goto done;
	}
	if (fl_shared_exclusive(fl) || fl->opt) {
		WRITE_ONCE(sock_net(sk)->ipv6.flowlabel_has_excl, 1);
		static_branch_deferred_inc(&ipv6_flowlabel_exclusive);
	}
	return fl;

done:
	if (fl) {
		kfree(fl->opt);
		kfree(fl);
	}
	*err_p = err;
	return NULL;
}

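/*
 * Admission control: the global table is capped at FL_MAX_SIZE labels and
 * each socket at FL_MAX_PER_SOCK; unprivileged users are throttled earlier
 * as the table fills up.
 */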
static int mem_check(struct sock *sk)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6_fl_socklist *sfl;
	int room = FL_MAX_SIZE - atomic_read(&fl_size);
	int count = 0;

	if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK)
		return 0;

	rcu_read_lock();
	for_each_sk_fl_rcu(np, sfl)
		count++;
	rcu_read_unlock();

	if (room <= 0 ||
	    ((count >= FL_MAX_PER_SOCK ||
	      (count > 0 && room < FL_MAX_SIZE/2) || room < FL_MAX_SIZE/4) &&
	     !capable(CAP_NET_ADMIN)))
		return -ENOBUFS;

	return 0;
}

static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl,
		struct ip6_flowlabel *fl)
{
	spin_lock_bh(&ip6_sk_fl_lock);
	sfl->fl = fl;
	sfl->next = np->ipv6_fl_list;
	rcu_assign_pointer(np->ipv6_fl_list, sfl);
	spin_unlock_bh(&ip6_sk_fl_lock);
}

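/*
 * getsockopt(IPV6_FLOWLABEL_MGR): report either the last label received
 * from the peer (IPV6_FL_F_REMOTE), the reflected label, or the
 * attributes of the managed label currently set on the socket.
 */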
int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq,
			   int flags)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6_fl_socklist *sfl;

	if (flags & IPV6_FL_F_REMOTE) {
		freq->flr_label = np->rcv_flowinfo & IPV6_FLOWLABEL_MASK;
		return 0;
	}

	if (inet6_test_bit(REPFLOW, sk)) {
		freq->flr_label = np->flow_label;
		return 0;
	}

	rcu_read_lock();

	for_each_sk_fl_rcu(np, sfl) {
		if (sfl->fl->label == (np->flow_label & IPV6_FLOWLABEL_MASK)) {
			spin_lock_bh(&ip6_fl_lock);
			freq->flr_label = sfl->fl->label;
			freq->flr_dst = sfl->fl->dst;
			freq->flr_share = sfl->fl->share;
			freq->flr_expires = (sfl->fl->expires - jiffies) / HZ;
			freq->flr_linger = sfl->fl->linger / HZ;

			spin_unlock_bh(&ip6_fl_lock);
			rcu_read_unlock();
			return 0;
		}
	}
	rcu_read_unlock();

	return -ENOENT;
}

#define socklist_dereference(__sflp) \
	rcu_dereference_protected(__sflp, lockdep_is_held(&ip6_sk_fl_lock))

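/*
 * IPV6_FL_A_PUT: detach a label (or the REPFLOW reflection state) from
 * the socket and drop its reference; the hash table entry itself lingers
 * until the GC timer reaps it.
 */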
static int ipv6_flowlabel_put(struct sock *sk, struct in6_flowlabel_req *freq)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6_fl_socklist __rcu **sflp;
	struct ipv6_fl_socklist *sfl;

	if (freq->flr_flags & IPV6_FL_F_REFLECT) {
		if (sk->sk_protocol != IPPROTO_TCP)
			return -ENOPROTOOPT;
		if (!inet6_test_bit(REPFLOW, sk))
			return -ESRCH;
		np->flow_label = 0;
		inet6_clear_bit(REPFLOW, sk);
		return 0;
	}

	spin_lock_bh(&ip6_sk_fl_lock);
	for (sflp = &np->ipv6_fl_list;
	     (sfl = socklist_dereference(*sflp)) != NULL;
	     sflp = &sfl->next) {
		if (sfl->fl->label == freq->flr_label)
			goto found;
	}
	spin_unlock_bh(&ip6_sk_fl_lock);
	return -ESRCH;
found:
	if (freq->flr_label == (np->flow_label & IPV6_FLOWLABEL_MASK))
		np->flow_label &= ~IPV6_FLOWLABEL_MASK;
	*sflp = sfl->next;
	spin_unlock_bh(&ip6_sk_fl_lock);
	fl_release(sfl->fl);
	kfree_rcu(sfl, rcu);
	return 0;
}

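/*
 * IPV6_FL_A_RENEW: extend the linger/expiry of a label attached to this
 * socket.  CAP_NET_ADMIN may also renew an arbitrary label (request
 * share mode IPV6_FL_S_NONE) by direct hash lookup.
 */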
static int ipv6_flowlabel_renew(struct sock *sk, struct in6_flowlabel_req *freq)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6_fl_socklist *sfl;
	int err;

	rcu_read_lock();
	for_each_sk_fl_rcu(np, sfl) {
		if (sfl->fl->label == freq->flr_label) {
			err = fl6_renew(sfl->fl, freq->flr_linger,
					freq->flr_expires);
			rcu_read_unlock();
			return err;
		}
	}
	rcu_read_unlock();

	if (freq->flr_share == IPV6_FL_S_NONE &&
	    ns_capable(net->user_ns, CAP_NET_ADMIN)) {
		struct ip6_flowlabel *fl = fl_lookup(net, freq->flr_label);

		if (fl) {
			err = fl6_renew(fl, freq->flr_linger,
					freq->flr_expires);
			fl_release(fl);
			return err;
		}
	}
	return -ESRCH;
}

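/*
 * IPV6_FL_A_GET: create a label or join an existing one.  Joining is
 * allowed only when share mode and owner match; the "recheck" label
 * handles the race where a concurrent creator interned the same label
 * between our lookup and fl_intern().  With flr_label == 0 a random free
 * label is picked and copied back to userspace.
 */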
static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq,
		sockptr_t optval, int optlen)
{
	struct ipv6_fl_socklist *sfl, *sfl1 = NULL;
	struct ip6_flowlabel *fl, *fl1 = NULL;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	int err;

	if (freq->flr_flags & IPV6_FL_F_REFLECT) {
		if (net->ipv6.sysctl.flowlabel_consistency) {
			net_info_ratelimited("Cannot set IPV6_FL_F_REFLECT if the flowlabel_consistency sysctl is enabled\n");
			return -EPERM;
		}

		if (sk->sk_protocol != IPPROTO_TCP)
			return -ENOPROTOOPT;
		inet6_set_bit(REPFLOW, sk);
		return 0;
	}

	if (freq->flr_label & ~IPV6_FLOWLABEL_MASK)
		return -EINVAL;
	if (net->ipv6.sysctl.flowlabel_state_ranges &&
	    (freq->flr_label & IPV6_FLOWLABEL_STATELESS_FLAG))
		return -ERANGE;

	fl = fl_create(net, sk, freq, optval, optlen, &err);
	if (!fl)
		return err;

	sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL);

	if (freq->flr_label) {
		err = -EEXIST;
		rcu_read_lock();
		for_each_sk_fl_rcu(np, sfl) {
			if (sfl->fl->label == freq->flr_label) {
				if (freq->flr_flags & IPV6_FL_F_EXCL) {
					rcu_read_unlock();
					goto done;
				}
				fl1 = sfl->fl;
				if (!atomic_inc_not_zero(&fl1->users))
					fl1 = NULL;
				break;
			}
		}
		rcu_read_unlock();

		if (!fl1)
			fl1 = fl_lookup(net, freq->flr_label);
		if (fl1) {
recheck:
			err = -EEXIST;
			if (freq->flr_flags & IPV6_FL_F_EXCL)
				goto release;
			err = -EPERM;
			if (fl1->share == IPV6_FL_S_EXCL ||
			    fl1->share != fl->share ||
			    ((fl1->share == IPV6_FL_S_PROCESS) &&
			     (fl1->owner.pid != fl->owner.pid)) ||
			    ((fl1->share == IPV6_FL_S_USER) &&
			     !uid_eq(fl1->owner.uid, fl->owner.uid)))
				goto release;

			err = -ENOMEM;
			if (!sfl1)
				goto release;
			if (fl->linger > fl1->linger)
				fl1->linger = fl->linger;
			if ((long)(fl->expires - fl1->expires) > 0)
				fl1->expires = fl->expires;
			fl_link(np, sfl1, fl1);
			fl_free(fl);
			return 0;

release:
			fl_release(fl1);
			goto done;
		}
	}
	err = -ENOENT;
	if (!(freq->flr_flags & IPV6_FL_F_CREATE))
		goto done;

	err = -ENOMEM;
	if (!sfl1)
		goto done;

	err = mem_check(sk);
	if (err != 0)
		goto done;

	fl1 = fl_intern(net, fl, freq->flr_label);
	if (fl1)
		goto recheck;

	if (!freq->flr_label) {
		size_t offset = offsetof(struct in6_flowlabel_req, flr_label);

		if (copy_to_sockptr_offset(optval, offset, &fl->label,
				sizeof(fl->label))) {
			/* Intentionally ignore fault. */
		}
	}

	fl_link(np, sfl1, fl);
	return 0;
done:
	fl_free(fl);
	kfree(sfl1);
	return err;
}

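/*
 * setsockopt(IPV6_FLOWLABEL_MGR) entry point: dispatch on flr_action.
 *
 * Illustrative userspace sketch (not part of this file; error handling
 * omitted, dst_addr is a caller-supplied struct in6_addr):
 *
 *	struct in6_flowlabel_req freq = {
 *		.flr_action = IPV6_FL_A_GET,
 *		.flr_flags  = IPV6_FL_F_CREATE,
 *		.flr_share  = IPV6_FL_S_EXCL,
 *		.flr_dst    = dst_addr,
 *		.flr_label  = 0,
 *	};
 *
 *	setsockopt(fd, SOL_IPV6, IPV6_FLOWLABEL_MGR, &freq, sizeof(freq));
 *
 * With flr_label == 0 the kernel picks a random free label and writes it
 * back into the buffer; see ipv6_flowlabel_get() above.
 */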
int ipv6_flowlabel_opt(struct sock *sk, sockptr_t optval, int optlen)
{
	struct in6_flowlabel_req freq;

	if (optlen < sizeof(freq))
		return -EINVAL;
	if (copy_from_sockptr(&freq, optval, sizeof(freq)))
		return -EFAULT;

	switch (freq.flr_action) {
	case IPV6_FL_A_PUT:
		return ipv6_flowlabel_put(sk, &freq);
	case IPV6_FL_A_RENEW:
		return ipv6_flowlabel_renew(sk, &freq);
	case IPV6_FL_A_GET:
		return ipv6_flowlabel_get(sk, &freq, optval, optlen);
	default:
		return -EINVAL;
	}
}

#ifdef CONFIG_PROC_FS

struct ip6fl_iter_state {
	struct seq_net_private p;
	struct pid_namespace *pid_ns;
	int bucket;
};

#define ip6fl_seq_private(seq)	((struct ip6fl_iter_state *)(seq)->private)

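/*
 * /proc/net/ip6_flowlabel: walk the hash table under RCU, skipping
 * labels that belong to other network namespaces.
 */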
static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq)
{
	struct ip6_flowlabel *fl = NULL;
	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
	struct net *net = seq_file_net(seq);

	for (state->bucket = 0; state->bucket <= FL_HASH_MASK; ++state->bucket) {
		for_each_fl_rcu(state->bucket, fl) {
			if (net_eq(fl->fl_net, net))
				goto out;
		}
	}
	fl = NULL;
out:
	return fl;
}

static struct ip6_flowlabel *ip6fl_get_next(struct seq_file *seq, struct ip6_flowlabel *fl)
{
	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
	struct net *net = seq_file_net(seq);

	for_each_fl_continue_rcu(fl) {
		if (net_eq(fl->fl_net, net))
			goto out;
	}

try_again:
	if (++state->bucket <= FL_HASH_MASK) {
		for_each_fl_rcu(state->bucket, fl) {
			if (net_eq(fl->fl_net, net))
				goto out;
		}
		goto try_again;
	}
	fl = NULL;

out:
	return fl;
}

static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos)
{
	struct ip6_flowlabel *fl = ip6fl_get_first(seq);

	if (fl)
		while (pos && (fl = ip6fl_get_next(seq, fl)) != NULL)
			--pos;
	return pos ? NULL : fl;
}

static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);

	state->pid_ns = proc_pid_ns(file_inode(seq->file)->i_sb);

	rcu_read_lock();
	return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}

static void *ip6fl_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ip6_flowlabel *fl;

	if (v == SEQ_START_TOKEN)
		fl = ip6fl_get_first(seq);
	else
		fl = ip6fl_get_next(seq, v);
	++*pos;
	return fl;
}

static void ip6fl_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}

static int ip6fl_seq_show(struct seq_file *seq, void *v)
{
	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq, "Label S Owner  Users  Linger Expires  Dst                              Opt\n");
	} else {
		struct ip6_flowlabel *fl = v;

		seq_printf(seq,
			   "%05X %-1d %-6d %-6d %-6ld %-8ld %pi6 %-4d\n",
			   (unsigned int)ntohl(fl->label),
			   fl->share,
			   ((fl->share == IPV6_FL_S_PROCESS) ?
			    pid_nr_ns(fl->owner.pid, state->pid_ns) :
			    ((fl->share == IPV6_FL_S_USER) ?
			     from_kuid_munged(seq_user_ns(seq), fl->owner.uid) :
			     0)),
			   atomic_read(&fl->users),
			   fl->linger/HZ,
			   (long)(fl->expires - jiffies)/HZ,
			   &fl->dst,
			   fl->opt ? fl->opt->opt_nflen : 0);
	}
	return 0;
}

static const struct seq_operations ip6fl_seq_ops = {
	.start	=	ip6fl_seq_start,
	.next	=	ip6fl_seq_next,
	.stop	=	ip6fl_seq_stop,
	.show	=	ip6fl_seq_show,
};

static int __net_init ip6_flowlabel_proc_init(struct net *net)
{
	if (!proc_create_net("ip6_flowlabel", 0444, net->proc_net,
			&ip6fl_seq_ops, sizeof(struct ip6fl_iter_state)))
		return -ENOMEM;
	return 0;
}

static void __net_exit ip6_flowlabel_proc_fini(struct net *net)
{
	remove_proc_entry("ip6_flowlabel", net->proc_net);
}
#else
static inline int ip6_flowlabel_proc_init(struct net *net)
{
	return 0;
}
static inline void ip6_flowlabel_proc_fini(struct net *net)
{
}
#endif

static void __net_exit ip6_flowlabel_net_exit(struct net *net)
{
	ip6_fl_purge(net);
	ip6_flowlabel_proc_fini(net);
}

static struct pernet_operations ip6_flowlabel_net_ops = {
	.init = ip6_flowlabel_proc_init,
	.exit = ip6_flowlabel_net_exit,
};

int ip6_flowlabel_init(void)
{
	return register_pernet_subsys(&ip6_flowlabel_net_ops);
}

void ip6_flowlabel_cleanup(void)
{
	static_key_deferred_flush(&ipv6_flowlabel_exclusive);
	del_timer(&ip6_fl_gc_timer);
	unregister_pernet_subsys(&ip6_flowlabel_net_ops);
}