1/*
2 * net/sched/cls_rsvp.h	Template file for RSVPv[46] classifiers.
3 *
4 *		This program is free software; you can redistribute it and/or
5 *		modify it under the terms of the GNU General Public License
6 *		as published by the Free Software Foundation; either version
7 *		2 of the License, or (at your option) any later version.
8 *
9 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 */
11
12/*
   Compared to the general packet classification problem,
   RSVP needs only several relatively simple rules:
15
16   * (dst, protocol) are always specified,
17     so that we are able to hash them.
18   * src may be exact, or may be wildcard, so that
19     we can keep a hash table plus one wildcard entry.
20   * source port (or flow label) is important only if src is given.
21
22   IMPLEMENTATION.
23
24   We use a two level hash table: The top level is keyed by
25   destination address and protocol ID, every bucket contains a list
26   of "rsvp sessions", identified by destination address, protocol and
27   DPI(="Destination Port ID"): triple (key, mask, offset).
28
29   Every bucket has a smaller hash table keyed by source address
30   (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
31   Every bucket is again a list of "RSVP flows", selected by
32   source address and SPI(="Source Port ID" here rather than
33   "security parameter index"): triple (key, mask, offset).
34
35
36   NOTE 1. All the packets with IPv6 extension headers (but AH and ESP)
37   and all fragmented packets go to the best-effort traffic class.
38
39
   NOTE 2. Two "port id"s seem to be redundant; rfc2207 requires
   only one "Generalized Port Identifier". So for classic
   ah, esp (and udp, tcp) both *pi should coincide or one of them
   should be wildcard.
44
45   At first sight, this redundancy is just a waste of CPU
46   resources. But DPI and SPI add the possibility to assign different
47   priorities to GPIs. Look also at note 4 about tunnels below.
48
49
   NOTE 3. One complication is the case of tunneled packets.
   We implement it as follows: if the first lookup
   matches a special session with a non-zero "tunnelhdr" value,
   flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
   In this case, we pull tunnelhdr bytes and restart the lookup
   with the tunnel ID added to the list of keys. Simple and stupid 8)8)
   It's enough for PIMREG and IPIP.
57
58
59   NOTE 4. Two GPIs make it possible to parse even GRE packets.
60   F.e. DPI can select ETH_P_IP (and necessary flags to make
61   tunnelhdr correct) in GRE protocol field and SPI matches
62   GRE key. Is it not nice? 8)8)
63
64
   Well, as a result, despite its simplicity, we get a pretty
   powerful classification engine.  */
67
68
69struct rsvp_head
70{
71	u32			tmap[256/32];
72	u32			hgenerator;
73	u8			tgenerator;
74	struct rsvp_session	*ht[256];
75};
76
77struct rsvp_session
78{
79	struct rsvp_session	*next;
80	__be32			dst[RSVP_DST_LEN];
81	struct tc_rsvp_gpi 	dpi;
82	u8			protocol;
83	u8			tunnelid;
84	/* 16 (src,sport) hash slots, and one wildcard source slot */
85	struct rsvp_filter	*ht[16+1];
86};
87
88
89struct rsvp_filter
90{
91	struct rsvp_filter	*next;
92	__be32			src[RSVP_DST_LEN];
93	struct tc_rsvp_gpi	spi;
94	u8			tunnelhdr;
95
96	struct tcf_result	res;
97	struct tcf_exts		exts;
98
99	u32			handle;
100	struct rsvp_session	*sess;
101};
102
103static __inline__ unsigned hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
104{
105	unsigned h = (__force __u32)dst[RSVP_DST_LEN-1];
106	h ^= h>>16;
107	h ^= h>>8;
108	return (h ^ protocol ^ tunnelid) & 0xFF;
109}
110
111static __inline__ unsigned hash_src(__be32 *src)
112{
113	unsigned h = (__force __u32)src[RSVP_DST_LEN-1];
114	h ^= h>>16;
115	h ^= h>>8;
116	h ^= h>>4;
117	return h & 0xF;
118}
119
120static struct tcf_ext_map rsvp_ext_map = {
121	.police = TCA_RSVP_POLICE,
122	.action = TCA_RSVP_ACT
123};
124
/*
 * Execute the extensions of filter `f` on `skb` and act on the verdict:
 *   < 0  skip this filter and `continue` the caller's filter loop,
 *   > 0  return the verdict from the calling function,
 *   == 0 fall through to the code after the macro.
 *
 * Expects `skb`, `f` and `res` to be in scope and must be expanded directly
 * inside the loop over candidate filters.  It is intentionally a bare brace
 * block, not do { } while (0): the `continue` has to bind to that loop.
 */
#define RSVP_APPLY_RESULT()					\
{								\
	int verdict = tcf_exts_exec(skb, &f->exts, res);	\
								\
	if (verdict < 0)					\
		continue;					\
	if (verdict > 0)					\
		return verdict;					\
}
133
134static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
135			 struct tcf_result *res)
136{
137	struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
138	struct rsvp_session *s;
139	struct rsvp_filter *f;
140	unsigned h1, h2;
141	__be32 *dst, *src;
142	u8 protocol;
143	u8 tunnelid = 0;
144	u8 *xprt;
145#if RSVP_DST_LEN == 4
146	struct ipv6hdr *nhptr;
147
148	if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
149		return -1;
150	nhptr = ipv6_hdr(skb);
151#else
152	struct iphdr *nhptr;
153
154	if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
155		return -1;
156	nhptr = ip_hdr(skb);
157#endif
158
159restart:
160
161#if RSVP_DST_LEN == 4
162	src = &nhptr->saddr.s6_addr32[0];
163	dst = &nhptr->daddr.s6_addr32[0];
164	protocol = nhptr->nexthdr;
165	xprt = ((u8*)nhptr) + sizeof(struct ipv6hdr);
166#else
167	src = &nhptr->saddr;
168	dst = &nhptr->daddr;
169	protocol = nhptr->protocol;
170	xprt = ((u8*)nhptr) + (nhptr->ihl<<2);
171	if (nhptr->frag_off & htons(IP_MF|IP_OFFSET))
172		return -1;
173#endif
174
175	h1 = hash_dst(dst, protocol, tunnelid);
176	h2 = hash_src(src);
177
178	for (s = sht[h1]; s; s = s->next) {
179		if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
180		    protocol == s->protocol &&
181		    !(s->dpi.mask &
182		      (*(u32*)(xprt+s->dpi.offset)^s->dpi.key)) &&
183#if RSVP_DST_LEN == 4
184		    dst[0] == s->dst[0] &&
185		    dst[1] == s->dst[1] &&
186		    dst[2] == s->dst[2] &&
187#endif
188		    tunnelid == s->tunnelid) {
189
190			for (f = s->ht[h2]; f; f = f->next) {
191				if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN-1] &&
192				    !(f->spi.mask & (*(u32*)(xprt+f->spi.offset)^f->spi.key))
193#if RSVP_DST_LEN == 4
194				    &&
195				    src[0] == f->src[0] &&
196				    src[1] == f->src[1] &&
197				    src[2] == f->src[2]
198#endif
199				    ) {
200					*res = f->res;
201					RSVP_APPLY_RESULT();
202
203matched:
204					if (f->tunnelhdr == 0)
205						return 0;
206
207					tunnelid = f->res.classid;
208					nhptr = (void*)(xprt + f->tunnelhdr - sizeof(*nhptr));
209					goto restart;
210				}
211			}
212
213			/* And wildcard bucket... */
214			for (f = s->ht[16]; f; f = f->next) {
215				*res = f->res;
216				RSVP_APPLY_RESULT();
217				goto matched;
218			}
219			return -1;
220		}
221	}
222	return -1;
223}
224
225static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
226{
227	struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
228	struct rsvp_session *s;
229	struct rsvp_filter *f;
230	unsigned h1 = handle&0xFF;
231	unsigned h2 = (handle>>8)&0xFF;
232
233	if (h2 > 16)
234		return 0;
235
236	for (s = sht[h1]; s; s = s->next) {
237		for (f = s->ht[h2]; f; f = f->next) {
238			if (f->handle == handle)
239				return (unsigned long)f;
240		}
241	}
242	return 0;
243}
244
/* Counterpart of rsvp_get(); intentionally does nothing. */
static void rsvp_put(struct tcf_proto *tp, unsigned long f)
{
	/* nothing to release */
}
248
249static int rsvp_init(struct tcf_proto *tp)
250{
251	struct rsvp_head *data;
252
253	data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL);
254	if (data) {
255		tp->root = data;
256		return 0;
257	}
258	return -ENOBUFS;
259}
260
261static inline void
262rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
263{
264	tcf_unbind_filter(tp, &f->res);
265	tcf_exts_destroy(tp, &f->exts);
266	kfree(f);
267}
268
269static void rsvp_destroy(struct tcf_proto *tp)
270{
271	struct rsvp_head *data = xchg(&tp->root, NULL);
272	struct rsvp_session **sht;
273	int h1, h2;
274
275	if (data == NULL)
276		return;
277
278	sht = data->ht;
279
280	for (h1=0; h1<256; h1++) {
281		struct rsvp_session *s;
282
283		while ((s = sht[h1]) != NULL) {
284			sht[h1] = s->next;
285
286			for (h2=0; h2<=16; h2++) {
287				struct rsvp_filter *f;
288
289				while ((f = s->ht[h2]) != NULL) {
290					s->ht[h2] = f->next;
291					rsvp_delete_filter(tp, f);
292				}
293			}
294			kfree(s);
295		}
296	}
297	kfree(data);
298}
299
300static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
301{
302	struct rsvp_filter **fp, *f = (struct rsvp_filter*)arg;
303	unsigned h = f->handle;
304	struct rsvp_session **sp;
305	struct rsvp_session *s = f->sess;
306	int i;
307
308	for (fp = &s->ht[(h>>8)&0xFF]; *fp; fp = &(*fp)->next) {
309		if (*fp == f) {
310			tcf_tree_lock(tp);
311			*fp = f->next;
312			tcf_tree_unlock(tp);
313			rsvp_delete_filter(tp, f);
314
315			/* Strip tree */
316
317			for (i=0; i<=16; i++)
318				if (s->ht[i])
319					return 0;
320
321			/* OK, session has no flows */
322			for (sp = &((struct rsvp_head*)tp->root)->ht[h&0xFF];
323			     *sp; sp = &(*sp)->next) {
324				if (*sp == s) {
325					tcf_tree_lock(tp);
326					*sp = s->next;
327					tcf_tree_unlock(tp);
328
329					kfree(s);
330					return 0;
331				}
332			}
333
334			return 0;
335		}
336	}
337	return 0;
338}
339
340static unsigned gen_handle(struct tcf_proto *tp, unsigned salt)
341{
342	struct rsvp_head *data = tp->root;
343	int i = 0xFFFF;
344
345	while (i-- > 0) {
346		u32 h;
347		if ((data->hgenerator += 0x10000) == 0)
348			data->hgenerator = 0x10000;
349		h = data->hgenerator|salt;
350		if (rsvp_get(tp, h) == 0)
351			return h;
352	}
353	return 0;
354}
355
356static int tunnel_bts(struct rsvp_head *data)
357{
358	int n = data->tgenerator>>5;
359	u32 b = 1<<(data->tgenerator&0x1F);
360
361	if (data->tmap[n]&b)
362		return 0;
363	data->tmap[n] |= b;
364	return 1;
365}
366
367static void tunnel_recycle(struct rsvp_head *data)
368{
369	struct rsvp_session **sht = data->ht;
370	u32 tmap[256/32];
371	int h1, h2;
372
373	memset(tmap, 0, sizeof(tmap));
374
375	for (h1=0; h1<256; h1++) {
376		struct rsvp_session *s;
377		for (s = sht[h1]; s; s = s->next) {
378			for (h2=0; h2<=16; h2++) {
379				struct rsvp_filter *f;
380
381				for (f = s->ht[h2]; f; f = f->next) {
382					if (f->tunnelhdr == 0)
383						continue;
384					data->tgenerator = f->res.classid;
385					tunnel_bts(data);
386				}
387			}
388		}
389	}
390
391	memcpy(data->tmap, tmap, sizeof(tmap));
392}
393
394static u32 gen_tunnel(struct rsvp_head *data)
395{
396	int i, k;
397
398	for (k=0; k<2; k++) {
399		for (i=255; i>0; i--) {
400			if (++data->tgenerator == 0)
401				data->tgenerator = 1;
402			if (tunnel_bts(data))
403				return data->tgenerator;
404		}
405		tunnel_recycle(data);
406	}
407	return 0;
408}
409
410static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
411	[TCA_RSVP_CLASSID]	= { .type = NLA_U32 },
412	[TCA_RSVP_DST]		= { .type = NLA_BINARY,
413				    .len = RSVP_DST_LEN * sizeof(u32) },
414	[TCA_RSVP_SRC]		= { .type = NLA_BINARY,
415				    .len = RSVP_DST_LEN * sizeof(u32) },
416	[TCA_RSVP_PINFO]	= { .len = sizeof(struct tc_rsvp_pinfo) },
417};
418
419static int rsvp_change(struct tcf_proto *tp, unsigned long base,
420		       u32 handle,
421		       struct nlattr **tca,
422		       unsigned long *arg)
423{
424	struct rsvp_head *data = tp->root;
425	struct rsvp_filter *f, **fp;
426	struct rsvp_session *s, **sp;
427	struct tc_rsvp_pinfo *pinfo = NULL;
428	struct nlattr *opt = tca[TCA_OPTIONS-1];
429	struct nlattr *tb[TCA_RSVP_MAX + 1];
430	struct tcf_exts e;
431	unsigned h1, h2;
432	__be32 *dst;
433	int err;
434
435	if (opt == NULL)
436		return handle ? -EINVAL : 0;
437
438	err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy);
439	if (err < 0)
440		return err;
441
442	err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map);
443	if (err < 0)
444		return err;
445
446	if ((f = (struct rsvp_filter*)*arg) != NULL) {
447		/* Node exists: adjust only classid */
448
449		if (f->handle != handle && handle)
450			goto errout2;
451		if (tb[TCA_RSVP_CLASSID-1]) {
452			f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
453			tcf_bind_filter(tp, &f->res, base);
454		}
455
456		tcf_exts_change(tp, &f->exts, &e);
457		return 0;
458	}
459
460	/* Now more serious part... */
461	err = -EINVAL;
462	if (handle)
463		goto errout2;
464	if (tb[TCA_RSVP_DST-1] == NULL)
465		goto errout2;
466
467	err = -ENOBUFS;
468	f = kzalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
469	if (f == NULL)
470		goto errout2;
471
472	h2 = 16;
473	if (tb[TCA_RSVP_SRC-1]) {
474		memcpy(f->src, nla_data(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
475		h2 = hash_src(f->src);
476	}
477	if (tb[TCA_RSVP_PINFO-1]) {
478		pinfo = nla_data(tb[TCA_RSVP_PINFO-1]);
479		f->spi = pinfo->spi;
480		f->tunnelhdr = pinfo->tunnelhdr;
481	}
482	if (tb[TCA_RSVP_CLASSID-1])
483		f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
484
485	dst = nla_data(tb[TCA_RSVP_DST-1]);
486	h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
487
488	err = -ENOMEM;
489	if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
490		goto errout;
491
492	if (f->tunnelhdr) {
493		err = -EINVAL;
494		if (f->res.classid > 255)
495			goto errout;
496
497		err = -ENOMEM;
498		if (f->res.classid == 0 &&
499		    (f->res.classid = gen_tunnel(data)) == 0)
500			goto errout;
501	}
502
503	for (sp = &data->ht[h1]; (s=*sp) != NULL; sp = &s->next) {
504		if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
505		    pinfo && pinfo->protocol == s->protocol &&
506		    memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 &&
507#if RSVP_DST_LEN == 4
508		    dst[0] == s->dst[0] &&
509		    dst[1] == s->dst[1] &&
510		    dst[2] == s->dst[2] &&
511#endif
512		    pinfo->tunnelid == s->tunnelid) {
513
514insert:
515			/* OK, we found appropriate session */
516
517			fp = &s->ht[h2];
518
519			f->sess = s;
520			if (f->tunnelhdr == 0)
521				tcf_bind_filter(tp, &f->res, base);
522
523			tcf_exts_change(tp, &f->exts, &e);
524
525			for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next)
526				if (((*fp)->spi.mask&f->spi.mask) != f->spi.mask)
527					break;
528			f->next = *fp;
529			wmb();
530			*fp = f;
531
532			*arg = (unsigned long)f;
533			return 0;
534		}
535	}
536
537	/* No session found. Create new one. */
538
539	err = -ENOBUFS;
540	s = kzalloc(sizeof(struct rsvp_session), GFP_KERNEL);
541	if (s == NULL)
542		goto errout;
543	memcpy(s->dst, dst, sizeof(s->dst));
544
545	if (pinfo) {
546		s->dpi = pinfo->dpi;
547		s->protocol = pinfo->protocol;
548		s->tunnelid = pinfo->tunnelid;
549	}
550	for (sp = &data->ht[h1]; *sp; sp = &(*sp)->next) {
551		if (((*sp)->dpi.mask&s->dpi.mask) != s->dpi.mask)
552			break;
553	}
554	s->next = *sp;
555	wmb();
556	*sp = s;
557
558	goto insert;
559
560errout:
561	kfree(f);
562errout2:
563	tcf_exts_destroy(tp, &e);
564	return err;
565}
566
567static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
568{
569	struct rsvp_head *head = tp->root;
570	unsigned h, h1;
571
572	if (arg->stop)
573		return;
574
575	for (h = 0; h < 256; h++) {
576		struct rsvp_session *s;
577
578		for (s = head->ht[h]; s; s = s->next) {
579			for (h1 = 0; h1 <= 16; h1++) {
580				struct rsvp_filter *f;
581
582				for (f = s->ht[h1]; f; f = f->next) {
583					if (arg->count < arg->skip) {
584						arg->count++;
585						continue;
586					}
587					if (arg->fn(tp, (unsigned long)f, arg) < 0) {
588						arg->stop = 1;
589						return;
590					}
591					arg->count++;
592				}
593			}
594		}
595	}
596}
597
598static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
599		     struct sk_buff *skb, struct tcmsg *t)
600{
601	struct rsvp_filter *f = (struct rsvp_filter*)fh;
602	struct rsvp_session *s;
603	unsigned char *b = skb_tail_pointer(skb);
604	struct nlattr *nest;
605	struct tc_rsvp_pinfo pinfo;
606
607	if (f == NULL)
608		return skb->len;
609	s = f->sess;
610
611	t->tcm_handle = f->handle;
612
613	nest = nla_nest_start(skb, TCA_OPTIONS);
614	if (nest == NULL)
615		goto nla_put_failure;
616
617	NLA_PUT(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst);
618	pinfo.dpi = s->dpi;
619	pinfo.spi = f->spi;
620	pinfo.protocol = s->protocol;
621	pinfo.tunnelid = s->tunnelid;
622	pinfo.tunnelhdr = f->tunnelhdr;
623	pinfo.pad = 0;
624	NLA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo);
625	if (f->res.classid)
626		NLA_PUT_U32(skb, TCA_RSVP_CLASSID, f->res.classid);
627	if (((f->handle>>8)&0xFF) != 16)
628		NLA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src);
629
630	if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
631		goto nla_put_failure;
632
633	nla_nest_end(skb, nest);
634
635	if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0)
636		goto nla_put_failure;
637	return skb->len;
638
639nla_put_failure:
640	nlmsg_trim(skb, b);
641	return -1;
642}
643
644static struct tcf_proto_ops RSVP_OPS = {
645	.next		=	NULL,
646	.kind		=	RSVP_ID,
647	.classify	=	rsvp_classify,
648	.init		=	rsvp_init,
649	.destroy	=	rsvp_destroy,
650	.get		=	rsvp_get,
651	.put		=	rsvp_put,
652	.change		=	rsvp_change,
653	.delete		=	rsvp_delete,
654	.walk		=	rsvp_walk,
655	.dump		=	rsvp_dump,
656	.owner		=	THIS_MODULE,
657};
658
659static int __init init_rsvp(void)
660{
661	return register_tcf_proto_ops(&RSVP_OPS);
662}
663
664static void __exit exit_rsvp(void)
665{
666	unregister_tcf_proto_ops(&RSVP_OPS);
667}
668
/* Wire the registration functions to module load and unload. */
module_init(init_rsvp);
module_exit(exit_rsvp);
671