1/*
2 * net/sched/cls_rsvp.h	Template file for RSVPv[46] classifiers.
3 *
4 *		This program is free software; you can redistribute it and/or
5 *		modify it under the terms of the GNU General Public License
6 *		as published by the Free Software Foundation; either version
7 *		2 of the License, or (at your option) any later version.
8 *
9 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 */
11
12/*
   Compared to the general packet classification problem,
   RSVP needs only several relatively simple rules:
15
16   * (dst, protocol) are always specified,
17     so that we are able to hash them.
18   * src may be exact, or may be wildcard, so that
19     we can keep a hash table plus one wildcard entry.
20   * source port (or flow label) is important only if src is given.
21
22   IMPLEMENTATION.
23
24   We use a two level hash table: The top level is keyed by
25   destination address and protocol ID, every bucket contains a list
26   of "rsvp sessions", identified by destination address, protocol and
27   DPI(="Destination Port ID"): triple (key, mask, offset).
28
29   Every bucket has a smaller hash table keyed by source address
30   (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
31   Every bucket is again a list of "RSVP flows", selected by
32   source address and SPI(="Source Port ID" here rather than
33   "security parameter index"): triple (key, mask, offset).
34
35
36   NOTE 1. All the packets with IPv6 extension headers (but AH and ESP)
37   and all fragmented packets go to the best-effort traffic class.
38
39
   NOTE 2. Two "port id"s seem to be redundant, rfc2207 requires
41   only one "Generalized Port Identifier". So that for classic
42   ah, esp (and udp,tcp) both *pi should coincide or one of them
43   should be wildcard.
44
45   At first sight, this redundancy is just a waste of CPU
46   resources. But DPI and SPI add the possibility to assign different
47   priorities to GPIs. Look also at note 4 about tunnels below.
48
49
50   NOTE 3. One complication is the case of tunneled packets.
   We implement it as follows: if the first lookup
52   matches a special session with "tunnelhdr" value not zero,
53   flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
54   In this case, we pull tunnelhdr bytes and restart lookup
55   with tunnel ID added to the list of keys. Simple and stupid 8)8)
56   It's enough for PIMREG and IPIP.
57
58
59   NOTE 4. Two GPIs make it possible to parse even GRE packets.
60   F.e. DPI can select ETH_P_IP (and necessary flags to make
61   tunnelhdr correct) in GRE protocol field and SPI matches
62   GRE key. Is it not nice? 8)8)
63
64
   Well, as a result, despite its simplicity, we get a pretty
66   powerful classification engine.  */
67
68
69struct rsvp_head
70{
71	u32			tmap[256/32];
72	u32			hgenerator;
73	u8			tgenerator;
74	struct rsvp_session	*ht[256];
75};
76
77struct rsvp_session
78{
79	struct rsvp_session	*next;
80	__be32			dst[RSVP_DST_LEN];
81	struct tc_rsvp_gpi 	dpi;
82	u8			protocol;
83	u8			tunnelid;
84	/* 16 (src,sport) hash slots, and one wildcard source slot */
85	struct rsvp_filter	*ht[16+1];
86};
87
88
89struct rsvp_filter
90{
91	struct rsvp_filter	*next;
92	__be32			src[RSVP_DST_LEN];
93	struct tc_rsvp_gpi	spi;
94	u8			tunnelhdr;
95
96	struct tcf_result	res;
97	struct tcf_exts		exts;
98
99	u32			handle;
100	struct rsvp_session	*sess;
101};
102
103static __inline__ unsigned hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
104{
105	unsigned h = (__force __u32)dst[RSVP_DST_LEN-1];
106	h ^= h>>16;
107	h ^= h>>8;
108	return (h ^ protocol ^ tunnelid) & 0xFF;
109}
110
111static __inline__ unsigned hash_src(__be32 *src)
112{
113	unsigned h = (__force __u32)src[RSVP_DST_LEN-1];
114	h ^= h>>16;
115	h ^= h>>8;
116	h ^= h>>4;
117	return h & 0xF;
118}
119
120static struct tcf_ext_map rsvp_ext_map = {
121	.police = TCA_RSVP_POLICE,
122	.action = TCA_RSVP_ACT
123};
124
/*
 * Execute the extensions of the current filter `f` on `skb`.
 *   result > 0: return it from the calling function;
 *   result < 0: skip this filter and move on to the next loop iteration;
 *   result = 0: fall through to the code after the macro.
 *
 * Expects `skb`, `f` and `res` in the caller's scope and must be used
 * inside a loop.  This is deliberately a bare brace block and not a
 * do { } while (0) wrapper: the `continue` has to bind to the caller's
 * loop, not to a loop introduced by the macro.
 */
#define RSVP_APPLY_RESULT()					\
{								\
	int verdict = tcf_exts_exec(skb, &f->exts, res);	\
	if (verdict > 0)					\
		return verdict;					\
	if (verdict < 0)					\
		continue;					\
}
133
134static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
135			 struct tcf_result *res)
136{
137	struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
138	struct rsvp_session *s;
139	struct rsvp_filter *f;
140	unsigned h1, h2;
141	__be32 *dst, *src;
142	u8 protocol;
143	u8 tunnelid = 0;
144	u8 *xprt;
145#if RSVP_DST_LEN == 4
146	struct ipv6hdr *nhptr = ipv6_hdr(skb);
147#else
148	struct iphdr *nhptr = ip_hdr(skb);
149#endif
150
151restart:
152
153#if RSVP_DST_LEN == 4
154	src = &nhptr->saddr.s6_addr32[0];
155	dst = &nhptr->daddr.s6_addr32[0];
156	protocol = nhptr->nexthdr;
157	xprt = ((u8*)nhptr) + sizeof(struct ipv6hdr);
158#else
159	src = &nhptr->saddr;
160	dst = &nhptr->daddr;
161	protocol = nhptr->protocol;
162	xprt = ((u8*)nhptr) + (nhptr->ihl<<2);
163	if (nhptr->frag_off & htons(IP_MF|IP_OFFSET))
164		return -1;
165#endif
166
167	h1 = hash_dst(dst, protocol, tunnelid);
168	h2 = hash_src(src);
169
170	for (s = sht[h1]; s; s = s->next) {
171		if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
172		    protocol == s->protocol &&
173		    !(s->dpi.mask & (*(u32*)(xprt+s->dpi.offset)^s->dpi.key))
174#if RSVP_DST_LEN == 4
175		    && dst[0] == s->dst[0]
176		    && dst[1] == s->dst[1]
177		    && dst[2] == s->dst[2]
178#endif
179		    && tunnelid == s->tunnelid) {
180
181			for (f = s->ht[h2]; f; f = f->next) {
182				if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN-1] &&
183				    !(f->spi.mask & (*(u32*)(xprt+f->spi.offset)^f->spi.key))
184#if RSVP_DST_LEN == 4
185				    && src[0] == f->src[0]
186				    && src[1] == f->src[1]
187				    && src[2] == f->src[2]
188#endif
189				    ) {
190					*res = f->res;
191					RSVP_APPLY_RESULT();
192
193matched:
194					if (f->tunnelhdr == 0)
195						return 0;
196
197					tunnelid = f->res.classid;
198					nhptr = (void*)(xprt + f->tunnelhdr - sizeof(*nhptr));
199					goto restart;
200				}
201			}
202
203			/* And wildcard bucket... */
204			for (f = s->ht[16]; f; f = f->next) {
205				*res = f->res;
206				RSVP_APPLY_RESULT();
207				goto matched;
208			}
209			return -1;
210		}
211	}
212	return -1;
213}
214
215static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
216{
217	struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
218	struct rsvp_session *s;
219	struct rsvp_filter *f;
220	unsigned h1 = handle&0xFF;
221	unsigned h2 = (handle>>8)&0xFF;
222
223	if (h2 > 16)
224		return 0;
225
226	for (s = sht[h1]; s; s = s->next) {
227		for (f = s->ht[h2]; f; f = f->next) {
228			if (f->handle == handle)
229				return (unsigned long)f;
230		}
231	}
232	return 0;
233}
234
/* Counterpart of rsvp_get(), which takes no reference: nothing to release. */
static void rsvp_put(struct tcf_proto *tp, unsigned long f)
{
	/* intentionally empty */
}
238
239static int rsvp_init(struct tcf_proto *tp)
240{
241	struct rsvp_head *data;
242
243	data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL);
244	if (data) {
245		tp->root = data;
246		return 0;
247	}
248	return -ENOBUFS;
249}
250
251static inline void
252rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
253{
254	tcf_unbind_filter(tp, &f->res);
255	tcf_exts_destroy(tp, &f->exts);
256	kfree(f);
257}
258
259static void rsvp_destroy(struct tcf_proto *tp)
260{
261	struct rsvp_head *data = xchg(&tp->root, NULL);
262	struct rsvp_session **sht;
263	int h1, h2;
264
265	if (data == NULL)
266		return;
267
268	sht = data->ht;
269
270	for (h1=0; h1<256; h1++) {
271		struct rsvp_session *s;
272
273		while ((s = sht[h1]) != NULL) {
274			sht[h1] = s->next;
275
276			for (h2=0; h2<=16; h2++) {
277				struct rsvp_filter *f;
278
279				while ((f = s->ht[h2]) != NULL) {
280					s->ht[h2] = f->next;
281					rsvp_delete_filter(tp, f);
282				}
283			}
284			kfree(s);
285		}
286	}
287	kfree(data);
288}
289
290static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
291{
292	struct rsvp_filter **fp, *f = (struct rsvp_filter*)arg;
293	unsigned h = f->handle;
294	struct rsvp_session **sp;
295	struct rsvp_session *s = f->sess;
296	int i;
297
298	for (fp = &s->ht[(h>>8)&0xFF]; *fp; fp = &(*fp)->next) {
299		if (*fp == f) {
300			tcf_tree_lock(tp);
301			*fp = f->next;
302			tcf_tree_unlock(tp);
303			rsvp_delete_filter(tp, f);
304
305			/* Strip tree */
306
307			for (i=0; i<=16; i++)
308				if (s->ht[i])
309					return 0;
310
311			/* OK, session has no flows */
312			for (sp = &((struct rsvp_head*)tp->root)->ht[h&0xFF];
313			     *sp; sp = &(*sp)->next) {
314				if (*sp == s) {
315					tcf_tree_lock(tp);
316					*sp = s->next;
317					tcf_tree_unlock(tp);
318
319					kfree(s);
320					return 0;
321				}
322			}
323
324			return 0;
325		}
326	}
327	return 0;
328}
329
330static unsigned gen_handle(struct tcf_proto *tp, unsigned salt)
331{
332	struct rsvp_head *data = tp->root;
333	int i = 0xFFFF;
334
335	while (i-- > 0) {
336		u32 h;
337		if ((data->hgenerator += 0x10000) == 0)
338			data->hgenerator = 0x10000;
339		h = data->hgenerator|salt;
340		if (rsvp_get(tp, h) == 0)
341			return h;
342	}
343	return 0;
344}
345
346static int tunnel_bts(struct rsvp_head *data)
347{
348	int n = data->tgenerator>>5;
349	u32 b = 1<<(data->tgenerator&0x1F);
350
351	if (data->tmap[n]&b)
352		return 0;
353	data->tmap[n] |= b;
354	return 1;
355}
356
357static void tunnel_recycle(struct rsvp_head *data)
358{
359	struct rsvp_session **sht = data->ht;
360	u32 tmap[256/32];
361	int h1, h2;
362
363	memset(tmap, 0, sizeof(tmap));
364
365	for (h1=0; h1<256; h1++) {
366		struct rsvp_session *s;
367		for (s = sht[h1]; s; s = s->next) {
368			for (h2=0; h2<=16; h2++) {
369				struct rsvp_filter *f;
370
371				for (f = s->ht[h2]; f; f = f->next) {
372					if (f->tunnelhdr == 0)
373						continue;
374					data->tgenerator = f->res.classid;
375					tunnel_bts(data);
376				}
377			}
378		}
379	}
380
381	memcpy(data->tmap, tmap, sizeof(tmap));
382}
383
384static u32 gen_tunnel(struct rsvp_head *data)
385{
386	int i, k;
387
388	for (k=0; k<2; k++) {
389		for (i=255; i>0; i--) {
390			if (++data->tgenerator == 0)
391				data->tgenerator = 1;
392			if (tunnel_bts(data))
393				return data->tgenerator;
394		}
395		tunnel_recycle(data);
396	}
397	return 0;
398}
399
400static int rsvp_change(struct tcf_proto *tp, unsigned long base,
401		       u32 handle,
402		       struct rtattr **tca,
403		       unsigned long *arg)
404{
405	struct rsvp_head *data = tp->root;
406	struct rsvp_filter *f, **fp;
407	struct rsvp_session *s, **sp;
408	struct tc_rsvp_pinfo *pinfo = NULL;
409	struct rtattr *opt = tca[TCA_OPTIONS-1];
410	struct rtattr *tb[TCA_RSVP_MAX];
411	struct tcf_exts e;
412	unsigned h1, h2;
413	__be32 *dst;
414	int err;
415
416	if (opt == NULL)
417		return handle ? -EINVAL : 0;
418
419	if (rtattr_parse_nested(tb, TCA_RSVP_MAX, opt) < 0)
420		return -EINVAL;
421
422	err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map);
423	if (err < 0)
424		return err;
425
426	if ((f = (struct rsvp_filter*)*arg) != NULL) {
427		/* Node exists: adjust only classid */
428
429		if (f->handle != handle && handle)
430			goto errout2;
431		if (tb[TCA_RSVP_CLASSID-1]) {
432			f->res.classid = *(u32*)RTA_DATA(tb[TCA_RSVP_CLASSID-1]);
433			tcf_bind_filter(tp, &f->res, base);
434		}
435
436		tcf_exts_change(tp, &f->exts, &e);
437		return 0;
438	}
439
440	/* Now more serious part... */
441	err = -EINVAL;
442	if (handle)
443		goto errout2;
444	if (tb[TCA_RSVP_DST-1] == NULL)
445		goto errout2;
446
447	err = -ENOBUFS;
448	f = kzalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
449	if (f == NULL)
450		goto errout2;
451
452	h2 = 16;
453	if (tb[TCA_RSVP_SRC-1]) {
454		err = -EINVAL;
455		if (RTA_PAYLOAD(tb[TCA_RSVP_SRC-1]) != sizeof(f->src))
456			goto errout;
457		memcpy(f->src, RTA_DATA(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
458		h2 = hash_src(f->src);
459	}
460	if (tb[TCA_RSVP_PINFO-1]) {
461		err = -EINVAL;
462		if (RTA_PAYLOAD(tb[TCA_RSVP_PINFO-1]) < sizeof(struct tc_rsvp_pinfo))
463			goto errout;
464		pinfo = RTA_DATA(tb[TCA_RSVP_PINFO-1]);
465		f->spi = pinfo->spi;
466		f->tunnelhdr = pinfo->tunnelhdr;
467	}
468	if (tb[TCA_RSVP_CLASSID-1]) {
469		err = -EINVAL;
470		if (RTA_PAYLOAD(tb[TCA_RSVP_CLASSID-1]) != 4)
471			goto errout;
472		f->res.classid = *(u32*)RTA_DATA(tb[TCA_RSVP_CLASSID-1]);
473	}
474
475	err = -EINVAL;
476	if (RTA_PAYLOAD(tb[TCA_RSVP_DST-1]) != sizeof(f->src))
477		goto errout;
478	dst = RTA_DATA(tb[TCA_RSVP_DST-1]);
479	h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
480
481	err = -ENOMEM;
482	if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
483		goto errout;
484
485	if (f->tunnelhdr) {
486		err = -EINVAL;
487		if (f->res.classid > 255)
488			goto errout;
489
490		err = -ENOMEM;
491		if (f->res.classid == 0 &&
492		    (f->res.classid = gen_tunnel(data)) == 0)
493			goto errout;
494	}
495
496	for (sp = &data->ht[h1]; (s=*sp) != NULL; sp = &s->next) {
497		if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
498		    pinfo && pinfo->protocol == s->protocol &&
499		    memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0
500#if RSVP_DST_LEN == 4
501		    && dst[0] == s->dst[0]
502		    && dst[1] == s->dst[1]
503		    && dst[2] == s->dst[2]
504#endif
505		    && pinfo->tunnelid == s->tunnelid) {
506
507insert:
508			/* OK, we found appropriate session */
509
510			fp = &s->ht[h2];
511
512			f->sess = s;
513			if (f->tunnelhdr == 0)
514				tcf_bind_filter(tp, &f->res, base);
515
516			tcf_exts_change(tp, &f->exts, &e);
517
518			for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next)
519				if (((*fp)->spi.mask&f->spi.mask) != f->spi.mask)
520					break;
521			f->next = *fp;
522			wmb();
523			*fp = f;
524
525			*arg = (unsigned long)f;
526			return 0;
527		}
528	}
529
530	/* No session found. Create new one. */
531
532	err = -ENOBUFS;
533	s = kzalloc(sizeof(struct rsvp_session), GFP_KERNEL);
534	if (s == NULL)
535		goto errout;
536	memcpy(s->dst, dst, sizeof(s->dst));
537
538	if (pinfo) {
539		s->dpi = pinfo->dpi;
540		s->protocol = pinfo->protocol;
541		s->tunnelid = pinfo->tunnelid;
542	}
543	for (sp = &data->ht[h1]; *sp; sp = &(*sp)->next) {
544		if (((*sp)->dpi.mask&s->dpi.mask) != s->dpi.mask)
545			break;
546	}
547	s->next = *sp;
548	wmb();
549	*sp = s;
550
551	goto insert;
552
553errout:
554	kfree(f);
555errout2:
556	tcf_exts_destroy(tp, &e);
557	return err;
558}
559
560static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
561{
562	struct rsvp_head *head = tp->root;
563	unsigned h, h1;
564
565	if (arg->stop)
566		return;
567
568	for (h = 0; h < 256; h++) {
569		struct rsvp_session *s;
570
571		for (s = head->ht[h]; s; s = s->next) {
572			for (h1 = 0; h1 <= 16; h1++) {
573				struct rsvp_filter *f;
574
575				for (f = s->ht[h1]; f; f = f->next) {
576					if (arg->count < arg->skip) {
577						arg->count++;
578						continue;
579					}
580					if (arg->fn(tp, (unsigned long)f, arg) < 0) {
581						arg->stop = 1;
582						return;
583					}
584					arg->count++;
585				}
586			}
587		}
588	}
589}
590
591static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
592		     struct sk_buff *skb, struct tcmsg *t)
593{
594	struct rsvp_filter *f = (struct rsvp_filter*)fh;
595	struct rsvp_session *s;
596	unsigned char *b = skb_tail_pointer(skb);
597	struct rtattr *rta;
598	struct tc_rsvp_pinfo pinfo;
599
600	if (f == NULL)
601		return skb->len;
602	s = f->sess;
603
604	t->tcm_handle = f->handle;
605
606
607	rta = (struct rtattr*)b;
608	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
609
610	RTA_PUT(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst);
611	pinfo.dpi = s->dpi;
612	pinfo.spi = f->spi;
613	pinfo.protocol = s->protocol;
614	pinfo.tunnelid = s->tunnelid;
615	pinfo.tunnelhdr = f->tunnelhdr;
616	pinfo.pad = 0;
617	RTA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo);
618	if (f->res.classid)
619		RTA_PUT(skb, TCA_RSVP_CLASSID, 4, &f->res.classid);
620	if (((f->handle>>8)&0xFF) != 16)
621		RTA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src);
622
623	if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
624		goto rtattr_failure;
625
626	rta->rta_len = skb_tail_pointer(skb) - b;
627
628	if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0)
629		goto rtattr_failure;
630	return skb->len;
631
632rtattr_failure:
633	nlmsg_trim(skb, b);
634	return -1;
635}
636
637static struct tcf_proto_ops RSVP_OPS = {
638	.next		=	NULL,
639	.kind		=	RSVP_ID,
640	.classify	=	rsvp_classify,
641	.init		=	rsvp_init,
642	.destroy	=	rsvp_destroy,
643	.get		=	rsvp_get,
644	.put		=	rsvp_put,
645	.change		=	rsvp_change,
646	.delete		=	rsvp_delete,
647	.walk		=	rsvp_walk,
648	.dump		=	rsvp_dump,
649	.owner		=	THIS_MODULE,
650};
651
652static int __init init_rsvp(void)
653{
654	return register_tcf_proto_ops(&RSVP_OPS);
655}
656
657static void __exit exit_rsvp(void)
658{
659	unregister_tcf_proto_ops(&RSVP_OPS);
660}
661
/* Hook the entry and exit points into the module loader. */
module_init(init_rsvp);
module_exit(exit_rsvp);
664