/*
 * Linux Socket Filter - Kernel level socket filtering
 *
 * Author:
 *     Jay Schulist <jschlst@samba.org>
 *
 * Based on the design of:
 *     - The Berkeley Packet Filter
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Andi Kleen - Fix a few bad bugs and races.
 * Kris Katterjohn - Added many additional checks in sk_chk_filter()
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/unaligned.h>
#include <linux/filter.h>

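/*
 * A negative instruction offset k addresses data outside the linear
 * packet bytes: offsets at or above SKF_AD_OFF select the ancillary
 * data handled at the bottom of sk_run_filter(), offsets at or above
 * SKF_NET_OFF are relative to the network header, and offsets at or
 * above SKF_LL_OFF are relative to the link-level (MAC) header.
 */
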
/* No hurry in this branch */
static void *__load_pointer(struct sk_buff *skb, int k, unsigned int size)
{
	u8 *ptr = NULL;

	if (k >= SKF_NET_OFF)
		ptr = skb_network_header(skb) + k - SKF_NET_OFF;
	else if (k >= SKF_LL_OFF)
		ptr = skb_mac_header(skb) + k - SKF_LL_OFF;

	/* The whole load must fit inside the buffer, not just its first
	 * byte, so a multi-byte load near the tail cannot read past the
	 * end of the skb. */
	if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
		return ptr;
	return NULL;
}

static inline void *load_pointer(struct sk_buff *skb, int k,
				 unsigned int size, void *buffer)
{
	if (k >= 0)
		return skb_header_pointer(skb, k, size, buffer);
	else {
		if (k >= SKF_AD_OFF)
			return NULL;
		return __load_pointer(skb, k, size);
	}
}

/**
 *	sk_run_filter - run a filter on a socket
 *	@skb: buffer to run the filter on
 *	@filter: filter to apply
 *	@flen: length of filter
 *
 * Decode and apply filter instructions to the skb->data.
 * Return length to keep, 0 for none. skb is the data we are
 * filtering, filter is the array of filter instructions, and
 * flen is the number of filter blocks in the array.
 */
unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
{
	struct sock_filter *fentry;	/* We walk down these */
	void *ptr;
	u32 A = 0;			/* Accumulator */
	u32 X = 0;			/* Index Register */
	u32 mem[BPF_MEMWORDS];		/* Scratch Memory Store */
	u32 tmp;
	int k;
	int pc;

	/*
	 * Process array of filter instructions.
	 */
	for (pc = 0; pc < flen; pc++) {
		fentry = &filter[pc];

		switch (fentry->code) {
		case BPF_ALU|BPF_ADD|BPF_X:
			A += X;
			continue;
		case BPF_ALU|BPF_ADD|BPF_K:
			A += fentry->k;
			continue;
		case BPF_ALU|BPF_SUB|BPF_X:
			A -= X;
			continue;
		case BPF_ALU|BPF_SUB|BPF_K:
			A -= fentry->k;
			continue;
		case BPF_ALU|BPF_MUL|BPF_X:
			A *= X;
			continue;
		case BPF_ALU|BPF_MUL|BPF_K:
			A *= fentry->k;
			continue;
		case BPF_ALU|BPF_DIV|BPF_X:
			if (X == 0)
				return 0;
			A /= X;
			continue;
		case BPF_ALU|BPF_DIV|BPF_K:
			A /= fentry->k;
			continue;
		case BPF_ALU|BPF_AND|BPF_X:
			A &= X;
			continue;
		case BPF_ALU|BPF_AND|BPF_K:
			A &= fentry->k;
			continue;
		case BPF_ALU|BPF_OR|BPF_X:
			A |= X;
			continue;
		case BPF_ALU|BPF_OR|BPF_K:
			A |= fentry->k;
			continue;
		case BPF_ALU|BPF_LSH|BPF_X:
			A <<= X;
			continue;
		case BPF_ALU|BPF_LSH|BPF_K:
			A <<= fentry->k;
			continue;
		case BPF_ALU|BPF_RSH|BPF_X:
			A >>= X;
			continue;
		case BPF_ALU|BPF_RSH|BPF_K:
			A >>= fentry->k;
			continue;
		case BPF_ALU|BPF_NEG:
			A = -A;
			continue;
		case BPF_JMP|BPF_JA:
			pc += fentry->k;
			continue;
		case BPF_JMP|BPF_JGT|BPF_K:
			pc += (A > fentry->k) ? fentry->jt : fentry->jf;
			continue;
		case BPF_JMP|BPF_JGE|BPF_K:
			pc += (A >= fentry->k) ? fentry->jt : fentry->jf;
			continue;
		case BPF_JMP|BPF_JEQ|BPF_K:
			pc += (A == fentry->k) ? fentry->jt : fentry->jf;
			continue;
		case BPF_JMP|BPF_JSET|BPF_K:
			pc += (A & fentry->k) ? fentry->jt : fentry->jf;
			continue;
		case BPF_JMP|BPF_JGT|BPF_X:
			pc += (A > X) ? fentry->jt : fentry->jf;
			continue;
		case BPF_JMP|BPF_JGE|BPF_X:
			pc += (A >= X) ? fentry->jt : fentry->jf;
			continue;
		case BPF_JMP|BPF_JEQ|BPF_X:
			pc += (A == X) ? fentry->jt : fentry->jf;
			continue;
		case BPF_JMP|BPF_JSET|BPF_X:
			pc += (A & X) ? fentry->jt : fentry->jf;
			continue;
		case BPF_LD|BPF_W|BPF_ABS:
			k = fentry->k;
load_w:
			ptr = load_pointer(skb, k, 4, &tmp);
			if (ptr != NULL) {
				A = ntohl(get_unaligned((__be32 *)ptr));
				continue;
			}
			break;
		case BPF_LD|BPF_H|BPF_ABS:
			k = fentry->k;
load_h:
			ptr = load_pointer(skb, k, 2, &tmp);
			if (ptr != NULL) {
				A = ntohs(get_unaligned((__be16 *)ptr));
				continue;
			}
			break;
		case BPF_LD|BPF_B|BPF_ABS:
			k = fentry->k;
load_b:
			ptr = load_pointer(skb, k, 1, &tmp);
			if (ptr != NULL) {
				A = *(u8 *)ptr;
				continue;
			}
			break;
		case BPF_LD|BPF_W|BPF_LEN:
			A = skb->len;
			continue;
		case BPF_LDX|BPF_W|BPF_LEN:
			X = skb->len;
			continue;
		case BPF_LD|BPF_W|BPF_IND:
			k = X + fentry->k;
			goto load_w;
		case BPF_LD|BPF_H|BPF_IND:
			k = X + fentry->k;
			goto load_h;
		case BPF_LD|BPF_B|BPF_IND:
			k = X + fentry->k;
			goto load_b;
		case BPF_LDX|BPF_B|BPF_MSH:
			ptr = load_pointer(skb, fentry->k, 1, &tmp);
			if (ptr != NULL) {
				X = (*(u8 *)ptr & 0xf) << 2;
				continue;
			}
			return 0;
		case BPF_LD|BPF_IMM:
			A = fentry->k;
			continue;
		case BPF_LDX|BPF_IMM:
			X = fentry->k;
			continue;
		case BPF_LD|BPF_MEM:
			A = mem[fentry->k];
			continue;
		case BPF_LDX|BPF_MEM:
			X = mem[fentry->k];
			continue;
		case BPF_MISC|BPF_TAX:
			X = A;
			continue;
		case BPF_MISC|BPF_TXA:
			A = X;
			continue;
		case BPF_RET|BPF_K:
			return fentry->k;
		case BPF_RET|BPF_A:
			return A;
		case BPF_ST:
			mem[fentry->k] = A;
			continue;
		case BPF_STX:
			mem[fentry->k] = X;
			continue;
		default:
			WARN_ON(1);
			return 0;
		}

		/*
		 * Handle ancillary data, which are impossible
		 * (or very difficult) to get by parsing the packet
		 * contents.
		 */
		switch (k - SKF_AD_OFF) {
		case SKF_AD_PROTOCOL:
			A = ntohs(skb->protocol);
			continue;
		case SKF_AD_PKTTYPE:
			A = skb->pkt_type;
			continue;
		case SKF_AD_IFINDEX:
			/* Not every skb has a device attached yet, so
			 * don't dereference a NULL skb->dev. */
			if (!skb->dev)
				return 0;
			A = skb->dev->ifindex;
			continue;
		default:
			return 0;
		}
	}

	return 0;
}
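
/*
 * Example (an illustrative sketch, not part of this file's interface;
 * the helper name is hypothetical): a hand-assembled four-instruction
 * program run straight through sk_run_filter(). It loads the packet
 * protocol via the SKF_AD_PROTOCOL ancillary offset, keeps ARP frames
 * in full and drops everything else.
 */
static inline unsigned int example_run_arp_filter(struct sk_buff *skb)
{
	static struct sock_filter insns[] = {
		/* A = ntohs(skb->protocol), via the ancillary switch */
		{ BPF_LD|BPF_H|BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_PROTOCOL },
		/* if (A == ETH_P_ARP) goto keep; else goto drop; */
		{ BPF_JMP|BPF_JEQ|BPF_K, 0, 1, ETH_P_ARP },
		/* keep: return ~0, i.e. the whole packet */
		{ BPF_RET|BPF_K, 0, 0, 0xffffffff },
		/* drop: return 0, i.e. nothing */
		{ BPF_RET|BPF_K, 0, 0, 0 },
	};

	return sk_run_filter(skb, insns, ARRAY_SIZE(insns));
}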

/**
 *	sk_chk_filter - verify socket filter code
 *	@filter: filter to verify
 *	@flen: length of filter
 *
 * Check the user's filter code. If we let some ugly
 * filter code slip through, kaboom! The filter must contain
 * no references or jumps that are out of range, no illegal
 * instructions, and must end with a RET instruction.
 *
 * All jumps are forward as they are not signed.
 *
 * Returns 0 if the rule set is legal or -EINVAL if not.
 */
int sk_chk_filter(struct sock_filter *filter, int flen)
{
	struct sock_filter *ftest;
	int pc;

	if (flen == 0 || flen > BPF_MAXINSNS)
		return -EINVAL;

	/* check the filter code now */
	for (pc = 0; pc < flen; pc++) {
		ftest = &filter[pc];

		/* Only allow valid instructions */
		switch (ftest->code) {
		case BPF_ALU|BPF_ADD|BPF_K:
		case BPF_ALU|BPF_ADD|BPF_X:
		case BPF_ALU|BPF_SUB|BPF_K:
		case BPF_ALU|BPF_SUB|BPF_X:
		case BPF_ALU|BPF_MUL|BPF_K:
		case BPF_ALU|BPF_MUL|BPF_X:
		case BPF_ALU|BPF_DIV|BPF_X:
		case BPF_ALU|BPF_AND|BPF_K:
		case BPF_ALU|BPF_AND|BPF_X:
		case BPF_ALU|BPF_OR|BPF_K:
		case BPF_ALU|BPF_OR|BPF_X:
		case BPF_ALU|BPF_LSH|BPF_K:
		case BPF_ALU|BPF_LSH|BPF_X:
		case BPF_ALU|BPF_RSH|BPF_K:
		case BPF_ALU|BPF_RSH|BPF_X:
		case BPF_ALU|BPF_NEG:
		case BPF_LD|BPF_W|BPF_ABS:
		case BPF_LD|BPF_H|BPF_ABS:
		case BPF_LD|BPF_B|BPF_ABS:
		case BPF_LD|BPF_W|BPF_LEN:
		case BPF_LD|BPF_W|BPF_IND:
		case BPF_LD|BPF_H|BPF_IND:
		case BPF_LD|BPF_B|BPF_IND:
		case BPF_LD|BPF_IMM:
		case BPF_LDX|BPF_W|BPF_LEN:
		case BPF_LDX|BPF_B|BPF_MSH:
		case BPF_LDX|BPF_IMM:
		case BPF_MISC|BPF_TAX:
		case BPF_MISC|BPF_TXA:
		case BPF_RET|BPF_K:
		case BPF_RET|BPF_A:
			break;

		/* Some instructions need special checks */

		case BPF_ALU|BPF_DIV|BPF_K:
			/* check for division by zero */
			if (ftest->k == 0)
				return -EINVAL;
			break;

		case BPF_LD|BPF_MEM:
		case BPF_LDX|BPF_MEM:
		case BPF_ST:
		case BPF_STX:
			/* check for invalid memory addresses */
			if (ftest->k >= BPF_MEMWORDS)
				return -EINVAL;
			break;

		case BPF_JMP|BPF_JA:
			/*
			 * Note, the large ftest->k might cause loops.
			 * Compare this with conditional jumps below,
			 * where offsets are limited. --ANK (981016)
			 */
			if (ftest->k >= (unsigned)(flen - pc - 1))
				return -EINVAL;
			break;

		case BPF_JMP|BPF_JEQ|BPF_K:
		case BPF_JMP|BPF_JEQ|BPF_X:
		case BPF_JMP|BPF_JGE|BPF_K:
		case BPF_JMP|BPF_JGE|BPF_X:
		case BPF_JMP|BPF_JGT|BPF_K:
		case BPF_JMP|BPF_JGT|BPF_X:
		case BPF_JMP|BPF_JSET|BPF_K:
		case BPF_JMP|BPF_JSET|BPF_X:
			/* for conditionals both must be safe */
			if (pc + ftest->jt + 1 >= flen ||
			    pc + ftest->jf + 1 >= flen)
				return -EINVAL;
			break;

		default:
			return -EINVAL;
		}
	}

	return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL;
}
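
/*
 * Example (an illustrative sketch; the helper name is hypothetical):
 * a lone conditional jump fails the branch-target check above, because
 * both pc + jt + 1 and pc + jf + 1 land beyond flen.
 */
static inline void example_reject_bad_jump(void)
{
	static struct sock_filter bad[] = {
		{ BPF_JMP|BPF_JEQ|BPF_K, 0, 0, 0 },
	};

	WARN_ON(sk_chk_filter(bad, ARRAY_SIZE(bad)) != -EINVAL);
}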

/**
 *	sk_attach_filter - attach a socket filter
 *	@fprog: the filter program
 *	@sk: the socket to use
 *
 * Attach the user's filter code. We first run some sanity checks on
 * it to make sure it does not explode on us later. If an error
 * occurs or there is insufficient memory for the filter a negative
 * errno code is returned. On success the return is zero.
 */
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
	struct sk_filter *fp;
	unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
	int err;

	/* Make sure the new filter is there and in the right amounts. */
	if (fprog->filter == NULL)
		return -EINVAL;

	fp = sock_kmalloc(sk, fsize + sizeof(*fp), GFP_KERNEL);
	if (!fp)
		return -ENOMEM;
	if (copy_from_user(fp->insns, fprog->filter, fsize)) {
		sock_kfree_s(sk, fp, fsize + sizeof(*fp));
		return -EFAULT;
	}

	atomic_set(&fp->refcnt, 1);
	fp->len = fprog->len;

	err = sk_chk_filter(fp->insns, fp->len);
	if (!err) {
		struct sk_filter *old_fp;

		/* Publish the new filter under RCU: readers on the
		 * receive path see either the old or the new program,
		 * never a half-installed one. */
		rcu_read_lock_bh();
		old_fp = rcu_dereference(sk->sk_filter);
		rcu_assign_pointer(sk->sk_filter, fp);
		rcu_read_unlock_bh();
		fp = old_fp;
	}

	/* Either the displaced old filter or the rejected new one */
	if (fp)
		sk_filter_release(sk, fp);
	return err;
}
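
/*
 * For reference, the usual way into sk_attach_filter() is from userspace
 * via setsockopt(). A minimal sketch, assuming an already-open socket fd
 * and an instruction array like the one in the example above:
 *
 *	struct sock_fprog fprog = {
 *		.len	= sizeof(insns) / sizeof(insns[0]),
 *		.filter	= insns,
 *	};
 *
 *	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER,
 *		       &fprog, sizeof(fprog)) < 0)
 *		perror("SO_ATTACH_FILTER");
 */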

EXPORT_SYMBOL(sk_chk_filter);
EXPORT_SYMBOL(sk_run_filter);