/*	$NetBSD: npf_bpf_comp.c,v 1.1 2013/09/19 01:04:45 rmind Exp $	*/

/*-
 * Copyright (c) 2010-2013 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This material is based upon work partially supported by The
 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * BPF byte-code generation for NPF rules.
 */

#include <sys/cdefs.h>
__RCSID("$NetBSD: npf_bpf_comp.c,v 1.1 2013/09/19 01:04:45 rmind Exp $");

#include <stdlib.h>
#include <stdbool.h>
#include <stddef.h>
#include <string.h>
#include <inttypes.h>
#include <err.h>
#include <assert.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/udp.h>
#include <netinet/tcp.h>
#include <netinet/ip_icmp.h>
#include <netinet/icmp6.h>

#include <net/bpf.h>

#include "npfctl.h"
/*
 * Note: clear X_EQ_L4OFF when register X is invalidated, i.e. when it
 * stores something other than the L4 header offset.  Generally, this is
 * whenever BPF_LDX is used.
 */
#define	FETCHED_L3		0x01
#define	X_EQ_L4OFF		0x02

struct npf_bpf {
	/*
	 * BPF program code, the allocated length (in bytes), the number
	 * of logical blocks and the flags.
	 */
	struct bpf_program	prog;
	size_t			alen;
	u_int			nblocks;
	sa_family_t		af;
	uint32_t		flags;

	/* The current group offset and block number. */
	bool			ingroup;
	u_int			goff;
	u_int			gblock;

	/* BPF marks, allocated length and the real length. */
	uint32_t *		marks;
	size_t			malen;
	size_t			mlen;
};

/*
 * NPF success and failure values to be returned from BPF.
 */
#define	NPF_BPF_SUCCESS		((u_int)-1)
#define	NPF_BPF_FAILURE		0

/*
 * Magic value to indicate the failure path, which is fixed up on completion.
 * Note: this is the longest jump offset in BPF, since the offset is one byte.
 */
#define	JUMP_MAGIC		0xff

/* Reduce re-allocations by expanding in 64 byte blocks. */
#define	ALLOC_MASK		(64 - 1)
#define	ALLOC_ROUND(x)		(((x) + ALLOC_MASK) & ~ALLOC_MASK)
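/* For instance: ALLOC_ROUND(1) == 64, ALLOC_ROUND(64) == 64 and
 * ALLOC_ROUND(65) == 128. */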

npf_bpf_t *
npfctl_bpf_create(void)
{
	return ecalloc(1, sizeof(npf_bpf_t));
}

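/*
 * fixup_jumps: scan the [start, end) range of instructions and patch
 * the branches carrying the JUMP_MAGIC placeholder so that they land
 * just past the 'end' offset (the failure return, or the first
 * instruction after a group).  If 'swap' is true, invert the jt/jf
 * branches first; this is used to turn the blocks of a group into a
 * logical disjunction.
 */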
static void
fixup_jumps(npf_bpf_t *ctx, u_int start, u_int end, bool swap)
{
	struct bpf_program *bp = &ctx->prog;

	for (u_int i = start; i < end; i++) {
		struct bpf_insn *insn = &bp->bf_insns[i];
		const u_int fail_off = end - i;

		if (fail_off >= JUMP_MAGIC) {
			errx(EXIT_FAILURE, "BPF generation error: "
			    "the number of instructions is over the limit");
		}
		if (BPF_CLASS(insn->code) != BPF_JMP) {
			continue;
		}
		if (swap) {
			uint8_t jt = insn->jt;
			insn->jt = insn->jf;
			insn->jf = jt;
		}
		if (insn->jt == JUMP_MAGIC)
			insn->jt = fail_off;
		if (insn->jf == JUMP_MAGIC)
			insn->jf = fail_off;
	}
}

static void
add_insns(npf_bpf_t *ctx, struct bpf_insn *insns, size_t count)
{
	struct bpf_program *bp = &ctx->prog;
	size_t offset, len, reqlen;

	/* Note: bf_len is the count of instructions. */
	offset = bp->bf_len * sizeof(struct bpf_insn);
	len = count * sizeof(struct bpf_insn);

	/* Ensure the memory buffer for the program. */
	reqlen = ALLOC_ROUND(offset + len);
	if (reqlen > ctx->alen) {
		bp->bf_insns = erealloc(bp->bf_insns, reqlen);
		ctx->alen = reqlen;
	}

	/* Add the code block. */
	memcpy((uint8_t *)bp->bf_insns + offset, insns, len);
	bp->bf_len += count;
}

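/*
 * done_raw_block: append a mark record to the mark buffer.  Each record
 * is an array of 32-bit words: { BM_* key, nargs, arg0, .., argN }.
 * For example, npfctl_bpf_proto() stores { BM_PROTO, 1, proto }.
 */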
static void
done_raw_block(npf_bpf_t *ctx, const uint32_t *m, size_t len)
{
	size_t reqlen, nargs = m[1];

	if ((len / sizeof(uint32_t) - 2) != nargs) {
		errx(EXIT_FAILURE, "invalid BPF block description");
	}
	reqlen = ALLOC_ROUND(ctx->mlen + len);
	if (reqlen > ctx->malen) {
		ctx->marks = erealloc(ctx->marks, reqlen);
		ctx->malen = reqlen;
	}
	memcpy((uint8_t *)ctx->marks + ctx->mlen, m, len);
	ctx->mlen += len;
}

static void
done_block(npf_bpf_t *ctx, const uint32_t *m, size_t len)
{
	done_raw_block(ctx, m, len);
	ctx->nblocks++;
}

struct bpf_program *
npfctl_bpf_complete(npf_bpf_t *ctx)
{
	struct bpf_program *bp = &ctx->prog;
	const u_int retoff = bp->bf_len;

	/* Add the return fragment (success and failure paths). */
	struct bpf_insn insns_ret[] = {
		BPF_STMT(BPF_RET+BPF_K, NPF_BPF_SUCCESS),
		BPF_STMT(BPF_RET+BPF_K, NPF_BPF_FAILURE),
	};
	add_insns(ctx, insns_ret, __arraycount(insns_ret));

	/* Fixup all jumps to the main failure path. */
	fixup_jumps(ctx, 0, retoff, false);

	return &ctx->prog;
}
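
/*
 * The completed program thus has the following shape (a sketch):
 *
 *	0 .. retoff-1	the code blocks; each mismatch branch carried
 *			the JUMP_MAGIC placeholder before the fixup
 *	retoff		ret #NPF_BPF_SUCCESS (the fall-through path)
 *	retoff + 1	ret #NPF_BPF_FAILURE (all fixed-up failure
 *			jumps land here)
 */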

const void *
npfctl_bpf_bmarks(npf_bpf_t *ctx, size_t *len)
{
	*len = ctx->mlen;
	return ctx->marks;
}

void
npfctl_bpf_destroy(npf_bpf_t *ctx)
{
	free(ctx->prog.bf_insns);
	free(ctx->marks);
	free(ctx);
}

/*
 * npfctl_bpf_group: begin a logical group.  It merely uses logical
 * disjunction (OR) for the comparisons within the group.
 */
void
npfctl_bpf_group(npf_bpf_t *ctx)
{
	struct bpf_program *bp = &ctx->prog;

	assert(ctx->goff == 0);
	assert(ctx->gblock == 0);

	ctx->goff = bp->bf_len;
	ctx->gblock = ctx->nblocks;
	ctx->ingroup = true;
}

void
npfctl_bpf_endgroup(npf_bpf_t *ctx)
{
	struct bpf_program *bp = &ctx->prog;
	const size_t curoff = bp->bf_len;

	/* If there are no blocks or only one - nothing to do. */
	if ((ctx->nblocks - ctx->gblock) <= 1) {
		ctx->goff = ctx->gblock = 0;
		return;
	}

	/*
	 * Append a failure return as the fall-through, i.e. taken if
	 * there is no match within the group.
	 */
	struct bpf_insn insns_ret[] = {
		BPF_STMT(BPF_RET+BPF_K, NPF_BPF_FAILURE),
	};
	add_insns(ctx, insns_ret, __arraycount(insns_ret));

	/*
	 * Adjust the jump offsets: on match - jump outside the group,
	 * i.e. to the current offset.  Otherwise, jump to the next
	 * instruction, which leads to the fall-through code above if
	 * none of the blocks matches.
	 */
	fixup_jumps(ctx, ctx->goff, curoff, true);
	ctx->goff = ctx->gblock = 0;
}
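
/*
 * An illustrative sketch: a group of two comparison blocks compiles to
 * something like
 *
 *	... ; jeq #x, 0, MAGIC		<- block 1
 *	... ; jeq #y, 0, MAGIC		<- block 2
 *	ret #0				<- fall-through: no match
 *
 * and the swapping fixup turns each "jeq" into "on match, jump past the
 * ret #0; on mismatch, fall through to the next block".
 */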
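/*
 * fetch_l3: emit the code to fetch the L3 information via the
 * coprocessor and to check the address family, unless already done.
 * Optionally, load the L4 header offset into the X register
 * (the X_EQ_L4OFF flag).
 */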
static void
fetch_l3(npf_bpf_t *ctx, sa_family_t af, u_int flags)
{
	u_int ver;

	switch (af) {
	case AF_INET:
		ver = IPVERSION;
		break;
	case AF_INET6:
		ver = IPV6_VERSION >> 4;
		break;
	case AF_UNSPEC:
		ver = 0;
		break;
	default:
		abort();
	}

	/*
	 * Fetch L3 information.  The coprocessor populates the following
	 * words in the scratch memory store:
	 * - BPF_MW_IPVER: IP version (4 or 6).
	 * - BPF_MW_L4OFF: L4 header offset.
	 * - BPF_MW_L4PROTO: L4 protocol.
	 */
	if ((ctx->flags & FETCHED_L3) == 0 || (af && ctx->af == 0)) {
		const uint8_t jt = ver ? 0 : JUMP_MAGIC;
		const uint8_t jf = ver ? JUMP_MAGIC : 0;
		bool ingroup = ctx->ingroup;

		/*
		 * The L3 block cannot be inserted in the middle of a group
		 * (in fact, it never is).  End the group here and restart
		 * it after the L3 block.
		 */
		if (ingroup) {
			assert(ctx->nblocks == ctx->gblock);
			npfctl_bpf_endgroup(ctx);
		}

		/*
		 * A <- IP version; A == expected-version?
		 * If no particular version specified, check for non-zero.
		 */
		struct bpf_insn insns_l3[] = {
			BPF_STMT(BPF_MISC+BPF_COP, NPF_COP_L3),
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ver, jt, jf),
		};
		add_insns(ctx, insns_l3, __arraycount(insns_l3));
		ctx->flags |= FETCHED_L3;
		ctx->af = af;

		if (af) {
			uint32_t mwords[] = { BM_IPVER, 1, af };
			done_raw_block(ctx, mwords, sizeof(mwords));
		}
		if (ingroup) {
			npfctl_bpf_group(ctx);
		}

	} else if (af && af != ctx->af) {
		errx(EXIT_FAILURE, "address family mismatch");
	}

	if ((flags & X_EQ_L4OFF) != 0 && (ctx->flags & X_EQ_L4OFF) == 0) {
		/* X <- IP header length */
		struct bpf_insn insns_hlen[] = {
			BPF_STMT(BPF_LDX+BPF_MEM, BPF_MW_L4OFF),
		};
		add_insns(ctx, insns_hlen, __arraycount(insns_hlen));
		ctx->flags |= X_EQ_L4OFF;
	}
}

/*
 * npfctl_bpf_proto: code block to match IP version and L4 protocol.
 */
void
npfctl_bpf_proto(npf_bpf_t *ctx, sa_family_t af, int proto)
{
	assert(af != AF_UNSPEC || proto != -1);

	/* Note: fails if IP version does not match. */
	fetch_l3(ctx, af, 0);
	if (proto == -1) {
		return;
	}

	struct bpf_insn insns_proto[] = {
		/* A <- L4 protocol; A == expected-protocol? */
		BPF_STMT(BPF_LD+BPF_W+BPF_MEM, BPF_MW_L4PROTO),
		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, proto, 0, JUMP_MAGIC),
	};
	add_insns(ctx, insns_proto, __arraycount(insns_proto));

	uint32_t mwords[] = { BM_PROTO, 1, proto };
	done_block(ctx, mwords, sizeof(mwords));
}
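
/*
 * For instance, matching AF_INET and IPPROTO_TCP compiles roughly to
 * the following fragment (an illustrative sketch; IPPROTO_TCP == 6):
 *
 *	cop #NPF_COP_L3			; fetch L3 info, A <- IP version
 *	jeq #4, 0, MAGIC
 *	ld  M[BPF_MW_L4PROTO]		; A <- L4 protocol
 *	jeq #6, 0, MAGIC
 */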

/*
 * npfctl_bpf_cidr: code block to match IPv4 or IPv6 CIDR.
 *
 * => IP address shall be in the network byte order.
 */
void
npfctl_bpf_cidr(npf_bpf_t *ctx, u_int opts, sa_family_t af,
    const npf_addr_t *addr, const npf_netmask_t mask)
{
	const uint32_t *awords = (const uint32_t *)addr;
	u_int nwords, length, maxmask, off;

	assert(((opts & MATCH_SRC) != 0) ^ ((opts & MATCH_DST) != 0));
	assert((mask && mask <= NPF_MAX_NETMASK) || mask == NPF_NO_NETMASK);

	switch (af) {
	case AF_INET:
		maxmask = 32;
		off = (opts & MATCH_SRC) ?
		    offsetof(struct ip, ip_src) :
		    offsetof(struct ip, ip_dst);
		nwords = sizeof(struct in_addr) / sizeof(uint32_t);
		break;
	case AF_INET6:
		maxmask = 128;
		off = (opts & MATCH_SRC) ?
		    offsetof(struct ip6_hdr, ip6_src) :
		    offsetof(struct ip6_hdr, ip6_dst);
		nwords = sizeof(struct in6_addr) / sizeof(uint32_t);
		break;
	default:
		abort();
	}

	/* Ensure address family. */
	fetch_l3(ctx, af, 0);

	length = (mask == NPF_NO_NETMASK) ? maxmask : mask;

	/* CAUTION: BPF operates in host byte-order. */
	for (u_int i = 0; i < nwords; i++) {
		const u_int woff = i * sizeof(uint32_t);
		uint32_t word = ntohl(awords[i]);
		uint32_t wordmask;

		if (length >= 32) {
			/* The mask is a full word - do not apply it. */
			wordmask = 0;
			length -= 32;
		} else if (length) {
			/* Keep the remaining high bits of this word. */
			wordmask = 0xffffffff << (32 - length);
			length = 0;
		} else {
			/* The mask became zero - skip the rest. */
			break;
		}

		/* A <- IP address (or one word of it) */
		struct bpf_insn insns_ip[] = {
			BPF_STMT(BPF_LD+BPF_W+BPF_ABS, off + woff),
		};
		add_insns(ctx, insns_ip, __arraycount(insns_ip));

		/* A <- (A & MASK) */
		if (wordmask) {
			struct bpf_insn insns_mask[] = {
				BPF_STMT(BPF_ALU+BPF_AND+BPF_K, wordmask),
			};
			add_insns(ctx, insns_mask, __arraycount(insns_mask));
		}

		/* A == expected-IP-word ? */
		struct bpf_insn insns_cmp[] = {
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, word, 0, JUMP_MAGIC),
		};
		add_insns(ctx, insns_cmp, __arraycount(insns_cmp));
	}

	uint32_t mwords[] = {
		(opts & MATCH_SRC) ? BM_SRC_CIDR : BM_DST_CIDR, 6,
		af, mask, awords[0], awords[1], awords[2], awords[3],
	};
	done_block(ctx, mwords, sizeof(mwords));
}
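
/*
 * A worked example (illustrative): matching the source CIDR 10.0.0.0/24
 * takes a single word, with word == 0x0a000000 and wordmask == 0xffffff00,
 * i.e. the address comparison fragment is:
 *
 *	ld  [12]			; A <- ip_src
 *	and #0xffffff00
 *	jeq #0x0a000000, 0, MAGIC
 */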

/*
 * npfctl_bpf_ports: code block to match TCP/UDP port range.
 *
 * => Port numbers shall be in the network byte order.
 */
void
npfctl_bpf_ports(npf_bpf_t *ctx, u_int opts, in_port_t from, in_port_t to)
{
	const u_int sport_off = offsetof(struct udphdr, uh_sport);
	const u_int dport_off = offsetof(struct udphdr, uh_dport);
	u_int off;

	/* TCP and UDP port offsets are the same. */
	assert(sport_off == offsetof(struct tcphdr, th_sport));
	assert(dport_off == offsetof(struct tcphdr, th_dport));

	assert(((opts & MATCH_SRC) != 0) ^ ((opts & MATCH_DST) != 0));
	off = (opts & MATCH_SRC) ? sport_off : dport_off;

	/* X <- IP header length */
	fetch_l3(ctx, 0, X_EQ_L4OFF);

	struct bpf_insn insns_fetch[] = {
		/* A <- port */
		BPF_STMT(BPF_LD+BPF_H+BPF_IND, off),
	};
	add_insns(ctx, insns_fetch, __arraycount(insns_fetch));

	/* CAUTION: BPF operates in host byte-order. */
	from = ntohs(from);
	to = ntohs(to);

	if (from == to) {
		/* Single port case. */
		struct bpf_insn insns_port[] = {
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, from, 0, JUMP_MAGIC),
		};
		add_insns(ctx, insns_port, __arraycount(insns_port));
	} else {
		/* Port range case. */
		struct bpf_insn insns_range[] = {
			BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, from, 0, JUMP_MAGIC),
			BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, to, JUMP_MAGIC, 0),
		};
		add_insns(ctx, insns_range, __arraycount(insns_range));
	}

	uint32_t mwords[] = {
		opts & MATCH_SRC ? BM_SRC_PORTS : BM_DST_PORTS, 2, from, to
	};
	done_block(ctx, mwords, sizeof(mwords));
}
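
/*
 * For example, matching the destination port range 80-443 compiles
 * roughly to (an illustrative sketch):
 *
 *	ldx M[BPF_MW_L4OFF]		; X <- L4 header offset
 *	ldh [x + 2]			; A <- destination port
 *	jge #80, 0, MAGIC		; fail if A < 80
 *	jgt #443, MAGIC, 0		; fail if A > 443
 */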

/*
 * npfctl_bpf_tcpfl: code block to match TCP flags.
 */
void
npfctl_bpf_tcpfl(npf_bpf_t *ctx, uint8_t tf, uint8_t tf_mask)
{
	const u_int tcpfl_off = offsetof(struct tcphdr, th_flags);

	/* X <- IP header length */
	fetch_l3(ctx, 0, X_EQ_L4OFF);

	struct bpf_insn insns_tf[] = {
		/* A <- TCP flags */
		BPF_STMT(BPF_LD+BPF_B+BPF_IND, tcpfl_off),
	};
	add_insns(ctx, insns_tf, __arraycount(insns_tf));

	if (tf_mask != tf) {
		/* A <- (A & mask) */
		struct bpf_insn insns_mask[] = {
			BPF_STMT(BPF_ALU+BPF_AND+BPF_K, tf_mask),
		};
		add_insns(ctx, insns_mask, __arraycount(insns_mask));
	}

	struct bpf_insn insns_cmp[] = {
		/* A == expected-TCP-flags? */
		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, tf, 0, JUMP_MAGIC),
	};
	add_insns(ctx, insns_cmp, __arraycount(insns_cmp));

	uint32_t mwords[] = { BM_TCPFL, 2, tf, tf_mask };
	done_block(ctx, mwords, sizeof(mwords));
}
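
/*
 * For example, matching SYN set with ACK clear (tf == TH_SYN and
 * tf_mask == TH_SYN | TH_ACK) compiles roughly to (a sketch):
 *
 *	ldb [x + 13]			; A <- TCP flags
 *	and #0x12			; TH_SYN | TH_ACK
 *	jeq #0x02, 0, MAGIC		; TH_SYN
 */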

/*
 * npfctl_bpf_icmp: code block to match ICMP type and/or code.
 * Note: suitable for both ICMPv4 and ICMPv6.
 */
void
npfctl_bpf_icmp(npf_bpf_t *ctx, int type, int code)
{
	const u_int type_off = offsetof(struct icmp, icmp_type);
	const u_int code_off = offsetof(struct icmp, icmp_code);

	assert(offsetof(struct icmp6_hdr, icmp6_type) == type_off);
	assert(offsetof(struct icmp6_hdr, icmp6_code) == code_off);
	assert(type != -1 || code != -1);

	/* X <- IP header length */
	fetch_l3(ctx, 0, X_EQ_L4OFF);

	if (type != -1) {
		struct bpf_insn insns_type[] = {
			BPF_STMT(BPF_LD+BPF_B+BPF_IND, type_off),
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, type, 0, JUMP_MAGIC),
		};
		add_insns(ctx, insns_type, __arraycount(insns_type));

		uint32_t mwords[] = { BM_ICMP_TYPE, 1, type };
		done_block(ctx, mwords, sizeof(mwords));
	}

	if (code != -1) {
		struct bpf_insn insns_code[] = {
			BPF_STMT(BPF_LD+BPF_B+BPF_IND, code_off),
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, code, 0, JUMP_MAGIC),
		};
		add_insns(ctx, insns_code, __arraycount(insns_code));

		uint32_t mwords[] = { BM_ICMP_CODE, 1, code };
		done_block(ctx, mwords, sizeof(mwords));
	}
}
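
/*
 * For example, matching ICMP type 8 (echo request) with no code
 * specified compiles roughly to (a sketch):
 *
 *	ldb [x + 0]			; A <- ICMP type
 *	jeq #8, 0, MAGIC
 */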

#define	SRC_FLAG_BIT	(1U << 31)

/*
 * npfctl_bpf_table: code block to match source/destination IP address
 * against the NPF table specified by ID.
 */
void
npfctl_bpf_table(npf_bpf_t *ctx, u_int opts, u_int tid)
{
	const bool src = (opts & MATCH_SRC) != 0;

	struct bpf_insn insns_table[] = {
		BPF_STMT(BPF_LD+BPF_IMM, (src ? SRC_FLAG_BIT : 0) | tid),
		BPF_STMT(BPF_MISC+BPF_COP, NPF_COP_TABLE),
		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0, JUMP_MAGIC, 0),
	};
	add_insns(ctx, insns_table, __arraycount(insns_table));

	uint32_t mwords[] = { src ? BM_SRC_TABLE : BM_DST_TABLE, 1, tid };
	done_block(ctx, mwords, sizeof(mwords));
}
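
/*
 * Note: the immediate word passed to the coprocessor encodes the
 * direction in the top bit (SRC_FLAG_BIT) and the table ID in the
 * remaining bits.  A zero return from the coprocessor is treated as
 * "not in the table": the jeq #0 above branches to the failure path.
 */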