optimize.c revision 190225
/*
 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that: (1) source code distributions
 * retain the above copyright notice and this paragraph in its entirety, (2)
 * distributions including binary code include the above copyright notice and
 * this paragraph in its entirety in the documentation or other materials
 * provided with the distribution, and (3) all advertising materials mentioning
 * features or use of this software display the following acknowledgement:
 * ``This product includes software developed by the University of California,
 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
 * the University nor the names of its contributors may be used to endorse
 * or promote products derived from this software without specific prior
 * written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 *  Optimization module for tcpdump intermediate representation.
 */
#ifndef lint
static const char rcsid[] _U_ =
    "@(#) $Header: /tcpdump/master/libpcap/optimize.c,v 1.90.2.1 2008/01/02 04:22:16 guy Exp $ (LBL)";
#endif

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <string.h>

#include <errno.h>

#include "pcap-int.h"

#include "gencode.h"

#ifdef HAVE_OS_PROTO_H
#include "os-proto.h"
#endif

#ifdef BDEBUG
extern int dflag;
#endif

#if defined(MSDOS) && !defined(__DJGPP__)
extern int _w32_ffs (int mask);
#define ffs _w32_ffs
#endif

#if defined(WIN32) && defined (_MSC_VER)
int ffs(int mask);
#endif

/*
 * Represents a deleted instruction.
 */
#define NOP -1

/*
 * Register numbers for use-def values.
 * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
 * location.  A_ATOM is the accumulator and X_ATOM is the index
 * register.
 */
#define A_ATOM BPF_MEMWORDS
#define X_ATOM (BPF_MEMWORDS+1)

/*
 * This define is used to represent *both* the accumulator and
 * x register in use-def computations.
 * Currently, the use-def code assumes only one definition per instruction.
 */
#define AX_ATOM N_ATOMS

/*
 * A flag to indicate that further optimization is needed.
 * Iterative passes are continued until a given pass yields no
 * branch movement.
 */
static int done;

/*
 * A block is marked if and only if its mark equals the current mark.
 * Rather than traverse the code array, marking each item, 'cur_mark' is
 * incremented.  This automatically makes each element unmarked.
 */
static int cur_mark;
#define isMarked(p) ((p)->mark == cur_mark)
#define unMarkAll() cur_mark += 1
#define Mark(p) ((p)->mark = cur_mark)

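/*
 * For example, a traversal that must visit each block once does:
 *
 *	unMarkAll();
 *	...
 *	if (isMarked(b))
 *		return;
 *	Mark(b);
 *
 * unMarkAll() just bumps cur_mark, so every stale mark compares
 * unequal and the entire graph becomes unmarked in O(1).
 */
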
static void opt_init(struct block *);
static void opt_cleanup(void);

static void make_marks(struct block *);
static void mark_code(struct block *);

static void intern_blocks(struct block *);

static int eq_slist(struct slist *, struct slist *);

static void find_levels_r(struct block *);

static void find_levels(struct block *);
static void find_dom(struct block *);
static void propedom(struct edge *);
static void find_edom(struct block *);
static void find_closure(struct block *);
static int atomuse(struct stmt *);
static int atomdef(struct stmt *);
static void compute_local_ud(struct block *);
static void find_ud(struct block *);
static void init_val(void);
static int F(int, int, int);
static inline void vstore(struct stmt *, int *, int, int);
static void opt_blk(struct block *, int);
static int use_conflict(struct block *, struct block *);
static void opt_j(struct edge *);
static void or_pullup(struct block *);
static void and_pullup(struct block *);
static void opt_blks(struct block *, int);
static inline void link_inedge(struct edge *, struct block *);
static void find_inedges(struct block *);
static void opt_root(struct block **);
static void opt_loop(struct block *, int);
static void fold_op(struct stmt *, int, int);
static inline struct slist *this_op(struct slist *);
static void opt_not(struct block *);
static void opt_peep(struct block *);
static void opt_stmt(struct stmt *, int[], int);
static void deadstmt(struct stmt *, struct stmt *[]);
static void opt_deadstores(struct block *);
static struct block *fold_edge(struct block *, struct edge *);
static inline int eq_blk(struct block *, struct block *);
static int slength(struct slist *);
static int count_blocks(struct block *);
static void number_blks_r(struct block *);
static int count_stmts(struct block *);
static int convert_code_r(struct block *);
#ifdef BDEBUG
static void opt_dump(struct block *);
#endif

static int n_blocks;
struct block **blocks;
static int n_edges;
struct edge **edges;

/*
 * A bit vector set representation of the dominators.
 * We round up the set size to the next power of two.
 */
static int nodewords;
static int edgewords;
struct block **levels;
bpf_u_int32 *space;
#define BITS_PER_WORD (8*sizeof(bpf_u_int32))
/*
 * True if a is in uset {p}
 */
#define SET_MEMBER(p, a) \
((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))

/*
 * Add 'a' to uset p.
 */
#define SET_INSERT(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))

/*
 * Delete 'a' from uset p.
 */
#define SET_DELETE(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))

/*
 * a := a intersect b
 */
#define SET_INTERSECT(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &= *_y++;\
}

/*
 * a := a - b
 */
#define SET_SUBTRACT(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &=~ *_y++;\
}

/*
 * a := a union b
 */
#define SET_UNION(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ |= *_y++;\
}
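
/*
 * For example, with 32-bit words, a uset holding nodes {1, 33} spans
 * two words: bit 1 of word 0 and bit 33 % 32 == 1 of word 1.
 * SET_MEMBER(p, 33) indexes word 33 / 32 == 1 and tests that bit;
 * SET_INTERSECT(a, b, 2) ANDs the two words of b into a in place.
 */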

static uset all_dom_sets;
static uset all_closure_sets;
static uset all_edge_sets;

#ifndef MAX
#define MAX(a,b) ((a)>(b)?(a):(b))
#endif

static void
find_levels_r(b)
	struct block *b;
{
	int level;

	if (isMarked(b))
		return;

	Mark(b);
	b->link = 0;

	if (JT(b)) {
		find_levels_r(JT(b));
		find_levels_r(JF(b));
		level = MAX(JT(b)->level, JF(b)->level) + 1;
	} else
		level = 0;
	b->level = level;
	b->link = levels[level];
	levels[level] = b;
}

/*
 * Level graph.  The levels go from 0 at the leaves to
 * N_LEVELS at the root.  The levels[] array points to the
 * first node of the level list, whose elements are linked
 * with the 'link' field of the struct block.
 */
static void
find_levels(root)
	struct block *root;
{
	memset((char *)levels, 0, n_blocks * sizeof(*levels));
	unMarkAll();
	find_levels_r(root);
}

/*
 * Find dominator relationships.
 * Assumes graph has been leveled.
 */
static void
find_dom(root)
	struct block *root;
{
	int i;
	struct block *b;
	bpf_u_int32 *x;

	/*
	 * Initialize sets to contain all nodes.
	 */
	x = all_dom_sets;
	i = n_blocks * nodewords;
	while (--i >= 0)
		*x++ = ~0;
	/* Root starts off empty. */
	for (i = nodewords; --i >= 0;)
		root->dom[i] = 0;

	/* root->level is the highest level number found. */
	for (i = root->level; i >= 0; --i) {
		for (b = levels[i]; b; b = b->link) {
			SET_INSERT(b->dom, b->id);
			if (JT(b) == 0)
				continue;
			SET_INTERSECT(JT(b)->dom, b->dom, nodewords);
			SET_INTERSECT(JF(b)->dom, b->dom, nodewords);
		}
	}
}
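
/*
 * Illustrative example: in a diamond where root r branches to a and b,
 * both of which branch to d, the intersections give dom(a) = {r, a}
 * and dom(b) = {r, b}; d receives {r, a} intersect {r, b}, so
 * dom(d) = {r, d} -- neither a nor b alone dominates d.
 */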

static void
propedom(ep)
	struct edge *ep;
{
	SET_INSERT(ep->edom, ep->id);
	if (ep->succ) {
		SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords);
		SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords);
	}
}

/*
 * Compute edge dominators.
 * Assumes graph has been leveled and predecessors established.
 */
static void
find_edom(root)
	struct block *root;
{
	int i;
	uset x;
	struct block *b;

	x = all_edge_sets;
	for (i = n_edges * edgewords; --i >= 0; )
		x[i] = ~0;

	/* root->level is the highest level number found. */
	memset(root->et.edom, 0, edgewords * sizeof(*(uset)0));
	memset(root->ef.edom, 0, edgewords * sizeof(*(uset)0));
	for (i = root->level; i >= 0; --i) {
		for (b = levels[i]; b != 0; b = b->link) {
			propedom(&b->et);
			propedom(&b->ef);
		}
	}
}

/*
 * Find the backwards transitive closure of the flow graph.  These sets
 * are backwards in the sense that we find the set of nodes that reach
 * a given node, not the set of nodes that can be reached by a node.
 *
 * Assumes graph has been leveled.
 */
static void
find_closure(root)
	struct block *root;
{
	int i;
	struct block *b;

	/*
	 * Initialize sets to contain no nodes.
	 */
	memset((char *)all_closure_sets, 0,
	      n_blocks * nodewords * sizeof(*all_closure_sets));

	/* root->level is the highest level number found. */
	for (i = root->level; i >= 0; --i) {
		for (b = levels[i]; b; b = b->link) {
			SET_INSERT(b->closure, b->id);
			if (JT(b) == 0)
				continue;
			SET_UNION(JT(b)->closure, b->closure, nodewords);
			SET_UNION(JF(b)->closure, b->closure, nodewords);
		}
	}
}

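/*
 * For a chain r -> a -> b the result is closure(r) = {r},
 * closure(a) = {r, a} and closure(b) = {r, a, b}: each set names the
 * nodes from which the block can be reached, itself included.
 */
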
/*
 * Return the register number that is used by s.  If A and X are both
 * used, return AX_ATOM.  If no register is used, return -1.
 *
 * The implementation should probably change to an array access.
 */
static int
atomuse(s)
	struct stmt *s;
{
	register int c = s->code;

	if (c == NOP)
		return -1;

	switch (BPF_CLASS(c)) {

	case BPF_RET:
		return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
			(BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;

	case BPF_LD:
	case BPF_LDX:
		return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
			(BPF_MODE(c) == BPF_MEM) ? s->k : -1;

	case BPF_ST:
		return A_ATOM;

	case BPF_STX:
		return X_ATOM;

	case BPF_JMP:
	case BPF_ALU:
		if (BPF_SRC(c) == BPF_X)
			return AX_ATOM;
		return A_ATOM;

	case BPF_MISC:
		return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
	}
	abort();
	/* NOTREACHED */
}

/*
 * Return the register number that is defined by 's'.  We assume that
 * a single stmt cannot define more than one register.  If no register
 * is defined, return -1.
 *
 * The implementation should probably change to an array access.
 */
static int
atomdef(s)
	struct stmt *s;
{
	if (s->code == NOP)
		return -1;

	switch (BPF_CLASS(s->code)) {

	case BPF_LD:
	case BPF_ALU:
		return A_ATOM;

	case BPF_LDX:
		return X_ATOM;

	case BPF_ST:
	case BPF_STX:
		return s->k;

	case BPF_MISC:
		return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
	}
	return -1;
}

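/*
 * For example, for "ld M[2]" atomuse() returns 2 (the scratch slot
 * read) and atomdef() returns A_ATOM; for "tax" they return A_ATOM
 * and X_ATOM respectively.
 */
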
/*
 * Compute the sets of registers used, defined, and killed by 'b'.
 *
 * "Used" means that a statement in 'b' uses the register before any
 * statement in 'b' defines it, i.e. it uses the value left in
 * that register by a predecessor block of this block.
 * "Defined" means that a statement in 'b' defines it.
 * "Killed" means that a statement in 'b' defines it before any
 * statement in 'b' uses it, i.e. it kills the value left in that
 * register by a predecessor block of this block.
 */
static void
compute_local_ud(b)
	struct block *b;
{
	struct slist *s;
	atomset def = 0, use = 0, kill = 0;
	int atom;

	for (s = b->stmts; s; s = s->next) {
		if (s->s.code == NOP)
			continue;
		atom = atomuse(&s->s);
		if (atom >= 0) {
			if (atom == AX_ATOM) {
				if (!ATOMELEM(def, X_ATOM))
					use |= ATOMMASK(X_ATOM);
				if (!ATOMELEM(def, A_ATOM))
					use |= ATOMMASK(A_ATOM);
			}
			else if (atom < N_ATOMS) {
				if (!ATOMELEM(def, atom))
					use |= ATOMMASK(atom);
			}
			else
				abort();
		}
		atom = atomdef(&s->s);
		if (atom >= 0) {
			if (!ATOMELEM(use, atom))
				kill |= ATOMMASK(atom);
			def |= ATOMMASK(atom);
		}
	}
	if (BPF_CLASS(b->s.code) == BPF_JMP) {
		/*
		 * XXX - what about RET?
		 */
		atom = atomuse(&b->s);
		if (atom >= 0) {
			if (atom == AX_ATOM) {
				if (!ATOMELEM(def, X_ATOM))
					use |= ATOMMASK(X_ATOM);
				if (!ATOMELEM(def, A_ATOM))
					use |= ATOMMASK(A_ATOM);
			}
			else if (atom < N_ATOMS) {
				if (!ATOMELEM(def, atom))
					use |= ATOMMASK(atom);
			}
			else
				abort();
		}
	}

	b->def = def;
	b->kill = kill;
	b->in_use = use;
}

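/*
 * Worked example: for a block consisting of
 *
 *	ldx M[3]
 *	txa
 *	st  M[5]
 *
 * memory slot 3 is used before anything in the block defines it, so
 * in_use = {3}; X, A and slot 5 are each defined before any local use,
 * so def = kill = {X_ATOM, A_ATOM, 5}.
 */
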
/*
 * Assume graph is already leveled.
 */
static void
find_ud(root)
	struct block *root;
{
	int i, maxlevel;
	struct block *p;

	/*
	 * root->level is the highest level number found;
	 * count down from there.
	 */
	maxlevel = root->level;
	for (i = maxlevel; i >= 0; --i)
		for (p = levels[i]; p; p = p->link) {
			compute_local_ud(p);
			p->out_use = 0;
		}

	for (i = 1; i <= maxlevel; ++i) {
		for (p = levels[i]; p; p = p->link) {
			p->out_use |= JT(p)->in_use | JF(p)->in_use;
			p->in_use |= p->out_use &~ p->kill;
		}
	}
}

/*
 * These data structures are used in a Cocke and Schwartz style
 * value numbering scheme.  Since the flowgraph is acyclic,
 * exit values can be propagated from a node's predecessors
 * provided it is uniquely defined.
 */
struct valnode {
	int code;
	int v0, v1;
	int val;
	struct valnode *next;
};

#define MODULUS 213
static struct valnode *hashtbl[MODULUS];
static int curval;
static int maxval;

/* Integer constants mapped with the load immediate opcode. */
#define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L)

struct vmapinfo {
	int is_const;
	bpf_int32 const_val;
};

struct vmapinfo *vmap;
struct valnode *vnode_base;
struct valnode *next_vnode;

static void
init_val()
{
	curval = 0;
	next_vnode = vnode_base;
	memset((char *)vmap, 0, maxval * sizeof(*vmap));
	memset((char *)hashtbl, 0, sizeof hashtbl);
}

/* Because we really don't have an IR, this stuff is a little messy. */
static int
F(code, v0, v1)
	int code;
	int v0, v1;
{
	u_int hash;
	int val;
	struct valnode *p;

	hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
	hash %= MODULUS;

	for (p = hashtbl[hash]; p; p = p->next)
		if (p->code == code && p->v0 == v0 && p->v1 == v1)
			return p->val;

	val = ++curval;
	if (BPF_MODE(code) == BPF_IMM &&
	    (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
		vmap[val].const_val = v0;
		vmap[val].is_const = 1;
	}
	p = next_vnode++;
	p->val = val;
	p->code = code;
	p->v0 = v0;
	p->v1 = v1;
	p->next = hashtbl[hash];
	hashtbl[hash] = p;

	return val;
}
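
/*
 * For example, two occurrences of "ld #1" both call
 * F(BPF_LD|BPF_IMM|BPF_W, 1, 0) and find the same valnode, so they get
 * the same value number; the first call also records the constant in
 * vmap[], which is what later lets fold_op() and opt_stmt() treat the
 * value as a known constant.
 */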

static inline void
vstore(s, valp, newval, alter)
	struct stmt *s;
	int *valp;
	int newval;
	int alter;
{
	if (alter && *valp == newval)
		s->code = NOP;
	else
		*valp = newval;
}

static void
fold_op(s, v0, v1)
	struct stmt *s;
	int v0, v1;
{
	bpf_u_int32 a, b;

	a = vmap[v0].const_val;
	b = vmap[v1].const_val;

	switch (BPF_OP(s->code)) {
	case BPF_ADD:
		a += b;
		break;

	case BPF_SUB:
		a -= b;
		break;

	case BPF_MUL:
		a *= b;
		break;

	case BPF_DIV:
		if (b == 0)
			bpf_error("division by zero");
		a /= b;
		break;

	case BPF_AND:
		a &= b;
		break;

	case BPF_OR:
		a |= b;
		break;

	case BPF_LSH:
		a <<= b;
		break;

	case BPF_RSH:
		a >>= b;
		break;

	case BPF_NEG:
		a = -a;
		break;

	default:
		abort();
	}
	s->k = a;
	s->code = BPF_LD|BPF_IMM;
	done = 0;
}
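
/*
 * For example, if the accumulator's value number maps to the constant
 * 6 and the statement is "mul #7", fold_op() computes 42 and rewrites
 * the statement in place as "ld #42", clearing 'done' so another pass
 * runs over the folded result.
 */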

static inline struct slist *
this_op(s)
	struct slist *s;
{
	while (s != 0 && s->s.code == NOP)
		s = s->next;
	return s;
}

static void
opt_not(b)
	struct block *b;
{
	struct block *tmp = JT(b);

	JT(b) = JF(b);
	JF(b) = tmp;
}

static void
opt_peep(b)
	struct block *b;
{
	struct slist *s;
	struct slist *next, *last;
	int val;

	s = b->stmts;
	if (s == 0)
		return;

	last = s;
	for (/*empty*/; /*empty*/; s = next) {
		/*
		 * Skip over nops.
		 */
		s = this_op(s);
		if (s == 0)
			break;	/* nothing left in the block */

		/*
		 * Find the next real instruction after that one
		 * (skipping nops).
		 */
		next = this_op(s->next);
		if (next == 0)
			break;	/* no next instruction */
		last = next;

		/*
		 * st  M[k]	-->	st  M[k]
		 * ldx M[k]		tax
		 */
		if (s->s.code == BPF_ST &&
		    next->s.code == (BPF_LDX|BPF_MEM) &&
		    s->s.k == next->s.k) {
			done = 0;
			next->s.code = BPF_MISC|BPF_TAX;
		}
		/*
		 * ld  #k	-->	ldx  #k
		 * tax			txa
		 */
		if (s->s.code == (BPF_LD|BPF_IMM) &&
		    next->s.code == (BPF_MISC|BPF_TAX)) {
			s->s.code = BPF_LDX|BPF_IMM;
			next->s.code = BPF_MISC|BPF_TXA;
			done = 0;
		}
		/*
		 * This is an ugly special case, but it happens
		 * when you say tcp[k] or udp[k] where k is a constant.
		 */
		if (s->s.code == (BPF_LD|BPF_IMM)) {
			struct slist *add, *tax, *ild;

			/*
			 * Check that X isn't used on exit from this
			 * block (which the optimizer might cause).
			 * We know the code generator won't generate
			 * any local dependencies.
			 */
			if (ATOMELEM(b->out_use, X_ATOM))
				continue;

			/*
			 * Check that the instruction following the ldi
			 * is an addx, or it's an ldxms with an addx
			 * following it (with 0 or more nops between the
			 * ldxms and addx).
			 */
			if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
				add = next;
			else
				add = this_op(next->next);
			if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
				continue;

			/*
			 * Check that a tax follows that (with 0 or more
			 * nops between them).
			 */
			tax = this_op(add->next);
			if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
				continue;

			/*
			 * Check that an ild follows that (with 0 or more
			 * nops between them).
			 */
			ild = this_op(tax->next);
			if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
			    BPF_MODE(ild->s.code) != BPF_IND)
				continue;
			/*
			 * We want to turn this sequence:
			 *
			 * (004) ldi     #0x2		{s}
			 * (005) ldxms   [14]		{next}  -- optional
			 * (006) addx			{add}
			 * (007) tax			{tax}
			 * (008) ild     [x+0]		{ild}
			 *
			 * into this sequence:
			 *
			 * (004) nop
			 * (005) ldxms   [14]
			 * (006) nop
			 * (007) nop
			 * (008) ild     [x+2]
			 *
			 * XXX We need to check that X is not
			 * subsequently used, because we want to change
			 * what'll be in it after this sequence.
			 *
			 * We know we can eliminate the accumulator
			 * modifications earlier in the sequence since
			 * it is defined by the last stmt of this sequence
			 * (i.e., the last statement of the sequence loads
			 * a value into the accumulator, so we can eliminate
			 * earlier operations on the accumulator).
			 */
			ild->s.k += s->s.k;
			s->s.code = NOP;
			add->s.code = NOP;
			tax->s.code = NOP;
			done = 0;
		}
	}
	/*
	 * If the comparison at the end of a block is an equality
	 * comparison against a constant, and nobody uses the value
	 * we leave in the A register at the end of a block, and
	 * the operation preceding the comparison is an arithmetic
	 * operation, we can sometimes optimize it away.
	 */
	if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
	    !ATOMELEM(b->out_use, A_ATOM)) {
		/*
		 * We can optimize away certain subtractions of the
		 * X register.
		 */
		if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
			val = b->val[X_ATOM];
			if (vmap[val].is_const) {
				/*
				 * If we have a subtract to do a comparison,
				 * and the X register is a known constant,
				 * we can merge this value into the
				 * comparison:
				 *
				 * sub x  ->	nop
				 * jeq #y	jeq #(x+y)
				 */
				b->s.k += vmap[val].const_val;
				last->s.code = NOP;
				done = 0;
			} else if (b->s.k == 0) {
				/*
				 * If the X register isn't a constant,
				 * and the comparison in the test is
				 * against 0, we can compare with the
				 * X register, instead:
				 *
				 * sub x  ->	nop
				 * jeq #0	jeq x
				 */
				last->s.code = NOP;
				b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
				done = 0;
			}
		}
		/*
		 * Likewise, a constant subtract can be simplified:
		 *
		 * sub #x ->	nop
		 * jeq #y ->	jeq #(x+y)
		 */
		else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
			last->s.code = NOP;
			b->s.k += last->s.k;
			done = 0;
		}
		/*
		 * And, similarly, a constant AND can be simplified
		 * if we're testing against 0, i.e.:
		 *
		 * and #k	nop
		 * jeq #0  ->	jset #k
		 */
		else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
		    b->s.k == 0) {
			b->s.k = last->s.k;
			b->s.code = BPF_JMP|BPF_K|BPF_JSET;
			last->s.code = NOP;
			done = 0;
			opt_not(b);
		}
	}
	/*
	 * jset #0        ->   never
	 * jset #ffffffff ->   always
	 */
	if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
		if (b->s.k == 0)
			JT(b) = JF(b);
		if (b->s.k == 0xffffffff)
			JF(b) = JT(b);
	}
	/*
	 * If we're comparing against the index register, and the index
	 * register is a known constant, we can just compare against that
	 * constant.
	 */
	val = b->val[X_ATOM];
	if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
		bpf_int32 v = vmap[val].const_val;
		b->s.code &= ~BPF_X;
		b->s.k = v;
	}
	/*
	 * If the accumulator is a known constant, we can compute the
	 * comparison result.
	 */
	val = b->val[A_ATOM];
	if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
		bpf_int32 v = vmap[val].const_val;
		switch (BPF_OP(b->s.code)) {

		case BPF_JEQ:
			v = v == b->s.k;
			break;

		case BPF_JGT:
			v = (unsigned)v > b->s.k;
			break;

		case BPF_JGE:
			v = (unsigned)v >= b->s.k;
			break;

		case BPF_JSET:
			v &= b->s.k;
			break;

		default:
			abort();
		}
		if (JF(b) != JT(b))
			done = 0;
		if (v)
			JF(b) = JT(b);
		else
			JT(b) = JF(b);
	}
}

/*
 * Compute the symbolic value of the expression in 's', and update
 * anything it defines in the value table 'val'.  If 'alter' is true,
 * do various optimizations.  This code would be cleaner if symbolic
 * evaluation and code transformations weren't folded together.
 */
static void
opt_stmt(s, val, alter)
	struct stmt *s;
	int val[];
	int alter;
{
	int op;
	int v;

	switch (s->code) {

	case BPF_LD|BPF_ABS|BPF_W:
	case BPF_LD|BPF_ABS|BPF_H:
	case BPF_LD|BPF_ABS|BPF_B:
		v = F(s->code, s->k, 0L);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_IND|BPF_W:
	case BPF_LD|BPF_IND|BPF_H:
	case BPF_LD|BPF_IND|BPF_B:
		v = val[X_ATOM];
		if (alter && vmap[v].is_const) {
			s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
			s->k += vmap[v].const_val;
			v = F(s->code, s->k, 0L);
			done = 0;
		}
		else
			v = F(s->code, s->k, v);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_LEN:
		v = F(s->code, 0L, 0L);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_IMM:
		v = K(s->k);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LDX|BPF_IMM:
		v = K(s->k);
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_LDX|BPF_MSH|BPF_B:
		v = F(s->code, s->k, 0L);
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_ALU|BPF_NEG:
		if (alter && vmap[val[A_ATOM]].is_const) {
			s->code = BPF_LD|BPF_IMM;
			s->k = -vmap[val[A_ATOM]].const_val;
			val[A_ATOM] = K(s->k);
		}
		else
			val[A_ATOM] = F(s->code, val[A_ATOM], 0L);
		break;

	case BPF_ALU|BPF_ADD|BPF_K:
	case BPF_ALU|BPF_SUB|BPF_K:
	case BPF_ALU|BPF_MUL|BPF_K:
	case BPF_ALU|BPF_DIV|BPF_K:
	case BPF_ALU|BPF_AND|BPF_K:
	case BPF_ALU|BPF_OR|BPF_K:
	case BPF_ALU|BPF_LSH|BPF_K:
	case BPF_ALU|BPF_RSH|BPF_K:
		op = BPF_OP(s->code);
		if (alter) {
			if (s->k == 0) {
				/* don't optimize away "sub #0"
				 * as it may be needed later to
				 * fixup the generated math code */
				if (op == BPF_ADD ||
				    op == BPF_LSH || op == BPF_RSH ||
				    op == BPF_OR) {
					s->code = NOP;
					break;
				}
				if (op == BPF_MUL || op == BPF_AND) {
					s->code = BPF_LD|BPF_IMM;
					val[A_ATOM] = K(s->k);
					break;
				}
			}
			if (vmap[val[A_ATOM]].is_const) {
				fold_op(s, val[A_ATOM], K(s->k));
				val[A_ATOM] = K(s->k);
				break;
			}
		}
		val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k));
		break;

	case BPF_ALU|BPF_ADD|BPF_X:
	case BPF_ALU|BPF_SUB|BPF_X:
	case BPF_ALU|BPF_MUL|BPF_X:
	case BPF_ALU|BPF_DIV|BPF_X:
	case BPF_ALU|BPF_AND|BPF_X:
	case BPF_ALU|BPF_OR|BPF_X:
	case BPF_ALU|BPF_LSH|BPF_X:
	case BPF_ALU|BPF_RSH|BPF_X:
		op = BPF_OP(s->code);
		if (alter && vmap[val[X_ATOM]].is_const) {
			if (vmap[val[A_ATOM]].is_const) {
				fold_op(s, val[A_ATOM], val[X_ATOM]);
				val[A_ATOM] = K(s->k);
			}
			else {
				s->code = BPF_ALU|BPF_K|op;
				s->k = vmap[val[X_ATOM]].const_val;
				done = 0;
				val[A_ATOM] =
					F(s->code, val[A_ATOM], K(s->k));
			}
			break;
		}
		/*
		 * Check if we're doing something to an accumulator
		 * that is 0, and simplify.  This may not seem like
		 * much of a simplification but it could open up further
		 * optimizations.
		 * XXX We could also check for mul by 1, etc.
		 */
		if (alter && vmap[val[A_ATOM]].is_const
		    && vmap[val[A_ATOM]].const_val == 0) {
			if (op == BPF_ADD || op == BPF_OR) {
				s->code = BPF_MISC|BPF_TXA;
				vstore(s, &val[A_ATOM], val[X_ATOM], alter);
				break;
			}
			else if (op == BPF_MUL || op == BPF_DIV ||
				 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
				s->code = BPF_LD|BPF_IMM;
				s->k = 0;
				vstore(s, &val[A_ATOM], K(s->k), alter);
				break;
			}
			else if (op == BPF_NEG) {
				s->code = NOP;
				break;
			}
		}
		val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]);
		break;

	case BPF_MISC|BPF_TXA:
		vstore(s, &val[A_ATOM], val[X_ATOM], alter);
		break;

	case BPF_LD|BPF_MEM:
		v = val[s->k];
		if (alter && vmap[v].is_const) {
			s->code = BPF_LD|BPF_IMM;
			s->k = vmap[v].const_val;
			done = 0;
		}
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_MISC|BPF_TAX:
		vstore(s, &val[X_ATOM], val[A_ATOM], alter);
		break;

	case BPF_LDX|BPF_MEM:
		v = val[s->k];
		if (alter && vmap[v].is_const) {
			s->code = BPF_LDX|BPF_IMM;
			s->k = vmap[v].const_val;
			done = 0;
		}
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_ST:
		vstore(s, &val[s->k], val[A_ATOM], alter);
		break;

	case BPF_STX:
		vstore(s, &val[s->k], val[X_ATOM], alter);
		break;
	}
}
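
/*
 * For example, with 'alter' set, the sequence "ldx #2; ld [x+14]" is
 * rewritten by the BPF_LD|BPF_IND cases into the cheaper absolute load
 * "ld [16]", since the index register's value number is known to be
 * the constant 2.
 */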

static void
deadstmt(s, last)
	register struct stmt *s;
	register struct stmt *last[];
{
	register int atom;

	atom = atomuse(s);
	if (atom >= 0) {
		if (atom == AX_ATOM) {
			last[X_ATOM] = 0;
			last[A_ATOM] = 0;
		}
		else
			last[atom] = 0;
	}
	atom = atomdef(s);
	if (atom >= 0) {
		if (last[atom]) {
			done = 0;
			last[atom]->code = NOP;
		}
		last[atom] = s;
	}
}

static void
opt_deadstores(b)
	register struct block *b;
{
	register struct slist *s;
	register int atom;
	struct stmt *last[N_ATOMS];

	memset((char *)last, 0, sizeof last);

	for (s = b->stmts; s != 0; s = s->next)
		deadstmt(&s->s, last);
	deadstmt(&b->s, last);

	for (atom = 0; atom < N_ATOMS; ++atom)
		if (last[atom] && !ATOMELEM(b->out_use, atom)) {
			last[atom]->code = NOP;
			done = 0;
		}
}

static void
opt_blk(b, do_stmts)
	struct block *b;
	int do_stmts;
{
	struct slist *s;
	struct edge *p;
	int i;
	bpf_int32 aval, xval;

#if 0
	for (s = b->stmts; s && s->next; s = s->next)
		if (BPF_CLASS(s->s.code) == BPF_JMP) {
			do_stmts = 0;
			break;
		}
#endif

	/*
	 * Initialize the atom values.
	 */
	p = b->in_edges;
	if (p == 0) {
		/*
		 * We have no predecessors, so everything is undefined
		 * upon entry to this block.
		 */
		memset((char *)b->val, 0, sizeof(b->val));
	} else {
		/*
		 * Inherit values from our predecessors.
		 *
		 * First, get the values from the predecessor along the
		 * first edge leading to this node.
		 */
		memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
		/*
		 * Now look at all the other nodes leading to this node.
		 * If, for the predecessor along that edge, a register
		 * has a different value from the one we have (i.e.,
		 * control paths are merging, and the merging paths
		 * assign different values to that register), give the
		 * register the undefined value of 0.
		 */
		while ((p = p->next) != NULL) {
			for (i = 0; i < N_ATOMS; ++i)
				if (b->val[i] != p->pred->val[i])
					b->val[i] = 0;
		}
	}
	aval = b->val[A_ATOM];
	xval = b->val[X_ATOM];
	for (s = b->stmts; s; s = s->next)
		opt_stmt(&s->s, b->val, do_stmts);

	/*
	 * This is a special case: if we don't use anything from this
	 * block, and we load the accumulator or index register with a
	 * value that is already there, or if this block is a return,
	 * eliminate all the statements.
	 *
	 * XXX - what if it does a store?
	 *
	 * XXX - why does it matter whether we use anything from this
	 * block?  If the accumulator or index register doesn't change
	 * its value, isn't that OK even if we use that value?
	 *
	 * XXX - if we load the accumulator with a different value,
	 * and the block ends with a conditional branch, we obviously
	 * can't eliminate it, as the branch depends on that value.
	 * For the index register, the conditional branch only depends
	 * on the index register value if the test is against the index
	 * register value rather than a constant; if nothing uses the
	 * value we put into the index register, and we're not testing
	 * against the index register's value, and there aren't any
	 * other problems that would keep us from eliminating this
	 * block, can we eliminate it?
	 */
	if (do_stmts &&
	    ((b->out_use == 0 && aval != 0 && b->val[A_ATOM] == aval &&
	      xval != 0 && b->val[X_ATOM] == xval) ||
	     BPF_CLASS(b->s.code) == BPF_RET)) {
		if (b->stmts != 0) {
			b->stmts = 0;
			done = 0;
		}
	} else {
		opt_peep(b);
		opt_deadstores(b);
	}
	/*
	 * Set up values for branch optimizer.
	 */
	if (BPF_SRC(b->s.code) == BPF_K)
		b->oval = K(b->s.k);
	else
		b->oval = b->val[X_ATOM];
	b->et.code = b->s.code;
	b->ef.code = -b->s.code;
}

/*
 * Return true if any register that is used on exit from 'succ' has
 * an exit value that is different from the corresponding exit value
 * from 'b'.
 */
static int
use_conflict(b, succ)
	struct block *b, *succ;
{
	int atom;
	atomset use = succ->out_use;

	if (use == 0)
		return 0;

	for (atom = 0; atom < N_ATOMS; ++atom)
		if (ATOMELEM(use, atom))
			if (b->val[atom] != succ->val[atom])
				return 1;
	return 0;
}

static struct block *
fold_edge(child, ep)
	struct block *child;
	struct edge *ep;
{
	int sense;
	int aval0, aval1, oval0, oval1;
	int code = ep->code;

	if (code < 0) {
		code = -code;
		sense = 0;
	} else
		sense = 1;

	if (child->s.code != code)
		return 0;

	aval0 = child->val[A_ATOM];
	oval0 = child->oval;
	aval1 = ep->pred->val[A_ATOM];
	oval1 = ep->pred->oval;

	if (aval0 != aval1)
		return 0;

	if (oval0 == oval1)
		/*
		 * The operands of the branch instructions are
		 * identical, so the result is true if a true
		 * branch was taken to get here, otherwise false.
		 */
		return sense ? JT(child) : JF(child);

	if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
		/*
		 * At this point, we only know the comparison if we
		 * came down the true branch, and it was an equality
		 * comparison with a constant.
		 *
		 * I.e., if we came down the true branch, and the branch
		 * was an equality comparison with a constant, we know the
		 * accumulator contains that constant.  If we came down
		 * the false branch, or the comparison wasn't with a
		 * constant, we don't know what was in the accumulator.
		 *
		 * We rely on the fact that distinct constants have distinct
		 * value numbers.
		 */
		return JF(child);

	return 0;
}
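
/*
 * For example, if this edge was taken because "jeq #0x800" succeeded,
 * and the child block tests the same accumulator value with
 * "jeq #0x800", fold_edge() returns the child's true successor; if the
 * child tests equality against some other constant, the test must
 * fail, and the false successor is returned.
 */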

static void
opt_j(ep)
	struct edge *ep;
{
	register int i, k;
	register struct block *target;

	if (JT(ep->succ) == 0)
		return;

	if (JT(ep->succ) == JF(ep->succ)) {
		/*
		 * Common branch targets can be eliminated, provided
		 * there is no data dependency.
		 */
		if (!use_conflict(ep->pred, ep->succ->et.succ)) {
			done = 0;
			ep->succ = JT(ep->succ);
		}
	}
	/*
	 * For each edge dominator that matches the successor of this
	 * edge, promote the edge successor to its grandchild.
	 *
	 * XXX We violate the set abstraction here in favor of a
	 * reasonably efficient loop.
	 */
 top:
	for (i = 0; i < edgewords; ++i) {
		register bpf_u_int32 x = ep->edom[i];

		while (x != 0) {
			k = ffs(x) - 1;
			x &=~ (1 << k);
			k += i * BITS_PER_WORD;

			target = fold_edge(ep->succ, edges[k]);
			/*
			 * Check that there is no data dependency between
			 * nodes that will be violated if we move the edge.
			 */
			if (target != 0 && !use_conflict(ep->pred, target)) {
				done = 0;
				ep->succ = target;
				if (JT(target) != 0)
					/*
					 * Start over unless we hit a leaf.
					 */
					goto top;
				return;
			}
		}
	}
}


static void
or_pullup(b)
	struct block *b;
{
	int val, at_top;
	struct block *pull;
	struct block **diffp, **samep;
	struct edge *ep;

	ep = b->in_edges;
	if (ep == 0)
		return;

	/*
	 * Make sure each predecessor loads the same value.
	 * XXX why?
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])
			return;

	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

	at_top = 1;
	while (1) {
		if (*diffp == 0)
			return;

		if (JT(*diffp) != JT(b))
			return;

		if (!SET_MEMBER((*diffp)->dom, b->id))
			return;

		if ((*diffp)->val[A_ATOM] != val)
			break;

		diffp = &JF(*diffp);
		at_top = 0;
	}
	samep = &JF(*diffp);
	while (1) {
		if (*samep == 0)
			return;

		if (JT(*samep) != JT(b))
			return;

		if (!SET_MEMBER((*samep)->dom, b->id))
			return;

		if ((*samep)->val[A_ATOM] == val)
			break;

		/* XXX Need to check that there are no data dependencies
		   between dp0 and dp1.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JF(*samep);
	}
#ifdef notdef
	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])
			return;
#endif
	/* Pull up the node. */
	pull = *samep;
	*samep = JF(pull);
	JF(pull) = *diffp;

	/*
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
	 */
	if (at_top) {
		for (ep = b->in_edges; ep != 0; ep = ep->next) {
			if (JT(ep->pred) == b)
				JT(ep->pred) = pull;
			else
				JF(ep->pred) = pull;
		}
	}
	else
		*diffp = pull;

	done = 0;
}

static void
and_pullup(b)
	struct block *b;
{
	int val, at_top;
	struct block *pull;
	struct block **diffp, **samep;
	struct edge *ep;

	ep = b->in_edges;
	if (ep == 0)
		return;

	/*
	 * Make sure each predecessor loads the same value.
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])
			return;

	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

	at_top = 1;
	while (1) {
		if (*diffp == 0)
			return;

		if (JF(*diffp) != JF(b))
			return;

		if (!SET_MEMBER((*diffp)->dom, b->id))
			return;

		if ((*diffp)->val[A_ATOM] != val)
			break;

		diffp = &JT(*diffp);
		at_top = 0;
	}
	samep = &JT(*diffp);
	while (1) {
		if (*samep == 0)
			return;

		if (JF(*samep) != JF(b))
			return;

		if (!SET_MEMBER((*samep)->dom, b->id))
			return;

		if ((*samep)->val[A_ATOM] == val)
			break;

		/* XXX Need to check that there are no data dependencies
		   between diffp and samep.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JT(*samep);
	}
#ifdef notdef
	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])
			return;
#endif
	/* Pull up the node. */
	pull = *samep;
	*samep = JT(pull);
	JT(pull) = *diffp;

	/*
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
	 */
	if (at_top) {
		for (ep = b->in_edges; ep != 0; ep = ep->next) {
			if (JT(ep->pred) == b)
				JT(ep->pred) = pull;
			else
				JF(ep->pred) = pull;
		}
	}
	else
		*diffp = pull;

	done = 0;
}

static void
opt_blks(root, do_stmts)
	struct block *root;
	int do_stmts;
{
	int i, maxlevel;
	struct block *p;

	init_val();
	maxlevel = root->level;

	find_inedges(root);
	for (i = maxlevel; i >= 0; --i)
		for (p = levels[i]; p; p = p->link)
			opt_blk(p, do_stmts);

	if (do_stmts)
		/*
		 * No point trying to move branches; it can't possibly
		 * make a difference at this point.
		 */
		return;

	for (i = 1; i <= maxlevel; ++i) {
		for (p = levels[i]; p; p = p->link) {
			opt_j(&p->et);
			opt_j(&p->ef);
		}
	}

	find_inedges(root);
	for (i = 1; i <= maxlevel; ++i) {
		for (p = levels[i]; p; p = p->link) {
			or_pullup(p);
			and_pullup(p);
		}
	}
}

static inline void
link_inedge(parent, child)
	struct edge *parent;
	struct block *child;
{
	parent->next = child->in_edges;
	child->in_edges = parent;
}

static void
find_inedges(root)
	struct block *root;
{
	int i;
	struct block *b;

	for (i = 0; i < n_blocks; ++i)
		blocks[i]->in_edges = 0;

	/*
	 * Traverse the graph, adding each edge to the predecessor
	 * list of its successors.  Skip the leaves (i.e. level 0).
	 */
	for (i = root->level; i > 0; --i) {
		for (b = levels[i]; b != 0; b = b->link) {
			link_inedge(&b->et, JT(b));
			link_inedge(&b->ef, JF(b));
		}
	}
}
168817683Spst
168917683Spststatic void
169017683Spstopt_root(b)
169117683Spst	struct block **b;
169217683Spst{
169317683Spst	struct slist *tmp, *s;
169417683Spst
169517683Spst	s = (*b)->stmts;
169617683Spst	(*b)->stmts = 0;
169717683Spst	while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
169817683Spst		*b = JT(*b);
169917683Spst
170017683Spst	tmp = (*b)->stmts;
170117683Spst	if (tmp != 0)
170217683Spst		sappend(s, tmp);
170317683Spst	(*b)->stmts = s;
170417683Spst
170517683Spst	/*
170617683Spst	 * If the root node is a return, then there is no
170717683Spst	 * point executing any statements (since the bpf machine
170817683Spst	 * has no side effects).
170917683Spst	 */
171017683Spst	if (BPF_CLASS((*b)->s.code) == BPF_RET)
171117683Spst		(*b)->stmts = 0;
171217683Spst}
171317683Spst
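/*
 * Run one optimization pass to a fixed point.  Each iteration
 * recomputes the auxiliary information (levels, dominators, closures,
 * use-def chains and edge dominators) from scratch, since the previous
 * iteration may have changed the graph; any transformation that makes
 * a change clears 'done', forcing another iteration.
 */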
static void
opt_loop(root, do_stmts)
	struct block *root;
	int do_stmts;
{

#ifdef BDEBUG
	if (dflag > 1) {
		printf("opt_loop(root, %d) begin\n", do_stmts);
		opt_dump(root);
	}
#endif
	do {
		done = 1;
		find_levels(root);
		find_dom(root);
		find_closure(root);
		find_ud(root);
		find_edom(root);
		opt_blks(root, do_stmts);
#ifdef BDEBUG
		if (dflag > 1) {
			printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, done);
			opt_dump(root);
		}
#endif
	} while (!done);
}

/*
 * Optimize the filter code in its dag representation.
 */
void
bpf_optimize(rootp)
	struct block **rootp;
{
	struct block *root;

	root = *rootp;

	opt_init(root);
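	/*
	 * Two passes: the first (do_stmts == 0) concentrates on moving
	 * branches around; the second (do_stmts == 1) also lets
	 * opt_blk() eliminate statements, and opt_blks() then skips
	 * branch movement entirely.
	 */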
	opt_loop(root, 0);
	opt_loop(root, 1);
	intern_blocks(root);
#ifdef BDEBUG
	if (dflag > 1) {
		printf("after intern_blocks()\n");
		opt_dump(root);
	}
#endif
	opt_root(rootp);
#ifdef BDEBUG
	if (dflag > 1) {
		printf("after opt_root()\n");
		opt_dump(root);
	}
#endif
	opt_cleanup();
}

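/*
 * Recursively mark all nodes reachable from 'p'.  Return nodes end
 * the recursion, since they have no successors.
 */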
static void
make_marks(p)
	struct block *p;
{
	if (!isMarked(p)) {
		Mark(p);
		if (BPF_CLASS(p->s.code) != BPF_RET) {
			make_marks(JT(p));
			make_marks(JF(p));
		}
	}
}

/*
 * Mark code array such that isMarked(i) is true
 * only for nodes that are alive.
 */
static void
mark_code(p)
	struct block *p;
{
	cur_mark += 1;
	make_marks(p);
}

/*
 * True iff the two stmt lists contain the same sequence of
 * instructions, ignoring NOPs (and hence leave the machine in
 * the same state).
 */
static int
eq_slist(x, y)
	struct slist *x, *y;
{
	while (1) {
		while (x && x->s.code == NOP)
			x = x->next;
		while (y && y->s.code == NOP)
			y = y->next;
		if (x == 0)
			return y == 0;
		if (y == 0)
			return x == 0;
		if (x->s.code != y->s.code || x->s.k != y->s.k)
			return 0;
		x = x->next;
		y = y->next;
	}
}

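/*
 * True iff two blocks are candidates for merging: same test, same
 * successors, and equivalent statement lists (compared modulo NOPs
 * by eq_slist()).
 */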
static inline int
eq_blk(b0, b1)
	struct block *b0, *b1;
{
	if (b0->s.code == b1->s.code &&
	    b0->s.k == b1->s.k &&
	    b0->et.succ == b1->et.succ &&
	    b0->ef.succ == b1->ef.succ)
		return eq_slist(b0->stmts, b1->stmts);
	return 0;
}

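/*
 * Merge structurally identical blocks: after marking the live nodes,
 * scan backwards, linking each block to an equivalent block that
 * follows it, then redirect jumps to the linked equivalents.  Repeat
 * until nothing moves, as each round of merging can expose further
 * equivalences.
 */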
static void
intern_blocks(root)
	struct block *root;
{
	struct block *p;
	int i, j;
	int done1; /* don't shadow global */
 top:
	done1 = 1;
	for (i = 0; i < n_blocks; ++i)
		blocks[i]->link = 0;

	mark_code(root);

	for (i = n_blocks - 1; --i >= 0; ) {
		if (!isMarked(blocks[i]))
			continue;
		for (j = i + 1; j < n_blocks; ++j) {
			if (!isMarked(blocks[j]))
				continue;
			if (eq_blk(blocks[i], blocks[j])) {
				blocks[i]->link = blocks[j]->link ?
					blocks[j]->link : blocks[j];
				break;
			}
		}
	}
	for (i = 0; i < n_blocks; ++i) {
		p = blocks[i];
		if (JT(p) == 0)
			continue;
		if (JT(p)->link) {
			done1 = 0;
			JT(p) = JT(p)->link;
		}
		if (JF(p)->link) {
			done1 = 0;
			JF(p) = JF(p)->link;
		}
	}
	if (!done1)
		goto top;
}

static void
opt_cleanup()
{
	free((void *)vnode_base);
	free((void *)vmap);
	free((void *)edges);
	free((void *)space);
	free((void *)levels);
	free((void *)blocks);
}

/*
 * Return the number of stmts in 's'.
 */
static int
slength(s)
	struct slist *s;
{
	int n = 0;

	for (; s; s = s->next)
		if (s->s.code != NOP)
			++n;
	return n;
}

/*
 * Return the number of nodes reachable by 'p'.
 * All nodes should be initially unmarked.
 */
static int
count_blocks(p)
	struct block *p;
{
	if (p == 0 || isMarked(p))
		return 0;
	Mark(p);
	return count_blocks(JT(p)) + count_blocks(JF(p)) + 1;
}

/*
 * Do a depth first search on the flow graph, numbering the
 * basic blocks, and entering them into the 'blocks' array.
 */
static void
number_blks_r(p)
	struct block *p;
{
	int n;

	if (p == 0 || isMarked(p))
		return;

	Mark(p);
	n = n_blocks++;
	p->id = n;
	blocks[n] = p;

	number_blks_r(JT(p));
	number_blks_r(JF(p));
}

/*
 * Return the number of stmts in the flowgraph reachable by 'p'.
 * The nodes should be unmarked before calling.
 *
 * Note that "stmts" means "instructions", and that this includes
 *
 *	side-effect statements in 'p' (slength(p->stmts));
 *
 *	statements in the true branch from 'p' (count_stmts(JT(p)));
 *
 *	statements in the false branch from 'p' (count_stmts(JF(p)));
 *
 *	the conditional jump itself (1);
 *
 *	an extra long jump if the true branch requires it (p->longjt);
 *
 *	an extra long jump if the false branch requires it (p->longjf).
 */
static int
count_stmts(p)
	struct block *p;
{
	int n;

	if (p == 0 || isMarked(p))
		return 0;
	Mark(p);
	n = count_stmts(JT(p)) + count_stmts(JF(p));
	return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
}

/*
 * Allocate memory.  All allocation is done before optimization
 * is begun.  A linear bound on the size of all data structures is computed
 * from the total number of blocks and/or statements.
 */
static void
opt_init(root)
	struct block *root;
{
	bpf_u_int32 *p;
	int i, n, max_stmts;

	/*
	 * First, count the blocks, so we can malloc an array to map
	 * block number to block.  Then, put the blocks into the array.
	 */
	unMarkAll();
	n = count_blocks(root);
	blocks = (struct block **)calloc(n, sizeof(*blocks));
	if (blocks == NULL)
		bpf_error("malloc");
	unMarkAll();
	n_blocks = 0;
	number_blks_r(root);

	n_edges = 2 * n_blocks;
	edges = (struct edge **)calloc(n_edges, sizeof(*edges));
	if (edges == NULL)
		bpf_error("malloc");

	/*
	 * The number of levels is bounded by the number of nodes.
	 */
	levels = (struct block **)calloc(n_blocks, sizeof(*levels));
	if (levels == NULL)
		bpf_error("malloc");

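	/*
	 * Each dominator/closure set is a bit vector with one bit per
	 * block (or, for edge sets, one bit per edge), packed into
	 * 32-bit words; the "+ 1" below rounds the word count up.
	 * E.g., with 100 blocks, 100 / 32 + 1 = 4 words (128 bits).
	 */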
	edgewords = n_edges / (8 * sizeof(bpf_u_int32)) + 1;
	nodewords = n_blocks / (8 * sizeof(bpf_u_int32)) + 1;

	/* XXX */
	space = (bpf_u_int32 *)malloc(2 * n_blocks * nodewords * sizeof(*space)
				 + n_edges * edgewords * sizeof(*space));
	if (space == NULL)
		bpf_error("malloc");
	p = space;
	all_dom_sets = p;
	for (i = 0; i < n; ++i) {
		blocks[i]->dom = p;
		p += nodewords;
	}
	all_closure_sets = p;
	for (i = 0; i < n; ++i) {
		blocks[i]->closure = p;
		p += nodewords;
	}
	all_edge_sets = p;
	for (i = 0; i < n; ++i) {
		register struct block *b = blocks[i];

		b->et.edom = p;
		p += edgewords;
		b->ef.edom = p;
		p += edgewords;
		b->et.id = i;
		edges[i] = &b->et;
		b->ef.id = n_blocks + i;
		edges[n_blocks + i] = &b->ef;
		b->et.pred = b;
		b->ef.pred = b;
	}
	max_stmts = 0;
	for (i = 0; i < n; ++i)
		max_stmts += slength(blocks[i]->stmts) + 1;
	/*
	 * We allocate at most 3 value numbers per statement,
	 * so this is an upper bound on the number of valnodes
	 * we'll need.
	 */
	maxval = 3 * max_stmts;
	vmap = (struct vmapinfo *)calloc(maxval, sizeof(*vmap));
	vnode_base = (struct valnode *)calloc(maxval, sizeof(*vnode_base));
	if (vmap == NULL || vnode_base == NULL)
		bpf_error("malloc");
}

/*
 * Some pointers used to convert the basic block form of the code
 * into the array form that BPF requires.  'fstart' will point to
 * the malloc'd array while 'ftail' is used during the recursive traversal.
 */
static struct bpf_insn *fstart;
static struct bpf_insn *ftail;

#ifdef BDEBUG
int bids[1000];
#endif

/*
 * Returns true if successful.  Returns false if a branch has
 * an offset that is too large.  If so, we have marked that
 * branch so that on a subsequent iteration, it will be treated
 * properly.
 */
static int
convert_code_r(p)
	struct block *p;
{
	struct bpf_insn *dst;
	struct slist *src;
	int slen;
	u_int off;
	int extrajmps;		/* number of extra jumps inserted */
	struct slist **offset = NULL;

	if (p == 0 || isMarked(p))
		return (1);
	Mark(p);

	if (convert_code_r(JF(p)) == 0)
		return (0);
	if (convert_code_r(JT(p)) == 0)
		return (0);

	slen = slength(p->stmts);
	dst = ftail -= (slen + 1 + p->longjt + p->longjf);
		/* inflate length by any extra jumps */

	p->offset = dst - fstart;

	/* generate offset[] for convenience */
	if (slen) {
		offset = (struct slist **)calloc(slen, sizeof(struct slist *));
		if (!offset) {
			bpf_error("not enough core");
			/*NOTREACHED*/
		}
	}
	src = p->stmts;
	for (off = 0; off < slen && src; off++) {
#if 0
		printf("off=%d src=%x\n", off, src);
#endif
		offset[off] = src;
		src = src->next;
	}

	off = 0;
	for (src = p->stmts; src; src = src->next) {
		if (src->s.code == NOP)
			continue;
		dst->code = (u_short)src->s.code;
		dst->k = src->s.k;

		/* fill block-local relative jump */
		if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
#if 0
			if (src->s.jt || src->s.jf) {
				bpf_error("illegal jmp destination");
				/*NOTREACHED*/
			}
#endif
			goto filled;
		}
		if (off == slen - 2)	/*???*/
			goto filled;

	    {
		int i;
		int jt, jf;
		const char *ljerr = "%s for block-local relative jump: off=%d";

#if 0
		printf("code=%x off=%d %x %x\n", src->s.code,
			off, src->s.jt, src->s.jf);
#endif

		if (!src->s.jt || !src->s.jf) {
			bpf_error(ljerr, "no jmp destination", off);
			/*NOTREACHED*/
		}

		jt = jf = 0;
		for (i = 0; i < slen; i++) {
			if (offset[i] == src->s.jt) {
				if (jt) {
					bpf_error(ljerr, "multiple matches", off);
					/*NOTREACHED*/
				}

				dst->jt = i - off - 1;
				jt++;
			}
			if (offset[i] == src->s.jf) {
				if (jf) {
					bpf_error(ljerr, "multiple matches", off);
					/*NOTREACHED*/
				}
				dst->jf = i - off - 1;
				jf++;
			}
		}
		if (!jt || !jf) {
			bpf_error(ljerr, "no destination found", off);
			/*NOTREACHED*/
		}
	    }
filled:
		++dst;
		++off;
	}
	if (offset)
		free(offset);

#ifdef BDEBUG
	bids[dst - fstart] = p->id + 1;
#endif
	dst->code = (u_short)p->s.code;
	dst->k = p->s.k;
	if (JT(p)) {
		extrajmps = 0;
		off = JT(p)->offset - (p->offset + slen) - 1;
		if (off >= 256) {
		    /* offset too large for branch, must add a jump */
		    if (p->longjt == 0) {
		    	/* mark this instruction and retry */
			p->longjt++;
			return(0);
		    }
		    /* branch if T to following jump */
		    dst->jt = extrajmps;
		    extrajmps++;
		    dst[extrajmps].code = BPF_JMP|BPF_JA;
		    dst[extrajmps].k = off - extrajmps;
		}
		else
		    dst->jt = off;
		off = JF(p)->offset - (p->offset + slen) - 1;
		if (off >= 256) {
		    /* offset too large for branch, must add a jump */
		    if (p->longjf == 0) {
		    	/* mark this instruction and retry */
			p->longjf++;
			return(0);
		    }
		    /* branch if F to following jump */
		    /* if two jumps are inserted, F goes to second one */
		    dst->jf = extrajmps;
		    extrajmps++;
		    dst[extrajmps].code = BPF_JMP|BPF_JA;
		    dst[extrajmps].k = off - extrajmps;
		}
		else
		    dst->jf = off;
	}
	return (1);
}
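
/*
 * Illustration of the long-jump fixup above: the jt/jf fields of a
 * BPF conditional are only 8 bits wide, so a target more than 255
 * instructions away cannot be encoded directly.  The first conversion
 * attempt marks the branch (longjt/longjf) and fails; the retry
 * reserves room for an unconditional jump right after the conditional
 * and routes the far branch through it, roughly:
 *
 *	jeq #k, jt=0, jf=1	; "true" falls into the ja below
 *	ja far_target		; 32-bit k can reach any target
 *	...			; "false" path resumes here
 */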

/*
 * Convert flowgraph intermediate representation to the
 * BPF array representation.  Set *lenp to the number of instructions.
 *
 * This routine does *NOT* leak the memory pointed to by fp.  It *must
 * not* do free(fp) before returning fp; doing so would make no sense,
 * as the BPF array pointed to by the return value of icode_to_fcode()
 * must be valid - it's being returned for use in a bpf_program structure.
 *
 * If it appears that icode_to_fcode() is leaking, the problem is that
 * the program using pcap_compile() is failing to free the memory in
 * the BPF program when it's done - the leak is in the program, not in
 * the routine that happens to be allocating the memory.  (By analogy, if
 * a program calls fopen() without ever calling fclose() on the FILE *,
 * it will leak the FILE structure; the leak is not in fopen(), it's in
 * the program.)  Change the program to use pcap_freecode() when it's
 * done with the filter program.  See the pcap man page.
 */
struct bpf_insn *
icode_to_fcode(root, lenp)
	struct block *root;
	int *lenp;
{
	int n;
	struct bpf_insn *fp;

	/*
	 * Loop doing convert_code_r() until no branches remain
	 * with too-large offsets.
	 */
	while (1) {
	    unMarkAll();
	    n = *lenp = count_stmts(root);

	    fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
	    if (fp == NULL)
		    bpf_error("malloc");
	    memset((char *)fp, 0, sizeof(*fp) * n);
	    fstart = fp;
	    ftail = fp + n;

	    unMarkAll();
	    if (convert_code_r(root))
		break;
	    free(fp);
	}

	return fp;
}
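
#ifdef notdef
/*
 * Usage sketch (not compiled): how a caller might consume
 * icode_to_fcode().  The function and variable names here are
 * illustrative, not part of this file.
 */
static void
example_build(struct block *root, struct bpf_program *prog)
{
	int len;

	prog->bf_insns = icode_to_fcode(root, &len);
	prog->bf_len = len;
	/* ... use the program ... */
	/* The caller owns bf_insns; release it via pcap_freecode(). */
}
#endif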

/*
 * Make a copy of a BPF program and put it in the "fcode" member of
 * a "pcap_t".
 *
 * If we fail to allocate memory for the copy, fill in the "errbuf"
 * member of the "pcap_t" with an error message, and return -1;
 * otherwise, return 0.
 */
int
install_bpf_program(pcap_t *p, struct bpf_program *fp)
{
	size_t prog_size;

	/*
	 * Validate the program.
	 */
	if (!bpf_validate(fp->bf_insns, fp->bf_len)) {
		snprintf(p->errbuf, sizeof(p->errbuf),
			"BPF program is not valid");
		return (-1);
	}

	/*
	 * Free up any already installed program.
	 */
	pcap_freecode(&p->fcode);

	prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
	p->fcode.bf_len = fp->bf_len;
	p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
	if (p->fcode.bf_insns == NULL) {
		snprintf(p->errbuf, sizeof(p->errbuf),
			 "malloc: %s", pcap_strerror(errno));
		return (-1);
	}
	memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
	return (0);
}
231975107Sfenner
232017683Spst#ifdef BDEBUG
232117683Spststatic void
232217683Spstopt_dump(root)
232317683Spst	struct block *root;
232417683Spst{
232517683Spst	struct bpf_program f;
232617683Spst
232717683Spst	memset(bids, 0, sizeof bids);
232817683Spst	f.bf_insns = icode_to_fcode(root, &f.bf_len);
232917683Spst	bpf_dump(&f, 1);
233017683Spst	putchar('\n');
233117683Spst	free((char *)f.bf_insns);
233217683Spst}
233317683Spst#endif
2334