optimize.c revision 172677
/*
 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that: (1) source code distributions
 * retain the above copyright notice and this paragraph in its entirety, (2)
 * distributions including binary code include the above copyright notice and
 * this paragraph in its entirety in the documentation or other materials
 * provided with the distribution, and (3) all advertising materials mentioning
 * features or use of this software display the following acknowledgement:
 * ``This product includes software developed by the University of California,
 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
 * the University nor the names of its contributors may be used to endorse
 * or promote products derived from this software without specific prior
 * written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 *  Optimization module for tcpdump intermediate representation.
 */
#ifndef lint
static const char rcsid[] _U_ =
    "@(#) $Header: /tcpdump/master/libpcap/optimize.c,v 1.85.2.3 2007/09/12 21:29:45 guy Exp $ (LBL)";
#endif

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <string.h>

#include <errno.h>

#include "pcap-int.h"

#include "gencode.h"

#ifdef HAVE_OS_PROTO_H
#include "os-proto.h"
#endif

#ifdef BDEBUG
extern int dflag;
#endif

#if defined(MSDOS) && !defined(__DJGPP__)
extern int _w32_ffs (int mask);
#define ffs _w32_ffs
#endif

/*
 * Represents a deleted instruction.
 */
#define NOP -1

/*
 * Register numbers for use-def values.
 * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
 * location.  A_ATOM is the accumulator and X_ATOM is the index
 * register.
 */
#define A_ATOM BPF_MEMWORDS
#define X_ATOM (BPF_MEMWORDS+1)

/*
 * This define is used to represent *both* the accumulator and
 * x register in use-def computations.
 * Currently, the use-def code assumes only one definition per instruction.
 */
#define AX_ATOM N_ATOMS

/*
 * A flag to indicate that further optimization is needed.
 * Iterative passes are continued until a given pass yields no
 * branch movement.
 */
static int done;

/*
 * A block is marked if and only if its mark equals the current mark.
 * Rather than traverse the code array, marking each item, 'cur_mark'
 * is incremented.  This automatically makes each element unmarked.
 */
static int cur_mark;
#define isMarked(p) ((p)->mark == cur_mark)
#define unMarkAll() cur_mark += 1
#define Mark(p) ((p)->mark = cur_mark)
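/*
 * Illustrative usage sketch (not part of the original code): a pass
 * clears every mark in O(1) and then marks nodes as it visits them:
 *
 *	unMarkAll();
 *	if (!isMarked(b)) {
 *		Mark(b);
 *		... visit b ...
 *	}
 */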

static void opt_init(struct block *);
static void opt_cleanup(void);

static void make_marks(struct block *);
static void mark_code(struct block *);

static void intern_blocks(struct block *);

static int eq_slist(struct slist *, struct slist *);

static void find_levels_r(struct block *);

static void find_levels(struct block *);
static void find_dom(struct block *);
static void propedom(struct edge *);
static void find_edom(struct block *);
static void find_closure(struct block *);
static int atomuse(struct stmt *);
static int atomdef(struct stmt *);
static void compute_local_ud(struct block *);
static void find_ud(struct block *);
static void init_val(void);
static int F(int, int, int);
static inline void vstore(struct stmt *, int *, int, int);
static void opt_blk(struct block *, int);
static int use_conflict(struct block *, struct block *);
static void opt_j(struct edge *);
static void or_pullup(struct block *);
static void and_pullup(struct block *);
static void opt_blks(struct block *, int);
static inline void link_inedge(struct edge *, struct block *);
static void find_inedges(struct block *);
static void opt_root(struct block **);
static void opt_loop(struct block *, int);
static void fold_op(struct stmt *, int, int);
static inline struct slist *this_op(struct slist *);
static void opt_not(struct block *);
static void opt_peep(struct block *);
static void opt_stmt(struct stmt *, int[], int);
static void deadstmt(struct stmt *, struct stmt *[]);
static void opt_deadstores(struct block *);
static struct block *fold_edge(struct block *, struct edge *);
static inline int eq_blk(struct block *, struct block *);
static int slength(struct slist *);
static int count_blocks(struct block *);
static void number_blks_r(struct block *);
static int count_stmts(struct block *);
static int convert_code_r(struct block *);
#ifdef BDEBUG
static void opt_dump(struct block *);
#endif

static int n_blocks;
struct block **blocks;
static int n_edges;
struct edge **edges;

/*
 * A bit vector set representation of the dominators.
 * We round up the set size to the next power of two.
 */
static int nodewords;
static int edgewords;
struct block **levels;
bpf_u_int32 *space;
#define BITS_PER_WORD (8*sizeof(bpf_u_int32))
/*
 * True if 'a' is in uset {p}.
 */
#define SET_MEMBER(p, a) \
((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))

/*
 * Add 'a' to uset p.
 */
#define SET_INSERT(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))

/*
 * Delete 'a' from uset p.
 */
#define SET_DELETE(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))

/*
 * a := a intersect b
 */
#define SET_INTERSECT(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &= *_y++;\
}

/*
 * a := a - b
 */
#define SET_SUBTRACT(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &=~ *_y++;\
}

/*
 * a := a union b
 */
#define SET_UNION(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ |= *_y++;\
}
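/*
 * Example of the bit arithmetic above (illustrative only): with 32-bit
 * words, SET_INSERT(p, 37) sets bit 37 % 32 == 5 of word 37 / 32 == 1,
 * i.e. p[1] |= (1 << 5), and SET_MEMBER(p, 37) tests that same bit.
 * The 'n' argument of the three loop macros is a count of words, not
 * of set members.
 */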

static uset all_dom_sets;
static uset all_closure_sets;
static uset all_edge_sets;

#ifndef MAX
#define MAX(a,b) ((a)>(b)?(a):(b))
#endif

static void
find_levels_r(b)
	struct block *b;
{
	int level;

	if (isMarked(b))
		return;

	Mark(b);
	b->link = 0;

	if (JT(b)) {
		find_levels_r(JT(b));
		find_levels_r(JF(b));
		level = MAX(JT(b)->level, JF(b)->level) + 1;
	} else
		level = 0;
	b->level = level;
	b->link = levels[level];
	levels[level] = b;
}

/*
 * Level graph.  The levels go from 0 at the leaves to
 * N_LEVELS at the root.  The levels[] array points to the
 * first node of the level list, whose elements are linked
 * with the 'link' field of the struct block.
 */
static void
find_levels(root)
	struct block *root;
{
	memset((char *)levels, 0, n_blocks * sizeof(*levels));
	unMarkAll();
	find_levels_r(root);
}

/*
 * Find dominator relationships.
 * Assumes graph has been leveled.
 */
static void
find_dom(root)
	struct block *root;
{
	int i;
	struct block *b;
	bpf_u_int32 *x;

	/*
	 * Initialize sets to contain all nodes.
	 */
	x = all_dom_sets;
	i = n_blocks * nodewords;
	while (--i >= 0)
		*x++ = ~0;
	/* Root starts off empty. */
	for (i = nodewords; --i >= 0;)
		root->dom[i] = 0;

	/* root->level is the highest level number found. */
	for (i = root->level; i >= 0; --i) {
		for (b = levels[i]; b; b = b->link) {
			SET_INSERT(b->dom, b->id);
			if (JT(b) == 0)
				continue;
			SET_INTERSECT(JT(b)->dom, b->dom, nodewords);
			SET_INTERSECT(JF(b)->dom, b->dom, nodewords);
		}
	}
}
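/*
 * A small worked example (illustrative): for the diamond
 *
 *	    root
 *	    /  \
 *	   b1  b2
 *	    \  /
 *	     b3
 *
 * the top-down intersection above leaves dom(b3) = {root, b3}:
 * b1 contributes {root, b1} and b2 contributes {root, b2}, so their
 * intersection is {root} before b3 inserts itself.
 */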

static void
propedom(ep)
	struct edge *ep;
{
	SET_INSERT(ep->edom, ep->id);
	if (ep->succ) {
		SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords);
		SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords);
	}
}

/*
 * Compute edge dominators.
 * Assumes graph has been leveled and predecessors established.
 */
static void
find_edom(root)
	struct block *root;
{
	int i;
	uset x;
	struct block *b;

	x = all_edge_sets;
	for (i = n_edges * edgewords; --i >= 0; )
		x[i] = ~0;

	/* root->level is the highest level number found. */
	memset(root->et.edom, 0, edgewords * sizeof(*(uset)0));
	memset(root->ef.edom, 0, edgewords * sizeof(*(uset)0));
	for (i = root->level; i >= 0; --i) {
		for (b = levels[i]; b != 0; b = b->link) {
			propedom(&b->et);
			propedom(&b->ef);
		}
	}
}

/*
 * Find the backwards transitive closure of the flow graph.  These sets
 * are backwards in the sense that we find the set of nodes that reach
 * a given node, not the set of nodes that can be reached by a node.
 *
 * Assumes graph has been leveled.
 */
static void
find_closure(root)
	struct block *root;
{
	int i;
	struct block *b;

	/*
	 * Initialize sets to contain no nodes.
	 */
	memset((char *)all_closure_sets, 0,
	      n_blocks * nodewords * sizeof(*all_closure_sets));

	/* root->level is the highest level number found. */
	for (i = root->level; i >= 0; --i) {
		for (b = levels[i]; b; b = b->link) {
			SET_INSERT(b->closure, b->id);
			if (JT(b) == 0)
				continue;
			SET_UNION(JT(b)->closure, b->closure, nodewords);
			SET_UNION(JF(b)->closure, b->closure, nodewords);
		}
	}
}

/*
 * Return the register number that is used by s.  If A and X are both
 * used, return AX_ATOM.  If no register is used, return -1.
 *
 * The implementation should probably change to an array access.
 */
static int
atomuse(s)
	struct stmt *s;
{
	register int c = s->code;

	if (c == NOP)
		return -1;

	switch (BPF_CLASS(c)) {

	case BPF_RET:
		return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
			(BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;

	case BPF_LD:
	case BPF_LDX:
		return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
			(BPF_MODE(c) == BPF_MEM) ? s->k : -1;

	case BPF_ST:
		return A_ATOM;

	case BPF_STX:
		return X_ATOM;

	case BPF_JMP:
	case BPF_ALU:
		if (BPF_SRC(c) == BPF_X)
			return AX_ATOM;
		return A_ATOM;

	case BPF_MISC:
		return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
	}
	abort();
	/* NOTREACHED */
}

/*
 * Return the register number that is defined by 's'.  We assume that
 * a single stmt cannot define more than one register.  If no register
 * is defined, return -1.
 *
 * The implementation should probably change to an array access.
 */
static int
atomdef(s)
	struct stmt *s;
{
	if (s->code == NOP)
		return -1;

	switch (BPF_CLASS(s->code)) {

	case BPF_LD:
	case BPF_ALU:
		return A_ATOM;

	case BPF_LDX:
		return X_ATOM;

	case BPF_ST:
	case BPF_STX:
		return s->k;

	case BPF_MISC:
		return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
	}
	return -1;
}

/*
 * Compute the sets of registers used, defined, and killed by 'b'.
 *
 * "Used" means that a statement in 'b' uses the register before any
 * statement in 'b' defines it, i.e. it uses the value left in
 * that register by a predecessor block of this block.
 * "Defined" means that a statement in 'b' defines it.
 * "Killed" means that a statement in 'b' defines it before any
 * statement in 'b' uses it, i.e. it kills the value left in that
 * register by a predecessor block of this block.
 */
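/*
 * For example (a sketch): in a block consisting of
 *
 *	ldx M[0]
 *	st  M[1]
 *
 * M[0] and the accumulator are "used" (their values come from a
 * predecessor), while X and M[1] are both "defined" and "killed"
 * (each is written before any statement in the block reads it).
 */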
static void
compute_local_ud(b)
	struct block *b;
{
	struct slist *s;
	atomset def = 0, use = 0, kill = 0;
	int atom;

	for (s = b->stmts; s; s = s->next) {
		if (s->s.code == NOP)
			continue;
		atom = atomuse(&s->s);
		if (atom >= 0) {
			if (atom == AX_ATOM) {
				if (!ATOMELEM(def, X_ATOM))
					use |= ATOMMASK(X_ATOM);
				if (!ATOMELEM(def, A_ATOM))
					use |= ATOMMASK(A_ATOM);
			}
			else if (atom < N_ATOMS) {
				if (!ATOMELEM(def, atom))
					use |= ATOMMASK(atom);
			}
			else
				abort();
		}
		atom = atomdef(&s->s);
		if (atom >= 0) {
			if (!ATOMELEM(use, atom))
				kill |= ATOMMASK(atom);
			def |= ATOMMASK(atom);
		}
	}
	if (BPF_CLASS(b->s.code) == BPF_JMP) {
		/*
		 * XXX - what about RET?
		 */
		atom = atomuse(&b->s);
		if (atom >= 0) {
			if (atom == AX_ATOM) {
				if (!ATOMELEM(def, X_ATOM))
					use |= ATOMMASK(X_ATOM);
				if (!ATOMELEM(def, A_ATOM))
					use |= ATOMMASK(A_ATOM);
			}
			else if (atom < N_ATOMS) {
				if (!ATOMELEM(def, atom))
					use |= ATOMMASK(atom);
			}
			else
				abort();
		}
	}

	b->def = def;
	b->kill = kill;
	b->in_use = use;
}

/*
 * Assume graph is already leveled.
 */
static void
find_ud(root)
	struct block *root;
{
	int i, maxlevel;
	struct block *p;

	/*
	 * root->level is the highest level number found;
	 * count down from there.
	 */
	maxlevel = root->level;
	for (i = maxlevel; i >= 0; --i)
		for (p = levels[i]; p; p = p->link) {
			compute_local_ud(p);
			p->out_use = 0;
		}

	for (i = 1; i <= maxlevel; ++i) {
		for (p = levels[i]; p; p = p->link) {
			p->out_use |= JT(p)->in_use | JF(p)->in_use;
			p->in_use |= p->out_use &~ p->kill;
		}
	}
}
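/*
 * Illustrative note on the propagation above: a block whose successors
 * need A (A_ATOM is in their in_use) but which itself kills A does not
 * add A to its own in_use, so the demand is not propagated to that
 * block's predecessors.
 */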

/*
 * These data structures are used in a Cocke and Schwartz style
 * value numbering scheme.  Since the flowgraph is acyclic,
 * exit values can be propagated from a node's predecessors
 * provided it is uniquely defined.
 */
struct valnode {
	int code;
	int v0, v1;
	int val;
	struct valnode *next;
};

#define MODULUS 213
static struct valnode *hashtbl[MODULUS];
static int curval;
static int maxval;

/* Integer constants mapped with the load immediate opcode. */
#define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L)

struct vmapinfo {
	int is_const;
	bpf_int32 const_val;
};
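/*
 * Illustrative example (a sketch): value numbering assigns one number
 * per distinct (code, v0, v1) triple, so two separate "ld #5"
 * statements map to the same number:
 *
 *	v1 = K(5);
 *	v2 = K(5);	--  v1 == v2, and vmap[v1].is_const is set
 */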

struct vmapinfo *vmap;
struct valnode *vnode_base;
struct valnode *next_vnode;

static void
init_val()
{
	curval = 0;
	next_vnode = vnode_base;
	memset((char *)vmap, 0, maxval * sizeof(*vmap));
	memset((char *)hashtbl, 0, sizeof hashtbl);
}

/* Because we really don't have an IR, this stuff is a little messy. */
static int
F(code, v0, v1)
	int code;
	int v0, v1;
{
	u_int hash;
	int val;
	struct valnode *p;

	hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
	hash %= MODULUS;

	for (p = hashtbl[hash]; p; p = p->next)
		if (p->code == code && p->v0 == v0 && p->v1 == v1)
			return p->val;

	val = ++curval;
	if (BPF_MODE(code) == BPF_IMM &&
	    (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
		vmap[val].const_val = v0;
		vmap[val].is_const = 1;
	}
	p = next_vnode++;
	p->val = val;
	p->code = code;
	p->v0 = v0;
	p->v1 = v1;
	p->next = hashtbl[hash];
	hashtbl[hash] = p;

	return val;
}

static inline void
vstore(s, valp, newval, alter)
	struct stmt *s;
	int *valp;
	int newval;
	int alter;
{
	if (alter && *valp == newval)
		s->code = NOP;
	else
		*valp = newval;
}

static void
fold_op(s, v0, v1)
	struct stmt *s;
	int v0, v1;
{
	bpf_u_int32 a, b;

	a = vmap[v0].const_val;
	b = vmap[v1].const_val;

	switch (BPF_OP(s->code)) {
	case BPF_ADD:
		a += b;
		break;

	case BPF_SUB:
		a -= b;
		break;

	case BPF_MUL:
		a *= b;
		break;

	case BPF_DIV:
		if (b == 0)
			bpf_error("division by zero");
		a /= b;
		break;

	case BPF_AND:
		a &= b;
		break;

	case BPF_OR:
		a |= b;
		break;

	case BPF_LSH:
		a <<= b;
		break;

	case BPF_RSH:
		a >>= b;
		break;

	case BPF_NEG:
		a = -a;
		break;

	default:
		abort();
	}
	s->k = a;
	s->code = BPF_LD|BPF_IMM;
	done = 0;
}
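/*
 * For example (illustrative): if the accumulator's value number is the
 * constant 6 and the statement is "mul #7", opt_stmt() calls fold_op(),
 * which rewrites the statement in place to "ld #42" and clears 'done'
 * so another optimization pass runs.
 */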

static inline struct slist *
this_op(s)
	struct slist *s;
{
	while (s != 0 && s->s.code == NOP)
		s = s->next;
	return s;
}

static void
opt_not(b)
	struct block *b;
{
	struct block *tmp = JT(b);

	JT(b) = JF(b);
	JF(b) = tmp;
}

static void
opt_peep(b)
	struct block *b;
{
	struct slist *s;
	struct slist *next, *last;
	int val;

	s = b->stmts;
	if (s == 0)
		return;

	last = s;
	for (/*empty*/; /*empty*/; s = next) {
		/*
		 * Skip over nops.
		 */
		s = this_op(s);
		if (s == 0)
			break;	/* nothing left in the block */

		/*
		 * Find the next real instruction after that one
		 * (skipping nops).
		 */
		next = this_op(s->next);
		if (next == 0)
			break;	/* no next instruction */
		last = next;

		/*
		 * st  M[k]	-->	st  M[k]
		 * ldx M[k]		tax
		 */
		if (s->s.code == BPF_ST &&
		    next->s.code == (BPF_LDX|BPF_MEM) &&
		    s->s.k == next->s.k) {
			done = 0;
			next->s.code = BPF_MISC|BPF_TAX;
		}
		/*
		 * ld  #k	-->	ldx  #k
		 * tax			txa
		 */
		if (s->s.code == (BPF_LD|BPF_IMM) &&
		    next->s.code == (BPF_MISC|BPF_TAX)) {
			s->s.code = BPF_LDX|BPF_IMM;
			next->s.code = BPF_MISC|BPF_TXA;
			done = 0;
		}
		/*
		 * This is an ugly special case, but it happens
		 * when you say tcp[k] or udp[k] where k is a constant.
		 */
		if (s->s.code == (BPF_LD|BPF_IMM)) {
			struct slist *add, *tax, *ild;

			/*
			 * Check that X isn't used on exit from this
			 * block (which the optimizer might cause).
			 * We know the code generator won't generate
			 * any local dependencies.
			 */
			if (ATOMELEM(b->out_use, X_ATOM))
				continue;

			/*
			 * Check that the instruction following the ldi
			 * is an addx, or it's an ldxms with an addx
			 * following it (with 0 or more nops between the
			 * ldxms and addx).
			 */
			if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
				add = next;
			else
				add = this_op(next->next);
			if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
				continue;

			/*
			 * Check that a tax follows that (with 0 or more
			 * nops between them).
			 */
			tax = this_op(add->next);
			if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
				continue;

			/*
			 * Check that an ild follows that (with 0 or more
			 * nops between them).
			 */
			ild = this_op(tax->next);
			if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
			    BPF_MODE(ild->s.code) != BPF_IND)
				continue;
			/*
			 * We want to turn this sequence:
			 *
			 * (004) ldi     #0x2		{s}
			 * (005) ldxms   [14]		{next}  -- optional
			 * (006) addx			{add}
			 * (007) tax			{tax}
			 * (008) ild     [x+0]		{ild}
			 *
			 * into this sequence:
			 *
			 * (004) nop
			 * (005) ldxms   [14]
			 * (006) nop
			 * (007) nop
			 * (008) ild     [x+2]
			 *
			 * XXX We need to check that X is not
			 * subsequently used, because we want to change
			 * what'll be in it after this sequence.
			 *
			 * We know we can eliminate the accumulator
			 * modifications earlier in the sequence since
			 * it is defined by the last stmt of this sequence
			 * (i.e., the last statement of the sequence loads
			 * a value into the accumulator, so we can eliminate
			 * earlier operations on the accumulator).
			 */
			ild->s.k += s->s.k;
			s->s.code = NOP;
			add->s.code = NOP;
			tax->s.code = NOP;
			done = 0;
		}
	}
	/*
	 * If the comparison at the end of a block is an equality
	 * comparison against a constant, and nobody uses the value
	 * we leave in the A register at the end of a block, and
	 * the operation preceding the comparison is an arithmetic
	 * operation, we can sometimes optimize it away.
	 */
	if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
	    !ATOMELEM(b->out_use, A_ATOM)) {
		/*
		 * We can optimize away certain subtractions of the
		 * X register.
		 */
		if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
			val = b->val[X_ATOM];
			if (vmap[val].is_const) {
				/*
				 * If we have a subtract to do a comparison,
				 * and the X register is a known constant,
				 * we can merge this value into the
				 * comparison:
				 *
				 * sub x  ->	nop
				 * jeq #y	jeq #(x+y)
				 */
				b->s.k += vmap[val].const_val;
				last->s.code = NOP;
				done = 0;
			} else if (b->s.k == 0) {
				/*
				 * If the X register isn't a constant,
				 * and the comparison in the test is
				 * against 0, we can compare with the
				 * X register, instead:
				 *
				 * sub x  ->	nop
				 * jeq #0	jeq x
				 */
				last->s.code = NOP;
				b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
				done = 0;
			}
		}
		/*
		 * Likewise, a constant subtract can be simplified:
		 *
		 * sub #x ->	nop
		 * jeq #y ->	jeq #(x+y)
		 */
		else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
			last->s.code = NOP;
			b->s.k += last->s.k;
			done = 0;
		}
		/*
		 * And, similarly, a constant AND can be simplified
		 * if we're testing against 0, i.e.:
		 *
		 * and #k	nop
		 * jeq #0  ->	jset #k
		 */
		else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
		    b->s.k == 0) {
			b->s.k = last->s.k;
			b->s.code = BPF_JMP|BPF_K|BPF_JSET;
			last->s.code = NOP;
			done = 0;
			opt_not(b);
		}
	}
	/*
	 * jset #0        ->   never
	 * jset #ffffffff ->   always
	 */
	if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
		if (b->s.k == 0)
			JT(b) = JF(b);
		if (b->s.k == 0xffffffff)
			JF(b) = JT(b);
	}
	/*
	 * If the accumulator is a known constant, we can compute the
	 * comparison result.
	 */
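	/*
	 * For example (illustrative): if value numbering proved that A
	 * always holds 4 here and the block's test is "jeq #4", v below
	 * computes to 1 and the false edge is redirected to the true
	 * successor, making the test itself dead.
	 */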
	val = b->val[A_ATOM];
	if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
		bpf_int32 v = vmap[val].const_val;
		switch (BPF_OP(b->s.code)) {

		case BPF_JEQ:
			v = v == b->s.k;
			break;

		case BPF_JGT:
			v = (unsigned)v > b->s.k;
			break;

		case BPF_JGE:
			v = (unsigned)v >= b->s.k;
			break;

		case BPF_JSET:
			v &= b->s.k;
			break;

		default:
			abort();
		}
		if (JF(b) != JT(b))
			done = 0;
		if (v)
			JF(b) = JT(b);
		else
			JT(b) = JF(b);
	}
}

/*
 * Compute the symbolic value of the expression of 's', and update
 * anything it defines in the value table 'val'.  If 'alter' is true,
 * do various optimizations.  This code would be cleaner if symbolic
 * evaluation and code transformations weren't folded together.
 */
static void
opt_stmt(s, val, alter)
	struct stmt *s;
	int val[];
	int alter;
{
	int op;
	int v;

	switch (s->code) {

	case BPF_LD|BPF_ABS|BPF_W:
	case BPF_LD|BPF_ABS|BPF_H:
	case BPF_LD|BPF_ABS|BPF_B:
		v = F(s->code, s->k, 0L);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_IND|BPF_W:
	case BPF_LD|BPF_IND|BPF_H:
	case BPF_LD|BPF_IND|BPF_B:
		v = val[X_ATOM];
		if (alter && vmap[v].is_const) {
			s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
			s->k += vmap[v].const_val;
			v = F(s->code, s->k, 0L);
			done = 0;
		}
		else
			v = F(s->code, s->k, v);
		vstore(s, &val[A_ATOM], v, alter);
		break;
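		/*
		 * Illustrative example of the rewrite above: with X
		 * known to be the constant 12, "ld [x + 2]" becomes
		 * the absolute load "ld [14]".
		 */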

	case BPF_LD|BPF_LEN:
		v = F(s->code, 0L, 0L);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_IMM:
		v = K(s->k);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LDX|BPF_IMM:
		v = K(s->k);
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_LDX|BPF_MSH|BPF_B:
		v = F(s->code, s->k, 0L);
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_ALU|BPF_NEG:
		if (alter && vmap[val[A_ATOM]].is_const) {
			s->code = BPF_LD|BPF_IMM;
			s->k = -vmap[val[A_ATOM]].const_val;
			val[A_ATOM] = K(s->k);
		}
		else
			val[A_ATOM] = F(s->code, val[A_ATOM], 0L);
		break;

	case BPF_ALU|BPF_ADD|BPF_K:
	case BPF_ALU|BPF_SUB|BPF_K:
	case BPF_ALU|BPF_MUL|BPF_K:
	case BPF_ALU|BPF_DIV|BPF_K:
	case BPF_ALU|BPF_AND|BPF_K:
	case BPF_ALU|BPF_OR|BPF_K:
	case BPF_ALU|BPF_LSH|BPF_K:
	case BPF_ALU|BPF_RSH|BPF_K:
		op = BPF_OP(s->code);
		if (alter) {
			if (s->k == 0) {
				/*
				 * Don't optimize away "sub #0",
				 * as it may be needed later to
				 * fix up the generated math code.
				 */
				if (op == BPF_ADD ||
				    op == BPF_LSH || op == BPF_RSH ||
				    op == BPF_OR) {
					s->code = NOP;
					break;
				}
				if (op == BPF_MUL || op == BPF_AND) {
					s->code = BPF_LD|BPF_IMM;
					val[A_ATOM] = K(s->k);
					break;
				}
			}
			if (vmap[val[A_ATOM]].is_const) {
				fold_op(s, val[A_ATOM], K(s->k));
				val[A_ATOM] = K(s->k);
				break;
			}
		}
		val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k));
		break;

	case BPF_ALU|BPF_ADD|BPF_X:
	case BPF_ALU|BPF_SUB|BPF_X:
	case BPF_ALU|BPF_MUL|BPF_X:
	case BPF_ALU|BPF_DIV|BPF_X:
	case BPF_ALU|BPF_AND|BPF_X:
	case BPF_ALU|BPF_OR|BPF_X:
	case BPF_ALU|BPF_LSH|BPF_X:
	case BPF_ALU|BPF_RSH|BPF_X:
		op = BPF_OP(s->code);
		if (alter && vmap[val[X_ATOM]].is_const) {
			if (vmap[val[A_ATOM]].is_const) {
				fold_op(s, val[A_ATOM], val[X_ATOM]);
				val[A_ATOM] = K(s->k);
			}
			else {
				s->code = BPF_ALU|BPF_K|op;
				s->k = vmap[val[X_ATOM]].const_val;
				done = 0;
				val[A_ATOM] =
					F(s->code, val[A_ATOM], K(s->k));
			}
			break;
		}
		/*
		 * Check if we're doing something to an accumulator
		 * that is 0, and simplify.  This may not seem like
		 * much of a simplification but it could open up further
		 * optimizations.
		 * XXX We could also check for mul by 1, etc.
		 */
		if (alter && vmap[val[A_ATOM]].is_const
		    && vmap[val[A_ATOM]].const_val == 0) {
			if (op == BPF_ADD || op == BPF_OR) {
				s->code = BPF_MISC|BPF_TXA;
				vstore(s, &val[A_ATOM], val[X_ATOM], alter);
				break;
			}
			else if (op == BPF_MUL || op == BPF_DIV ||
				 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
				s->code = BPF_LD|BPF_IMM;
				s->k = 0;
				vstore(s, &val[A_ATOM], K(s->k), alter);
				break;
			}
			else if (op == BPF_NEG) {
				s->code = NOP;
				break;
			}
		}
		val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]);
		break;

	case BPF_MISC|BPF_TXA:
		vstore(s, &val[A_ATOM], val[X_ATOM], alter);
		break;

	case BPF_LD|BPF_MEM:
		v = val[s->k];
		if (alter && vmap[v].is_const) {
			s->code = BPF_LD|BPF_IMM;
			s->k = vmap[v].const_val;
			done = 0;
		}
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_MISC|BPF_TAX:
		vstore(s, &val[X_ATOM], val[A_ATOM], alter);
		break;

	case BPF_LDX|BPF_MEM:
		v = val[s->k];
		if (alter && vmap[v].is_const) {
			s->code = BPF_LDX|BPF_IMM;
			s->k = vmap[v].const_val;
			done = 0;
		}
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_ST:
		vstore(s, &val[s->k], val[A_ATOM], alter);
		break;

	case BPF_STX:
		vstore(s, &val[s->k], val[X_ATOM], alter);
		break;
	}
}

static void
deadstmt(s, last)
	register struct stmt *s;
	register struct stmt *last[];
{
	register int atom;

	atom = atomuse(s);
	if (atom >= 0) {
		if (atom == AX_ATOM) {
			last[X_ATOM] = 0;
			last[A_ATOM] = 0;
		}
		else
			last[atom] = 0;
	}
	atom = atomdef(s);
	if (atom >= 0) {
		if (last[atom]) {
			done = 0;
			last[atom]->code = NOP;
		}
		last[atom] = s;
	}
}

static void
opt_deadstores(b)
	register struct block *b;
{
	register struct slist *s;
	register int atom;
	struct stmt *last[N_ATOMS];

	memset((char *)last, 0, sizeof last);

	for (s = b->stmts; s != 0; s = s->next)
		deadstmt(&s->s, last);
	deadstmt(&b->s, last);

	for (atom = 0; atom < N_ATOMS; ++atom)
		if (last[atom] && !ATOMELEM(b->out_use, atom)) {
			last[atom]->code = NOP;
			done = 0;
		}
}

static void
opt_blk(b, do_stmts)
	struct block *b;
	int do_stmts;
{
	struct slist *s;
	struct edge *p;
	int i;
	bpf_int32 aval, xval;

#if 0
	for (s = b->stmts; s && s->next; s = s->next)
		if (BPF_CLASS(s->s.code) == BPF_JMP) {
			do_stmts = 0;
			break;
		}
#endif

	/*
	 * Initialize the atom values.
	 */
	p = b->in_edges;
	if (p == 0) {
		/*
		 * We have no predecessors, so everything is undefined
		 * upon entry to this block.
		 */
		memset((char *)b->val, 0, sizeof(b->val));
	} else {
		/*
		 * Inherit values from our predecessors.
		 *
		 * First, get the values from the predecessor along the
		 * first edge leading to this node.
		 */
		memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
		/*
		 * Now look at all the other nodes leading to this node.
		 * If, for the predecessor along that edge, a register
		 * has a different value from the one we have (i.e.,
		 * control paths are merging, and the merging paths
		 * assign different values to that register), give the
		 * register the undefined value of 0.
		 */
		while ((p = p->next) != NULL) {
			for (i = 0; i < N_ATOMS; ++i)
				if (b->val[i] != p->pred->val[i])
					b->val[i] = 0;
		}
	}
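	/*
	 * For example (illustrative): in the merge above, if one
	 * predecessor leaves A holding value number 7 and another
	 * leaves value number 9, A's entry in b->val is reset to 0,
	 * the "undefined" value.
	 */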
	aval = b->val[A_ATOM];
	xval = b->val[X_ATOM];
	for (s = b->stmts; s; s = s->next)
		opt_stmt(&s->s, b->val, do_stmts);

	/*
	 * This is a special case: if we don't use anything from this
	 * block, and we load the accumulator or index register with a
	 * value that is already there, or if this block is a return,
	 * eliminate all the statements.
	 *
	 * XXX - what if it does a store?
	 *
	 * XXX - why does it matter whether we use anything from this
	 * block?  If the accumulator or index register doesn't change
	 * its value, isn't that OK even if we use that value?
	 *
	 * XXX - if we load the accumulator with a different value,
	 * and the block ends with a conditional branch, we obviously
	 * can't eliminate it, as the branch depends on that value.
	 * For the index register, the conditional branch only depends
	 * on the index register value if the test is against the index
	 * register value rather than a constant; if nothing uses the
	 * value we put into the index register, and we're not testing
	 * against the index register's value, and there aren't any
	 * other problems that would keep us from eliminating this
	 * block, can we eliminate it?
	 */
	if (do_stmts &&
	    ((b->out_use == 0 && aval != 0 && b->val[A_ATOM] == aval &&
	      xval != 0 && b->val[X_ATOM] == xval) ||
	     BPF_CLASS(b->s.code) == BPF_RET)) {
		if (b->stmts != 0) {
			b->stmts = 0;
			done = 0;
		}
	} else {
		opt_peep(b);
		opt_deadstores(b);
	}
	/*
	 * Set up values for branch optimizer.
	 */
	if (BPF_SRC(b->s.code) == BPF_K)
		b->oval = K(b->s.k);
	else
		b->oval = b->val[X_ATOM];
	b->et.code = b->s.code;
	b->ef.code = -b->s.code;
}

/*
 * Return true if any register that is used on exit from 'succ' has
 * an exit value that is different from the corresponding exit value
 * from 'b'.
 */
static int
use_conflict(b, succ)
	struct block *b, *succ;
{
	int atom;
	atomset use = succ->out_use;

	if (use == 0)
		return 0;

	for (atom = 0; atom < N_ATOMS; ++atom)
		if (ATOMELEM(use, atom))
			if (b->val[atom] != succ->val[atom])
				return 1;
	return 0;
}

static struct block *
fold_edge(child, ep)
	struct block *child;
	struct edge *ep;
{
	int sense;
	int aval0, aval1, oval0, oval1;
	int code = ep->code;

	if (code < 0) {
		code = -code;
		sense = 0;
	} else
		sense = 1;

	if (child->s.code != code)
		return 0;

	aval0 = child->val[A_ATOM];
	oval0 = child->oval;
	aval1 = ep->pred->val[A_ATOM];
	oval1 = ep->pred->oval;

	if (aval0 != aval1)
		return 0;

	if (oval0 == oval1)
		/*
		 * The operands of the branch instructions are
		 * identical, so the result is true if a true
		 * branch was taken to get here, otherwise false.
		 */
		return sense ? JT(child) : JF(child);

	if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
		/*
		 * At this point, we only know the comparison if we
		 * came down the true branch, and it was an equality
		 * comparison with a constant.
		 *
		 * I.e., if we came down the true branch, and the branch
		 * was an equality comparison with a constant, we know the
		 * accumulator contains that constant.  If we came down
		 * the false branch, or the comparison wasn't with a
		 * constant, we don't know what was in the accumulator.
		 *
		 * We rely on the fact that distinct constants have distinct
		 * value numbers.
		 */
		return JF(child);

	return 0;
}
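/*
 * A worked example for fold_edge() (illustrative): suppose an edge was
 * taken because "jeq #0x800" succeeded in an ancestor, and a dominated
 * block performs the same "jeq #0x800" on the same A value.  The
 * operands match (oval0 == oval1), so the result is already known and
 * opt_j() can route the edge straight to the appropriate successor,
 * bypassing the redundant test.
 */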
136217683Spst
136317683Spststatic void
136417683Spstopt_j(ep)
136517683Spst	struct edge *ep;
136617683Spst{
136717683Spst	register int i, k;
136817683Spst	register struct block *target;
136917683Spst
137017683Spst	if (JT(ep->succ) == 0)
137117683Spst		return;
137217683Spst
137317683Spst	if (JT(ep->succ) == JF(ep->succ)) {
137417683Spst		/*
137517683Spst		 * Common branch targets can be eliminated, provided
137617683Spst		 * there is no data dependency.
137717683Spst		 */
137817683Spst		if (!use_conflict(ep->pred, ep->succ->et.succ)) {
137917683Spst			done = 0;
138017683Spst			ep->succ = JT(ep->succ);
138117683Spst		}
138217683Spst	}
138317683Spst	/*
138417683Spst	 * For each edge dominator that matches the successor of this
138517683Spst	 * edge, promote the edge successor to the its grandchild.
138617683Spst	 *
138717683Spst	 * XXX We violate the set abstraction here in favor a reasonably
138817683Spst	 * efficient loop.
138917683Spst	 */
139017683Spst top:
139117683Spst	for (i = 0; i < edgewords; ++i) {
139217683Spst		register bpf_u_int32 x = ep->edom[i];
139317683Spst
139417683Spst		while (x != 0) {
139517683Spst			k = ffs(x) - 1;
139617683Spst			x &=~ (1 << k);
139717683Spst			k += i * BITS_PER_WORD;
139817683Spst
139917683Spst			target = fold_edge(ep->succ, edges[k]);
140017683Spst			/*
140117683Spst			 * Check that there is no data dependency between
140217683Spst			 * nodes that will be violated if we move the edge.
140317683Spst			 */
140417683Spst			if (target != 0 && !use_conflict(ep->pred, target)) {
140517683Spst				done = 0;
140617683Spst				ep->succ = target;
140717683Spst				if (JT(target) != 0)
140817683Spst					/*
140917683Spst					 * Start over unless we hit a leaf.
141017683Spst					 */
141117683Spst					goto top;
141217683Spst				return;
141317683Spst			}
141417683Spst		}
141517683Spst	}
141617683Spst}
141717683Spst
141817683Spst
141917683Spststatic void
142017683Spstor_pullup(b)
142117683Spst	struct block *b;
142217683Spst{
142317683Spst	int val, at_top;
142417683Spst	struct block *pull;
142517683Spst	struct block **diffp, **samep;
142617683Spst	struct edge *ep;
142717683Spst
142817683Spst	ep = b->in_edges;
142917683Spst	if (ep == 0)
143017683Spst		return;
143117683Spst
143217683Spst	/*
143317683Spst	 * Make sure each predecessor loads the same value.
143417683Spst	 * XXX why?
143517683Spst	 */
143617683Spst	val = ep->pred->val[A_ATOM];
143717683Spst	for (ep = ep->next; ep != 0; ep = ep->next)
143817683Spst		if (val != ep->pred->val[A_ATOM])
143917683Spst			return;
144017683Spst
144117683Spst	if (JT(b->in_edges->pred) == b)
144217683Spst		diffp = &JT(b->in_edges->pred);
144317683Spst	else
144417683Spst		diffp = &JF(b->in_edges->pred);
144517683Spst
144617683Spst	at_top = 1;
144717683Spst	while (1) {
144817683Spst		if (*diffp == 0)
144917683Spst			return;
145017683Spst
145117683Spst		if (JT(*diffp) != JT(b))
145217683Spst			return;
145317683Spst
145417683Spst		if (!SET_MEMBER((*diffp)->dom, b->id))
145517683Spst			return;
145617683Spst
145717683Spst		if ((*diffp)->val[A_ATOM] != val)
145817683Spst			break;
145917683Spst
146017683Spst		diffp = &JF(*diffp);
146117683Spst		at_top = 0;
146217683Spst	}
146317683Spst	samep = &JF(*diffp);
146417683Spst	while (1) {
146517683Spst		if (*samep == 0)
146617683Spst			return;
146717683Spst
146817683Spst		if (JT(*samep) != JT(b))
146917683Spst			return;
147017683Spst
147117683Spst		if (!SET_MEMBER((*samep)->dom, b->id))
147217683Spst			return;
147317683Spst
147417683Spst		if ((*samep)->val[A_ATOM] == val)
147517683Spst			break;
147617683Spst
147717683Spst		/* XXX Need to check that there are no data dependencies
147817683Spst		   between dp0 and dp1.  Currently, the code generator
147917683Spst		   will not produce such dependencies. */
148017683Spst		samep = &JF(*samep);
148117683Spst	}
148217683Spst#ifdef notdef
148317683Spst	/* XXX This doesn't cover everything. */
148417683Spst	for (i = 0; i < N_ATOMS; ++i)
148517683Spst		if ((*samep)->val[i] != pred->val[i])
148617683Spst			return;
148717683Spst#endif
148817683Spst	/* Pull up the node. */
148917683Spst	pull = *samep;
149017683Spst	*samep = JF(pull);
149117683Spst	JF(pull) = *diffp;
149217683Spst
149317683Spst	/*
149417683Spst	 * At the top of the chain, each predecessor needs to point at the
149517683Spst	 * pulled up node.  Inside the chain, there is only one predecessor
149617683Spst	 * to worry about.
149717683Spst	 */
149817683Spst	if (at_top) {
149917683Spst		for (ep = b->in_edges; ep != 0; ep = ep->next) {
150017683Spst			if (JT(ep->pred) == b)
150117683Spst				JT(ep->pred) = pull;
150217683Spst			else
150317683Spst				JF(ep->pred) = pull;
150417683Spst		}
150517683Spst	}
150617683Spst	else
150717683Spst		*diffp = pull;
150817683Spst
150917683Spst	done = 0;
151017683Spst}
151117683Spst
151217683Spststatic void
151317683Spstand_pullup(b)
151417683Spst	struct block *b;
151517683Spst{
151617683Spst	int val, at_top;
151717683Spst	struct block *pull;
151817683Spst	struct block **diffp, **samep;
151917683Spst	struct edge *ep;
152017683Spst
152117683Spst	ep = b->in_edges;
152217683Spst	if (ep == 0)
152317683Spst		return;
152417683Spst
152517683Spst	/*
152617683Spst	 * Make sure each predecessor loads the same value.
152717683Spst	 */
152817683Spst	val = ep->pred->val[A_ATOM];
152917683Spst	for (ep = ep->next; ep != 0; ep = ep->next)
153017683Spst		if (val != ep->pred->val[A_ATOM])
153117683Spst			return;
153217683Spst
153317683Spst	if (JT(b->in_edges->pred) == b)
153417683Spst		diffp = &JT(b->in_edges->pred);
153517683Spst	else
153617683Spst		diffp = &JF(b->in_edges->pred);
153717683Spst
153817683Spst	at_top = 1;
153917683Spst	while (1) {
154017683Spst		if (*diffp == 0)
154117683Spst			return;
154217683Spst
154317683Spst		if (JF(*diffp) != JF(b))
154417683Spst			return;
154517683Spst
154617683Spst		if (!SET_MEMBER((*diffp)->dom, b->id))
154717683Spst			return;
154817683Spst
154917683Spst		if ((*diffp)->val[A_ATOM] != val)
155017683Spst			break;
155117683Spst
155217683Spst		diffp = &JT(*diffp);
155317683Spst		at_top = 0;
155417683Spst	}
155517683Spst	samep = &JT(*diffp);
155617683Spst	while (1) {
155717683Spst		if (*samep == 0)
155817683Spst			return;
155917683Spst
156017683Spst		if (JF(*samep) != JF(b))
156117683Spst			return;
156217683Spst
156317683Spst		if (!SET_MEMBER((*samep)->dom, b->id))
156417683Spst			return;
156517683Spst
156617683Spst		if ((*samep)->val[A_ATOM] == val)
156717683Spst			break;
156817683Spst
156917683Spst		/* XXX Need to check that there are no data dependencies
157017683Spst		   between diffp and samep.  Currently, the code generator
157117683Spst		   will not produce such dependencies. */
157217683Spst		samep = &JT(*samep);
157317683Spst	}
157417683Spst#ifdef notdef
157517683Spst	/* XXX This doesn't cover everything. */
157617683Spst	for (i = 0; i < N_ATOMS; ++i)
157717683Spst		if ((*samep)->val[i] != pred->val[i])
157817683Spst			return;
157917683Spst#endif
158017683Spst	/* Pull up the node. */
158117683Spst	pull = *samep;
158217683Spst	*samep = JT(pull);
158317683Spst	JT(pull) = *diffp;
158417683Spst
158517683Spst	/*
158617683Spst	 * At the top of the chain, each predecessor needs to point at the
158717683Spst	 * pulled up node.  Inside the chain, there is only one predecessor
158817683Spst	 * to worry about.
158917683Spst	 */
159017683Spst	if (at_top) {
159117683Spst		for (ep = b->in_edges; ep != 0; ep = ep->next) {
159217683Spst			if (JT(ep->pred) == b)
159317683Spst				JT(ep->pred) = pull;
159417683Spst			else
159517683Spst				JF(ep->pred) = pull;
159617683Spst		}
159717683Spst	}
159817683Spst	else
159917683Spst		*diffp = pull;
160017683Spst
160117683Spst	done = 0;
160217683Spst}
160317683Spst
160417683Spststatic void
160517683Spstopt_blks(root, do_stmts)
160617683Spst	struct block *root;
160717683Spst	int do_stmts;
160817683Spst{
160917683Spst	int i, maxlevel;
161017683Spst	struct block *p;
161117683Spst
161217683Spst	init_val();
161317683Spst	maxlevel = root->level;
161475107Sfenner
161575107Sfenner	find_inedges(root);
161617683Spst	for (i = maxlevel; i >= 0; --i)
161717683Spst		for (p = levels[i]; p; p = p->link)
161817683Spst			opt_blk(p, do_stmts);
161917683Spst
162017683Spst	if (do_stmts)
162117683Spst		/*
162217683Spst		 * No point trying to move branches; it can't possibly
162317683Spst		 * make a difference at this point.
162417683Spst		 */
162517683Spst		return;
162617683Spst
162717683Spst	for (i = 1; i <= maxlevel; ++i) {
162817683Spst		for (p = levels[i]; p; p = p->link) {
162917683Spst			opt_j(&p->et);
163017683Spst			opt_j(&p->ef);
163117683Spst		}
163217683Spst	}
163375107Sfenner
163475107Sfenner	find_inedges(root);
163517683Spst	for (i = 1; i <= maxlevel; ++i) {
163617683Spst		for (p = levels[i]; p; p = p->link) {
163717683Spst			or_pullup(p);
163817683Spst			and_pullup(p);
163917683Spst		}
164017683Spst	}
164117683Spst}
164217683Spst
164317683Spststatic inline void
164417683Spstlink_inedge(parent, child)
164517683Spst	struct edge *parent;
164617683Spst	struct block *child;
164717683Spst{
164817683Spst	parent->next = child->in_edges;
164917683Spst	child->in_edges = parent;
165017683Spst}
165117683Spst
165217683Spststatic void
165317683Spstfind_inedges(root)
165417683Spst	struct block *root;
165517683Spst{
165617683Spst	int i;
165717683Spst	struct block *b;
165817683Spst
165917683Spst	for (i = 0; i < n_blocks; ++i)
166017683Spst		blocks[i]->in_edges = 0;
166117683Spst
166217683Spst	/*
166317683Spst	 * Traverse the graph, adding each edge to the predecessor
166417683Spst	 * list of its successors.  Skip the leaves (i.e. level 0).
166517683Spst	 */
166617683Spst	for (i = root->level; i > 0; --i) {
166717683Spst		for (b = levels[i]; b != 0; b = b->link) {
166817683Spst			link_inedge(&b->et, JT(b));
166917683Spst			link_inedge(&b->ef, JF(b));
167017683Spst		}
167117683Spst	}
167217683Spst}
167317683Spst
167417683Spststatic void
167517683Spstopt_root(b)
167617683Spst	struct block **b;
167717683Spst{
167817683Spst	struct slist *tmp, *s;
167917683Spst
168017683Spst	s = (*b)->stmts;
168117683Spst	(*b)->stmts = 0;
168217683Spst	while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
168317683Spst		*b = JT(*b);
168417683Spst
168517683Spst	tmp = (*b)->stmts;
168617683Spst	if (tmp != 0)
168717683Spst		sappend(s, tmp);
168817683Spst	(*b)->stmts = s;
168917683Spst
169017683Spst	/*
169117683Spst	 * If the root node is a return, then there is no
169217683Spst	 * point executing any statements (since the bpf machine
169317683Spst	 * has no side effects).
169417683Spst	 */
169517683Spst	if (BPF_CLASS((*b)->s.code) == BPF_RET)
169617683Spst		(*b)->stmts = 0;
169717683Spst}
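
/*
 * Editor's example (not original code): if the root is the jump
 * "jeq #0x800 ? L1 : L1", both branches agree, so opt_root() makes L1
 * the new root and prepends the old root's statements to it; and if
 * the new root is a BPF_RET, those statements are discarded outright,
 * since no BPF statement has a side effect the return could observe.
 */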
169817683Spst
169917683Spststatic void
170017683Spstopt_loop(root, do_stmts)
170117683Spst	struct block *root;
170217683Spst	int do_stmts;
170317683Spst{
170417683Spst
170517683Spst#ifdef BDEBUG
170698530Sfenner	if (dflag > 1) {
170798530Sfenner		printf("opt_loop(root, %d) begin\n", do_stmts);
170817683Spst		opt_dump(root);
170998530Sfenner	}
171017683Spst#endif
171117683Spst	do {
171217683Spst		done = 1;
171317683Spst		find_levels(root);
171417683Spst		find_dom(root);
171517683Spst		find_closure(root);
171617683Spst		find_ud(root);
171717683Spst		find_edom(root);
171817683Spst		opt_blks(root, do_stmts);
171917683Spst#ifdef BDEBUG
172098530Sfenner		if (dflag > 1) {
172198530Sfenner			printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, done);
172217683Spst			opt_dump(root);
172398530Sfenner		}
172417683Spst#endif
172517683Spst	} while (!done);
172617683Spst}
172717683Spst
172817683Spst/*
172917683Spst * Optimize the filter code in its dag representation.
173017683Spst */
173117683Spstvoid
173217683Spstbpf_optimize(rootp)
173317683Spst	struct block **rootp;
173417683Spst{
173517683Spst	struct block *root;
173617683Spst
173717683Spst	root = *rootp;
173817683Spst
173917683Spst	opt_init(root);
174017683Spst	opt_loop(root, 0);
174117683Spst	opt_loop(root, 1);
174217683Spst	intern_blocks(root);
174398530Sfenner#ifdef BDEBUG
174498530Sfenner	if (dflag > 1) {
174598530Sfenner		printf("after intern_blocks()\n");
174698530Sfenner		opt_dump(root);
174798530Sfenner	}
174898530Sfenner#endif
174917683Spst	opt_root(rootp);
175098530Sfenner#ifdef BDEBUG
175198530Sfenner	if (dflag > 1) {
175298530Sfenner		printf("after opt_root()\n");
175398530Sfenner		opt_dump(root);
175498530Sfenner	}
175598530Sfenner#endif
175617683Spst	opt_cleanup();
175717683Spst}
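
#if 0
/*
 * Editor's sketch, not part of the original file: the intended call
 * sequence, modelled on what pcap_compile() does.  "example_compile_tail"
 * is a hypothetical name; "root" is a flowgraph built by the code
 * generator in gencode.c.
 */
static void
example_compile_tail(struct block *root, struct bpf_program *prog)
{
	int len;

	bpf_optimize(&root);		/* optimize the dag in place */
	prog->bf_insns = icode_to_fcode(root, &len);
	prog->bf_len = len;
	/* the caller must pcap_freecode(prog) when done with it */
}
#endif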
175817683Spst
175917683Spststatic void
176017683Spstmake_marks(p)
176117683Spst	struct block *p;
176217683Spst{
176317683Spst	if (!isMarked(p)) {
176417683Spst		Mark(p);
176517683Spst		if (BPF_CLASS(p->s.code) != BPF_RET) {
176617683Spst			make_marks(JT(p));
176717683Spst			make_marks(JF(p));
176817683Spst		}
176917683Spst	}
177017683Spst}
177117683Spst
177217683Spst/*
177317683Spst * Mark code array such that isMarked(i) is true
177417683Spst * only for nodes that are alive.
177517683Spst */
177617683Spststatic void
177717683Spstmark_code(p)
177817683Spst	struct block *p;
177917683Spst{
178017683Spst	cur_mark += 1;
178117683Spst	make_marks(p);
178217683Spst}
178317683Spst
178417683Spst/*
178517683Spst * True iff the two stmt lists are identical ignoring NOPs, and thus
178617683Spst * load the same value from the packet into the accumulator.
178717683Spst */
178817683Spststatic int
178917683Spsteq_slist(x, y)
179017683Spst	struct slist *x, *y;
179117683Spst{
179217683Spst	while (1) {
179317683Spst		while (x && x->s.code == NOP)
179417683Spst			x = x->next;
179517683Spst		while (y && y->s.code == NOP)
179617683Spst			y = y->next;
179717683Spst		if (x == 0)
179817683Spst			return y == 0;
179917683Spst		if (y == 0)
180017683Spst			return x == 0;
180117683Spst		if (x->s.code != y->s.code || x->s.k != y->s.k)
180217683Spst			return 0;
180317683Spst		x = x->next;
180417683Spst		y = y->next;
180517683Spst	}
180617683Spst}
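
/*
 * Editor's example (not original code): the lists
 *
 *	ld [14]			ld [14]
 *	nop
 *	and #0xff		and #0xff
 *
 * compare equal under eq_slist(): NOPs left behind by earlier passes
 * are skipped, and only the opcode and constant operand of each
 * remaining statement are compared.
 */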
180717683Spst
180817683Spststatic inline int
180917683Spsteq_blk(b0, b1)
181017683Spst	struct block *b0, *b1;
181117683Spst{
181217683Spst	if (b0->s.code == b1->s.code &&
181317683Spst	    b0->s.k == b1->s.k &&
181417683Spst	    b0->et.succ == b1->et.succ &&
181517683Spst	    b0->ef.succ == b1->ef.succ)
181617683Spst		return eq_slist(b0->stmts, b1->stmts);
181717683Spst	return 0;
181817683Spst}
181917683Spst
182017683Spststatic void
182117683Spstintern_blocks(root)
182217683Spst	struct block *root;
182317683Spst{
182417683Spst	struct block *p;
182517683Spst	int i, j;
1826172677Smlaier	int done1; /* don't shadow global */
182717683Spst top:
1828172677Smlaier	done1 = 1;
182917683Spst	for (i = 0; i < n_blocks; ++i)
183017683Spst		blocks[i]->link = 0;
183117683Spst
183217683Spst	mark_code(root);
183317683Spst
183417683Spst	for (i = n_blocks - 1; --i >= 0; ) {
183517683Spst		if (!isMarked(blocks[i]))
183617683Spst			continue;
183717683Spst		for (j = i + 1; j < n_blocks; ++j) {
183817683Spst			if (!isMarked(blocks[j]))
183917683Spst				continue;
184017683Spst			if (eq_blk(blocks[i], blocks[j])) {
184117683Spst				blocks[i]->link = blocks[j]->link ?
184217683Spst					blocks[j]->link : blocks[j];
184317683Spst				break;
184417683Spst			}
184517683Spst		}
184617683Spst	}
184717683Spst	for (i = 0; i < n_blocks; ++i) {
184817683Spst		p = blocks[i];
184917683Spst		if (JT(p) == 0)
185017683Spst			continue;
185117683Spst		if (JT(p)->link) {
1852172677Smlaier			done1 = 0;
185317683Spst			JT(p) = JT(p)->link;
185417683Spst		}
185517683Spst		if (JF(p)->link) {
1856172677Smlaier			done1 = 0;
185717683Spst			JF(p) = JF(p)->link;
185817683Spst		}
185917683Spst	}
1860172677Smlaier	if (!done1)
186117683Spst		goto top;
186217683Spst}
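
/*
 * Editor's note (not original code): intern_blocks() is block-level
 * common-subexpression elimination.  Each block that eq_blk() finds
 * to be a duplicate is linked to a representative, every jump to a
 * duplicate is redirected to its representative, and the whole scan
 * repeats until a pass makes no change.
 */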
186317683Spst
186417683Spststatic void
186517683Spstopt_cleanup()
186617683Spst{
186717683Spst	free((void *)vnode_base);
186817683Spst	free((void *)vmap);
186917683Spst	free((void *)edges);
187017683Spst	free((void *)space);
187117683Spst	free((void *)levels);
187217683Spst	free((void *)blocks);
187317683Spst}
187417683Spst
187517683Spst/*
187617683Spst * Return the number of stmts in 's'.
187717683Spst */
187817683Spststatic int
187917683Spstslength(s)
188017683Spst	struct slist *s;
188117683Spst{
188217683Spst	int n = 0;
188317683Spst
188417683Spst	for (; s; s = s->next)
188517683Spst		if (s->s.code != NOP)
188617683Spst			++n;
188717683Spst	return n;
188817683Spst}
188917683Spst
189017683Spst/*
189117683Spst * Return the number of nodes reachable from 'p'.
189217683Spst * All nodes should be initially unmarked.
189317683Spst */
189417683Spststatic int
189517683Spstcount_blocks(p)
189617683Spst	struct block *p;
189717683Spst{
189817683Spst	if (p == 0 || isMarked(p))
189917683Spst		return 0;
190017683Spst	Mark(p);
190117683Spst	return count_blocks(JT(p)) + count_blocks(JF(p)) + 1;
190217683Spst}
190317683Spst
190417683Spst/*
190517683Spst * Do a depth-first search on the flow graph, numbering the
190617683Spst * basic blocks, and entering them into the 'blocks' array.
190717683Spst */
190817683Spststatic void
190917683Spstnumber_blks_r(p)
191017683Spst	struct block *p;
191117683Spst{
191217683Spst	int n;
191317683Spst
191417683Spst	if (p == 0 || isMarked(p))
191517683Spst		return;
191617683Spst
191717683Spst	Mark(p);
191817683Spst	n = n_blocks++;
191917683Spst	p->id = n;
192017683Spst	blocks[n] = p;
192117683Spst
192217683Spst	number_blks_r(JT(p));
192317683Spst	number_blks_r(JF(p));
192417683Spst}
192517683Spst
192617683Spst/*
192717683Spst * Return the number of stmts in the flowgraph reachable from 'p'.
192817683Spst * The nodes should be unmarked before calling.
192975107Sfenner *
193075107Sfenner * Note that "stmts" means "instructions", and that this includes
193175107Sfenner *
193275107Sfenner *	side-effect statements in 'p' (slength(p->stmts));
193375107Sfenner *
193475107Sfenner *	statements in the true branch from 'p' (count_stmts(JT(p)));
193575107Sfenner *
193675107Sfenner *	statements in the false branch from 'p' (count_stmts(JF(p)));
193775107Sfenner *
193875107Sfenner *	the conditional jump itself (1);
193975107Sfenner *
194075107Sfenner *	an extra long jump if the true branch requires it (p->longjt);
194175107Sfenner *
194275107Sfenner *	an extra long jump if the false branch requires it (p->longjf).
194317683Spst */
194417683Spststatic int
194517683Spstcount_stmts(p)
194617683Spst	struct block *p;
194717683Spst{
194817683Spst	int n;
194917683Spst
195017683Spst	if (p == 0 || isMarked(p))
195117683Spst		return 0;
195217683Spst	Mark(p);
195317683Spst	n = count_stmts(JT(p)) + count_stmts(JF(p));
195475107Sfenner	return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
195517683Spst}
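
/*
 * Editor's worked example (not original code): a reachable block with
 * two non-NOP statements, longjt = 1 and longjf = 0 contributes
 * 2 (stmts) + 1 (the conditional jump) + 1 (one extra BPF_JA) = 4
 * instructions to the total.
 */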
195617683Spst
195717683Spst/*
195817683Spst * Allocate memory.  All allocation is done before optimization
195917683Spst * is begun.  A linear bound on the size of all data structures is computed
196017683Spst * from the total number of blocks and/or statements.
196117683Spst */
196217683Spststatic void
196317683Spstopt_init(root)
196417683Spst	struct block *root;
196517683Spst{
196617683Spst	bpf_u_int32 *p;
196717683Spst	int i, n, max_stmts;
196817683Spst
196917683Spst	/*
197017683Spst	 * First, count the blocks, so we can malloc an array to map
197117683Spst	 * block number to block.  Then, put the blocks into the array.
197217683Spst	 */
197317683Spst	unMarkAll();
197417683Spst	n = count_blocks(root);
1975172677Smlaier	blocks = (struct block **)calloc(n, sizeof(*blocks));
1976127664Sbms	if (blocks == NULL)
1977127664Sbms		bpf_error("malloc");
197817683Spst	unMarkAll();
197917683Spst	n_blocks = 0;
198017683Spst	number_blks_r(root);
198117683Spst
198217683Spst	n_edges = 2 * n_blocks;
1983172677Smlaier	edges = (struct edge **)calloc(n_edges, sizeof(*edges));
1984127664Sbms	if (edges == NULL)
1985127664Sbms		bpf_error("malloc");
198617683Spst
198717683Spst	/*
198817683Spst	 * The number of levels is bounded by the number of nodes.
198917683Spst	 */
1990172677Smlaier	levels = (struct block **)calloc(n_blocks, sizeof(*levels));
1991127664Sbms	if (levels == NULL)
1992127664Sbms		bpf_error("malloc");
199317683Spst
199417683Spst	edgewords = n_edges / (8 * sizeof(bpf_u_int32)) + 1;
199517683Spst	nodewords = n_blocks / (8 * sizeof(bpf_u_int32)) + 1;
199617683Spst
199717683Spst	/* XXX */
199817683Spst	space = (bpf_u_int32 *)malloc(2 * n_blocks * nodewords * sizeof(*space)
199917683Spst				 + n_edges * edgewords * sizeof(*space));
2000127664Sbms	if (space == NULL)
2001127664Sbms		bpf_error("malloc");
200217683Spst	p = space;
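	/*
	 * Editor's note: the single "space" allocation above is carved
	 * into three consecutive regions via 'p': n_blocks dominator
	 * sets of nodewords words each, then n_blocks closure sets of
	 * the same size, then n_edges (= 2 * n_blocks) edge-dominator
	 * sets of edgewords words each.
	 */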
200317683Spst	all_dom_sets = p;
200417683Spst	for (i = 0; i < n; ++i) {
200517683Spst		blocks[i]->dom = p;
200617683Spst		p += nodewords;
200717683Spst	}
200817683Spst	all_closure_sets = p;
200917683Spst	for (i = 0; i < n; ++i) {
201017683Spst		blocks[i]->closure = p;
201117683Spst		p += nodewords;
201217683Spst	}
201317683Spst	all_edge_sets = p;
201417683Spst	for (i = 0; i < n; ++i) {
201517683Spst		register struct block *b = blocks[i];
201617683Spst
201717683Spst		b->et.edom = p;
201817683Spst		p += edgewords;
201917683Spst		b->ef.edom = p;
202017683Spst		p += edgewords;
202117683Spst		b->et.id = i;
202217683Spst		edges[i] = &b->et;
202317683Spst		b->ef.id = n_blocks + i;
202417683Spst		edges[n_blocks + i] = &b->ef;
202517683Spst		b->et.pred = b;
202617683Spst		b->ef.pred = b;
202717683Spst	}
202817683Spst	max_stmts = 0;
202917683Spst	for (i = 0; i < n; ++i)
203017683Spst		max_stmts += slength(blocks[i]->stmts) + 1;
203117683Spst	/*
203217683Spst	 * We allocate at most 3 value numbers per statement,
203317683Spst	 * so this is an upper bound on the number of valnodes
203417683Spst	 * we'll need.
203517683Spst	 */
203617683Spst	maxval = 3 * max_stmts;
2037172677Smlaier	vmap = (struct vmapinfo *)calloc(maxval, sizeof(*vmap));
2038172677Smlaier	vnode_base = (struct valnode *)calloc(maxval, sizeof(*vnode_base));
2039127664Sbms	if (vmap == NULL || vnode_base == NULL)
2040127664Sbms		bpf_error("malloc");
204117683Spst}
204217683Spst
204317683Spst/*
204417683Spst * Some pointers used to convert the basic block form of the code,
204517683Spst * into the array form that BPF requires.  'fstart' will point to
204617683Spst * the malloc'd array while 'ftail' is used during the recursive traversal.
204717683Spst */
204817683Spststatic struct bpf_insn *fstart;
204917683Spststatic struct bpf_insn *ftail;
205017683Spst
205117683Spst#ifdef BDEBUG
205217683Spstint bids[1000];
205317683Spst#endif
205417683Spst
205517683Spst/*
205617683Spst * Returns true if successful.  Returns false if a branch has
205717683Spst * an offset that is too large.  If so, we have marked that
205817683Spst * branch so that on a subsequent iteration, it will be treated
205917683Spst * properly.
206017683Spst */
206117683Spststatic int
206217683Spstconvert_code_r(p)
206317683Spst	struct block *p;
206417683Spst{
206517683Spst	struct bpf_insn *dst;
206617683Spst	struct slist *src;
206717683Spst	int slen;
206817683Spst	u_int off;
206917683Spst	int extrajmps;		/* number of extra jumps inserted */
207056889Sfenner	struct slist **offset = NULL;
207117683Spst
207217683Spst	if (p == 0 || isMarked(p))
207317683Spst		return (1);
207417683Spst	Mark(p);
207517683Spst
207617683Spst	if (convert_code_r(JF(p)) == 0)
207717683Spst		return (0);
207817683Spst	if (convert_code_r(JT(p)) == 0)
207917683Spst		return (0);
208017683Spst
208117683Spst	slen = slength(p->stmts);
208217683Spst	dst = ftail -= (slen + 1 + p->longjt + p->longjf);
208317683Spst		/* inflate length by any extra jumps */
208417683Spst
208517683Spst	p->offset = dst - fstart;
208617683Spst
208756889Sfenner	/* generate offset[] for convenience */
208856889Sfenner	if (slen) {
2089127664Sbms		offset = (struct slist **)calloc(slen, sizeof(struct slist *));
209056889Sfenner		if (!offset) {
209156889Sfenner			bpf_error("not enough core");
209256889Sfenner			/*NOTREACHED*/
209356889Sfenner		}
209456889Sfenner	}
209556889Sfenner	src = p->stmts;
209656889Sfenner	for (off = 0; off < slen && src; off++) {
209756889Sfenner#if 0
209856889Sfenner		printf("off=%d src=%x\n", off, src);
209956889Sfenner#endif
210056889Sfenner		offset[off] = src;
210156889Sfenner		src = src->next;
210256889Sfenner	}
210356889Sfenner
210456889Sfenner	off = 0;
210517683Spst	for (src = p->stmts; src; src = src->next) {
210617683Spst		if (src->s.code == NOP)
210717683Spst			continue;
210817683Spst		dst->code = (u_short)src->s.code;
210917683Spst		dst->k = src->s.k;
211056889Sfenner
211156889Sfenner		/* fill block-local relative jump */
211275107Sfenner		if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
211356889Sfenner#if 0
211456889Sfenner			if (src->s.jt || src->s.jf) {
211556889Sfenner				bpf_error("illegal jmp destination");
211656889Sfenner				/*NOTREACHED*/
211756889Sfenner			}
211856889Sfenner#endif
211956889Sfenner			goto filled;
212056889Sfenner		}
212156889Sfenner		if (off == slen - 2)	/*???*/
212256889Sfenner			goto filled;
212356889Sfenner
212456889Sfenner	    {
212556889Sfenner		int i;
212656889Sfenner		int jt, jf;
2127172677Smlaier		const char *ljerr = "%s for block-local relative jump: off=%d";
212856889Sfenner
212956889Sfenner#if 0
213056889Sfenner		printf("code=%x off=%d %x %x\n", src->s.code,
213156889Sfenner			off, src->s.jt, src->s.jf);
213256889Sfenner#endif
213356889Sfenner
213456889Sfenner		if (!src->s.jt || !src->s.jf) {
213556889Sfenner			bpf_error(ljerr, "no jmp destination", off);
213656889Sfenner			/*NOTREACHED*/
213756889Sfenner		}
213856889Sfenner
213956889Sfenner		jt = jf = 0;
214056889Sfenner		for (i = 0; i < slen; i++) {
214156889Sfenner			if (offset[i] == src->s.jt) {
214256889Sfenner				if (jt) {
214356889Sfenner					bpf_error(ljerr, "multiple matches", off);
214456889Sfenner					/*NOTREACHED*/
214556889Sfenner				}
214656889Sfenner
214756889Sfenner				dst->jt = i - off - 1;
214856889Sfenner				jt++;
214956889Sfenner			}
215056889Sfenner			if (offset[i] == src->s.jf) {
215156889Sfenner				if (jf) {
215256889Sfenner					bpf_error(ljerr, "multiple matches", off);
215356889Sfenner					/*NOTREACHED*/
215456889Sfenner				}
215556889Sfenner				dst->jf = i - off - 1;
215656889Sfenner				jf++;
215756889Sfenner			}
215856889Sfenner		}
215956889Sfenner		if (!jt || !jf) {
216056889Sfenner			bpf_error(ljerr, "no destination found", off);
216156889Sfenner			/*NOTREACHED*/
216256889Sfenner		}
216356889Sfenner	    }
216456889Sfennerfilled:
216517683Spst		++dst;
216656889Sfenner		++off;
216717683Spst	}
216856889Sfenner	if (offset)
216956889Sfenner		free(offset);
217056889Sfenner
217117683Spst#ifdef BDEBUG
217217683Spst	bids[dst - fstart] = p->id + 1;
217317683Spst#endif
217417683Spst	dst->code = (u_short)p->s.code;
217517683Spst	dst->k = p->s.k;
217617683Spst	if (JT(p)) {
217717683Spst		extrajmps = 0;
217817683Spst		off = JT(p)->offset - (p->offset + slen) - 1;
217917683Spst		if (off >= 256) {
218017683Spst		    /* offset too large for branch, must add a jump */
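		    /*
		     * Editor's worked example: if off were 300, the
		     * conditional branch gets jt = 0 (the immediately
		     * following insn) and a BPF_JA with k = 300 - 1 is
		     * emitted there, covering the distance the 8-bit
		     * jt field cannot.
		     */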
218117683Spst		    if (p->longjt == 0) {
218217683Spst		    	/* mark this instruction and retry */
218317683Spst			p->longjt++;
218417683Spst			return(0);
218517683Spst		    }
218617683Spst		    /* branch if T to following jump */
218717683Spst		    dst->jt = extrajmps;
218817683Spst		    extrajmps++;
218917683Spst		    dst[extrajmps].code = BPF_JMP|BPF_JA;
219017683Spst		    dst[extrajmps].k = off - extrajmps;
219117683Spst		}
219217683Spst		else
219317683Spst		    dst->jt = off;
219417683Spst		off = JF(p)->offset - (p->offset + slen) - 1;
219517683Spst		if (off >= 256) {
219617683Spst		    /* offset too large for branch, must add a jump */
219717683Spst		    if (p->longjf == 0) {
219817683Spst		    	/* mark this instruction and retry */
219917683Spst			p->longjf++;
220017683Spst			return(0);
220117683Spst		    }
220217683Spst		    /* branch if F to following jump */
220317683Spst		    /* if two jumps are inserted, F goes to second one */
220417683Spst		    dst->jf = extrajmps;
220517683Spst		    extrajmps++;
220617683Spst		    dst[extrajmps].code = BPF_JMP|BPF_JA;
220717683Spst		    dst[extrajmps].k = off - extrajmps;
220817683Spst		}
220917683Spst		else
221017683Spst		    dst->jf = off;
221117683Spst	}
221217683Spst	return (1);
221317683Spst}
221417683Spst
221517683Spst
221617683Spst/*
221717683Spst * Convert flowgraph intermediate representation to the
221817683Spst * BPF array representation.  Set *lenp to the number of instructions.
2219172677Smlaier *
2220172677Smlaier * This routine does *NOT* leak the memory pointed to by fp.  It *must
2221172677Smlaier * not* do free(fp) before returning fp; doing so would make no sense,
2222172677Smlaier * as the BPF array pointed to by the return value of icode_to_fcode()
2223172677Smlaier * must be valid - it's being returned for use in a bpf_program structure.
2224172677Smlaier *
2225172677Smlaier * If it appears that icode_to_fcode() is leaking, the problem is that
2226172677Smlaier * the program using pcap_compile() is failing to free the memory in
2227172677Smlaier * the BPF program when it's done - the leak is in the program, not in
2228172677Smlaier * the routine that happens to be allocating the memory.  (By analogy, if
2229172677Smlaier * a program calls fopen() without ever calling fclose() on the FILE *,
2230172677Smlaier * it will leak the FILE structure; the leak is not in fopen(), it's in
2231172677Smlaier * the program.)  Change the program to use pcap_freecode() when it's
2232172677Smlaier * done with the filter program.  See the pcap man page.
223317683Spst */
223417683Spststruct bpf_insn *
223517683Spsticode_to_fcode(root, lenp)
223617683Spst	struct block *root;
223717683Spst	int *lenp;
223817683Spst{
223917683Spst	int n;
224017683Spst	struct bpf_insn *fp;
224117683Spst
224217683Spst	/*
224398530Sfenner	 * Loop doing convert_code_r() until no branches remain
224417683Spst	 * with too-large offsets.
224517683Spst	 */
224617683Spst	while (1) {
224717683Spst	    unMarkAll();
224817683Spst	    n = *lenp = count_stmts(root);
2249127664Sbms
225017683Spst	    fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
2251127664Sbms	    if (fp == NULL)
2252127664Sbms		    bpf_error("malloc");
225317683Spst	    memset((char *)fp, 0, sizeof(*fp) * n);
225417683Spst	    fstart = fp;
225517683Spst	    ftail = fp + n;
2256127664Sbms
225717683Spst	    unMarkAll();
225817683Spst	    if (convert_code_r(root))
225917683Spst		break;
226017683Spst	    free(fp);
226117683Spst	}
226217683Spst
226317683Spst	return fp;
226417683Spst}
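
/*
 * Editor's note on the retry loop above: when convert_code_r() fails,
 * it has already set longjt or longjf on the offending branch, so the
 * next count_stmts() pass reserves room for the extra BPF_JA
 * instructions and the conversion is retried with a larger array.
 */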
226517683Spst
226675107Sfenner/*
226775107Sfenner * Make a copy of a BPF program and put it in the "fcode" member of
226875107Sfenner * a "pcap_t".
226975107Sfenner *
227075107Sfenner * If we fail to allocate memory for the copy, fill in the "errbuf"
227175107Sfenner * member of the "pcap_t" with an error message, and return -1;
227275107Sfenner * otherwise, return 0.
227375107Sfenner */
227475107Sfennerint
227575107Sfennerinstall_bpf_program(pcap_t *p, struct bpf_program *fp)
227675107Sfenner{
227775107Sfenner	size_t prog_size;
227875107Sfenner
227975107Sfenner	/*
228075107Sfenner	 * Free up any already installed program.
228175107Sfenner	 */
228275107Sfenner	pcap_freecode(&p->fcode);
228375107Sfenner
228475107Sfenner	prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
228575107Sfenner	p->fcode.bf_len = fp->bf_len;
228675107Sfenner	p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
228775107Sfenner	if (p->fcode.bf_insns == NULL) {
228875107Sfenner		snprintf(p->errbuf, sizeof(p->errbuf),
228975107Sfenner			 "malloc: %s", pcap_strerror(errno));
229075107Sfenner		return (-1);
229175107Sfenner	}
229275107Sfenner	memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
229375107Sfenner	return (0);
229475107Sfenner}
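
#if 0
/*
 * Editor's sketch (hypothetical, not part of this file): a capture
 * module's setfilter routine can validate and install a compiled
 * program with a single call, relying on install_bpf_program() to
 * fill in p->errbuf on failure.  "example_setfilter" is an invented
 * name.
 */
static int
example_setfilter(pcap_t *p, struct bpf_program *fp)
{
	if (install_bpf_program(p, fp) < 0)
		return (-1);	/* p->errbuf already describes the error */
	return (0);
}
#endif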
229575107Sfenner
229617683Spst#ifdef BDEBUG
229717683Spststatic void
229817683Spstopt_dump(root)
229917683Spst	struct block *root;
230017683Spst{
230117683Spst	struct bpf_program f;
230217683Spst
230317683Spst	memset(bids, 0, sizeof bids);
230417683Spst	f.bf_insns = icode_to_fcode(root, &f.bf_len);
230517683Spst	bpf_dump(&f, 1);
230617683Spst	putchar('\n');
230717683Spst	free((char *)f.bf_insns);
230817683Spst}
230917683Spst#endif
2310