optimize.c revision 127664
117683Spst/*
217683Spst * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
317683Spst *	The Regents of the University of California.  All rights reserved.
417683Spst *
517683Spst * Redistribution and use in source and binary forms, with or without
617683Spst * modification, are permitted provided that: (1) source code distributions
717683Spst * retain the above copyright notice and this paragraph in its entirety, (2)
817683Spst * distributions including binary code include the above copyright notice and
917683Spst * this paragraph in its entirety in the documentation or other materials
1017683Spst * provided with the distribution, and (3) all advertising materials mentioning
1117683Spst * features or use of this software display the following acknowledgement:
1217683Spst * ``This product includes software developed by the University of California,
1317683Spst * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
1417683Spst * the University nor the names of its contributors may be used to endorse
1517683Spst * or promote products derived from this software without specific prior
1617683Spst * written permission.
1717683Spst * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
1817683Spst * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
1917683Spst * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
2017683Spst *
2117683Spst *  Optimization module for tcpdump intermediate representation.
2217683Spst */
2317683Spst#ifndef lint
24127664Sbmsstatic const char rcsid[] _U_ =
25127664Sbms    "@(#) $Header: /tcpdump/master/libpcap/optimize.c,v 1.76.2.3 2003/12/22 00:26:36 guy Exp $ (LBL)";
2617683Spst#endif
2717683Spst
2875107Sfenner#ifdef HAVE_CONFIG_H
2975107Sfenner#include "config.h"
3075107Sfenner#endif
3175107Sfenner
3217683Spst#include <stdio.h>
3317683Spst#include <stdlib.h>
3417683Spst#include <memory.h>
3517683Spst
3675107Sfenner#include <errno.h>
3775107Sfenner
3817683Spst#include "pcap-int.h"
3917683Spst
4017683Spst#include "gencode.h"
4117683Spst
4217683Spst#ifdef HAVE_OS_PROTO_H
4317683Spst#include "os-proto.h"
4417683Spst#endif
4517683Spst
4617683Spst#ifdef BDEBUG
4717683Spstextern int dflag;
4817683Spst#endif
4917683Spst
5017683Spst#define A_ATOM BPF_MEMWORDS
5117683Spst#define X_ATOM (BPF_MEMWORDS+1)
5217683Spst
5317683Spst#define NOP -1
5417683Spst
5517683Spst/*
5617683Spst * This define is used to represent *both* the accumulator and
5717683Spst * x register in use-def computations.
5817683Spst * Currently, the use-def code assumes only one definition per instruction.
5917683Spst */
6017683Spst#define AX_ATOM N_ATOMS
6117683Spst
6217683Spst/*
6317683Spst * A flag to indicate that further optimization is needed.
6417683Spst * Iterative passes are continued until a given pass yields no
6517683Spst * branch movement.
6617683Spst */
6717683Spststatic int done;
6817683Spst
6917683Spst/*
7017683Spst * A block is marked if and only if its mark equals the current mark.
7117683Spst * Rather than traverse the code array, marking each item, 'cur_mark' is
7217683Spst * incremented.  This automatically makes each element unmarked.
7317683Spst */
7417683Spststatic int cur_mark;
7517683Spst#define isMarked(p) ((p)->mark == cur_mark)
7617683Spst#define unMarkAll() cur_mark += 1
7717683Spst#define Mark(p) ((p)->mark = cur_mark)
7817683Spst
7917683Spststatic void opt_init(struct block *);
8017683Spststatic void opt_cleanup(void);
8117683Spst
8217683Spststatic void make_marks(struct block *);
8317683Spststatic void mark_code(struct block *);
8417683Spst
8517683Spststatic void intern_blocks(struct block *);
8617683Spst
8717683Spststatic int eq_slist(struct slist *, struct slist *);
8817683Spst
8917683Spststatic void find_levels_r(struct block *);
9017683Spst
9117683Spststatic void find_levels(struct block *);
9217683Spststatic void find_dom(struct block *);
9317683Spststatic void propedom(struct edge *);
9417683Spststatic void find_edom(struct block *);
9517683Spststatic void find_closure(struct block *);
9617683Spststatic int atomuse(struct stmt *);
9717683Spststatic int atomdef(struct stmt *);
9817683Spststatic void compute_local_ud(struct block *);
9917683Spststatic void find_ud(struct block *);
10017683Spststatic void init_val(void);
10117683Spststatic int F(int, int, int);
10217683Spststatic inline void vstore(struct stmt *, int *, int, int);
10317683Spststatic void opt_blk(struct block *, int);
10417683Spststatic int use_conflict(struct block *, struct block *);
10517683Spststatic void opt_j(struct edge *);
10617683Spststatic void or_pullup(struct block *);
10717683Spststatic void and_pullup(struct block *);
10817683Spststatic void opt_blks(struct block *, int);
10917683Spststatic inline void link_inedge(struct edge *, struct block *);
11017683Spststatic void find_inedges(struct block *);
11117683Spststatic void opt_root(struct block **);
11217683Spststatic void opt_loop(struct block *, int);
11317683Spststatic void fold_op(struct stmt *, int, int);
11417683Spststatic inline struct slist *this_op(struct slist *);
11517683Spststatic void opt_not(struct block *);
11617683Spststatic void opt_peep(struct block *);
11717683Spststatic void opt_stmt(struct stmt *, int[], int);
11817683Spststatic void deadstmt(struct stmt *, struct stmt *[]);
11917683Spststatic void opt_deadstores(struct block *);
12017683Spststatic struct block *fold_edge(struct block *, struct edge *);
12117683Spststatic inline int eq_blk(struct block *, struct block *);
12217683Spststatic int slength(struct slist *);
12317683Spststatic int count_blocks(struct block *);
12417683Spststatic void number_blks_r(struct block *);
12517683Spststatic int count_stmts(struct block *);
12617683Spststatic int convert_code_r(struct block *);
12717683Spst#ifdef BDEBUG
12817683Spststatic void opt_dump(struct block *);
12917683Spst#endif
13017683Spst
13117683Spststatic int n_blocks;
13217683Spststruct block **blocks;
13317683Spststatic int n_edges;
13417683Spststruct edge **edges;
13517683Spst
13617683Spst/*
13717683Spst * A bit vector set representation of the dominators.
13817683Spst * We round up the set size to the next power of two.
13917683Spst */
14017683Spststatic int nodewords;
14117683Spststatic int edgewords;
14217683Spststruct block **levels;
14317683Spstbpf_u_int32 *space;
14417683Spst#define BITS_PER_WORD (8*sizeof(bpf_u_int32))
14517683Spst/*
14617683Spst * True if 'a' is in uset 'p'.
14717683Spst */
14817683Spst#define SET_MEMBER(p, a) \
14917683Spst((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))
15017683Spst
15117683Spst/*
15217683Spst * Add 'a' to uset p.
15317683Spst */
15417683Spst#define SET_INSERT(p, a) \
15517683Spst(p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))
15617683Spst
15717683Spst/*
15817683Spst * Delete 'a' from uset p.
15917683Spst */
16017683Spst#define SET_DELETE(p, a) \
16117683Spst(p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))
16217683Spst
16317683Spst/*
16417683Spst * a := a intersect b
16517683Spst */
16617683Spst#define SET_INTERSECT(a, b, n)\
16717683Spst{\
16817683Spst	register bpf_u_int32 *_x = a, *_y = b;\
16917683Spst	register int _n = n;\
17017683Spst	while (--_n >= 0) *_x++ &= *_y++;\
17117683Spst}
17217683Spst
17317683Spst/*
17417683Spst * a := a - b
17517683Spst */
17617683Spst#define SET_SUBTRACT(a, b, n)\
17717683Spst{\
17817683Spst	register bpf_u_int32 *_x = a, *_y = b;\
17917683Spst	register int _n = n;\
18017683Spst	while (--_n >= 0) *_x++ &=~ *_y++;\
18117683Spst}
18217683Spst
18317683Spst/*
18417683Spst * a := a union b
18517683Spst */
18617683Spst#define SET_UNION(a, b, n)\
18717683Spst{\
18817683Spst	register bpf_u_int32 *_x = a, *_y = b;\
18917683Spst	register int _n = n;\
19017683Spst	while (--_n >= 0) *_x++ |= *_y++;\
19117683Spst}
19217683Spst
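/*
 * Illustrative sketch (ours, not part of the build): how the uset
 * macros compose on a universe small enough to fit in one word.
 * The function name "uset_demo" is hypothetical.
 */
#if 0
static void
uset_demo(void)
{
	bpf_u_int32 a[1], b[1];

	a[0] = b[0] = 0;
	SET_INSERT(a, 1);		/* a = {1} */
	SET_INSERT(a, 3);		/* a = {1,3} */
	SET_INSERT(b, 3);		/* b = {3} */
	SET_INTERSECT(a, b, 1);		/* a = {3} */
	if (SET_MEMBER(a, 3) && !SET_MEMBER(a, 1))
		printf("intersection behaved as expected\n");
}
#endif
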
19317683Spststatic uset all_dom_sets;
19417683Spststatic uset all_closure_sets;
19517683Spststatic uset all_edge_sets;
19617683Spst
19717683Spst#ifndef MAX
19817683Spst#define MAX(a,b) ((a)>(b)?(a):(b))
19917683Spst#endif
20017683Spst
20117683Spststatic void
20217683Spstfind_levels_r(b)
20317683Spst	struct block *b;
20417683Spst{
20517683Spst	int level;
20617683Spst
20717683Spst	if (isMarked(b))
20817683Spst		return;
20917683Spst
21017683Spst	Mark(b);
21117683Spst	b->link = 0;
21217683Spst
21317683Spst	if (JT(b)) {
21417683Spst		find_levels_r(JT(b));
21517683Spst		find_levels_r(JF(b));
21617683Spst		level = MAX(JT(b)->level, JF(b)->level) + 1;
21717683Spst	} else
21817683Spst		level = 0;
21917683Spst	b->level = level;
22017683Spst	b->link = levels[level];
22117683Spst	levels[level] = b;
22217683Spst}
22317683Spst
22417683Spst/*
22517683Spst * Level graph.  The levels go from 0 at the leaves to
22617683Spst * N_LEVELS at the root.  The levels[] array points to the
22717683Spst * first node of the level list, whose elements are linked
22817683Spst * with the 'link' field of the struct block.
22917683Spst */
23017683Spststatic void
23117683Spstfind_levels(root)
23217683Spst	struct block *root;
23317683Spst{
23417683Spst	memset((char *)levels, 0, n_blocks * sizeof(*levels));
23517683Spst	unMarkAll();
23617683Spst	find_levels_r(root);
23717683Spst}
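
/*
 * Sketch (ours): the canonical way the passes below walk the leveled
 * graph top-down; the root has the highest level and the leaves are
 * level 0.  "visit" is a placeholder, not a libpcap function.
 */
#if 0
static void
walk_top_down(struct block *root)
{
	int i;
	struct block *b;

	for (i = root->level; i >= 0; --i)
		for (b = levels[i]; b != 0; b = b->link)
			visit(b);	/* placeholder */
}
#endif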
23817683Spst
23917683Spst/*
24017683Spst * Find dominator relationships.
24117683Spst * Assumes graph has been leveled.
24217683Spst */
24317683Spststatic void
24417683Spstfind_dom(root)
24517683Spst	struct block *root;
24617683Spst{
24717683Spst	int i;
24817683Spst	struct block *b;
24917683Spst	bpf_u_int32 *x;
25017683Spst
25117683Spst	/*
25217683Spst	 * Initialize sets to contain all nodes.
25317683Spst	 */
25417683Spst	x = all_dom_sets;
25517683Spst	i = n_blocks * nodewords;
25617683Spst	while (--i >= 0)
25717683Spst		*x++ = ~0;
25817683Spst	/* Root starts off empty. */
25917683Spst	for (i = nodewords; --i >= 0;)
26017683Spst		root->dom[i] = 0;
26117683Spst
26217683Spst	/* root->level is the highest level number found. */
26317683Spst	for (i = root->level; i >= 0; --i) {
26417683Spst		for (b = levels[i]; b; b = b->link) {
26517683Spst			SET_INSERT(b->dom, b->id);
26617683Spst			if (JT(b) == 0)
26717683Spst				continue;
26817683Spst			SET_INTERSECT(JT(b)->dom, b->dom, nodewords);
26917683Spst			SET_INTERSECT(JF(b)->dom, b->dom, nodewords);
27017683Spst		}
27117683Spst	}
27217683Spst}
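
/*
 * Worked example (ours): for the diamond
 *
 *	    0
 *	   / \
 *	  1   2
 *	   \ /
 *	    3
 *
 * the sets settle to dom(0)={0}, dom(1)={0,1}, dom(2)={0,2} and
 * dom(3)={0,3}: node 3 starts full and is intersected with both
 * dom(1) and dom(2), leaving only the root before 3 inserts itself.
 */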
27317683Spst
27417683Spststatic void
27517683Spstpropedom(ep)
27617683Spst	struct edge *ep;
27717683Spst{
27817683Spst	SET_INSERT(ep->edom, ep->id);
27917683Spst	if (ep->succ) {
28017683Spst		SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords);
28117683Spst		SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords);
28217683Spst	}
28317683Spst}
28417683Spst
28517683Spst/*
28617683Spst * Compute edge dominators.
28717683Spst * Assumes graph has been leveled and predecessors established.
28817683Spst */
28917683Spststatic void
29017683Spstfind_edom(root)
29117683Spst	struct block *root;
29217683Spst{
29317683Spst	int i;
29417683Spst	uset x;
29517683Spst	struct block *b;
29617683Spst
29717683Spst	x = all_edge_sets;
29817683Spst	for (i = n_edges * edgewords; --i >= 0; )
29917683Spst		x[i] = ~0;
30017683Spst
30117683Spst	/* root->level is the highest level number found. */
30217683Spst	memset(root->et.edom, 0, edgewords * sizeof(*(uset)0));
30317683Spst	memset(root->ef.edom, 0, edgewords * sizeof(*(uset)0));
30417683Spst	for (i = root->level; i >= 0; --i) {
30517683Spst		for (b = levels[i]; b != 0; b = b->link) {
30617683Spst			propedom(&b->et);
30717683Spst			propedom(&b->ef);
30817683Spst		}
30917683Spst	}
31017683Spst}
31117683Spst
31217683Spst/*
31317683Spst * Find the backwards transitive closure of the flow graph.  These sets
31417683Spst * are backwards in the sense that we find the set of nodes that reach
31517683Spst * a given node, not the set of nodes that can be reached by a node.
31617683Spst *
31717683Spst * Assumes graph has been leveled.
31817683Spst */
31917683Spststatic void
32017683Spstfind_closure(root)
32117683Spst	struct block *root;
32217683Spst{
32317683Spst	int i;
32417683Spst	struct block *b;
32517683Spst
32617683Spst	/*
32717683Spst	 * Initialize sets to contain no nodes.
32817683Spst	 */
32917683Spst	memset((char *)all_closure_sets, 0,
33017683Spst	      n_blocks * nodewords * sizeof(*all_closure_sets));
33117683Spst
33217683Spst	/* root->level is the highest level number found. */
33317683Spst	for (i = root->level; i >= 0; --i) {
33417683Spst		for (b = levels[i]; b; b = b->link) {
33517683Spst			SET_INSERT(b->closure, b->id);
33617683Spst			if (JT(b) == 0)
33717683Spst				continue;
33817683Spst			SET_UNION(JT(b)->closure, b->closure, nodewords);
33917683Spst			SET_UNION(JF(b)->closure, b->closure, nodewords);
34017683Spst		}
34117683Spst	}
34217683Spst}
34317683Spst
34417683Spst/*
34517683Spst * Return the register number that is used by s.  If A and X are both
34617683Spst * used, return AX_ATOM.  If no register is used, return -1.
34717683Spst *
34817683Spst * The implementation should probably change to an array access.
34917683Spst */
35017683Spststatic int
35117683Spstatomuse(s)
35217683Spst	struct stmt *s;
35317683Spst{
35417683Spst	register int c = s->code;
35517683Spst
35617683Spst	if (c == NOP)
35717683Spst		return -1;
35817683Spst
35917683Spst	switch (BPF_CLASS(c)) {
36017683Spst
36117683Spst	case BPF_RET:
36217683Spst		return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
36317683Spst			(BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
36417683Spst
36517683Spst	case BPF_LD:
36617683Spst	case BPF_LDX:
36717683Spst		return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
36817683Spst			(BPF_MODE(c) == BPF_MEM) ? s->k : -1;
36917683Spst
37017683Spst	case BPF_ST:
37117683Spst		return A_ATOM;
37217683Spst
37317683Spst	case BPF_STX:
37417683Spst		return X_ATOM;
37517683Spst
37617683Spst	case BPF_JMP:
37717683Spst	case BPF_ALU:
37817683Spst		if (BPF_SRC(c) == BPF_X)
37917683Spst			return AX_ATOM;
38017683Spst		return A_ATOM;
38117683Spst
38217683Spst	case BPF_MISC:
38317683Spst		return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
38417683Spst	}
38517683Spst	abort();
38617683Spst	/* NOTREACHED */
38717683Spst}
38817683Spst
38917683Spst/*
39017683Spst * Return the register number that is defined by 's'.  We assume that
39117683Spst * a single stmt cannot define more than one register.  If no register
39217683Spst * is defined, return -1.
39317683Spst *
39417683Spst * The implementation should probably change to an array access.
39517683Spst */
39617683Spststatic int
39717683Spstatomdef(s)
39817683Spst	struct stmt *s;
39917683Spst{
40017683Spst	if (s->code == NOP)
40117683Spst		return -1;
40217683Spst
40317683Spst	switch (BPF_CLASS(s->code)) {
40417683Spst
40517683Spst	case BPF_LD:
40617683Spst	case BPF_ALU:
40717683Spst		return A_ATOM;
40817683Spst
40917683Spst	case BPF_LDX:
41017683Spst		return X_ATOM;
41117683Spst
41217683Spst	case BPF_ST:
41317683Spst	case BPF_STX:
41417683Spst		return s->k;
41517683Spst
41617683Spst	case BPF_MISC:
41717683Spst		return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
41817683Spst	}
41917683Spst	return -1;
42017683Spst}
42117683Spst
42217683Spststatic void
42317683Spstcompute_local_ud(b)
42417683Spst	struct block *b;
42517683Spst{
42617683Spst	struct slist *s;
42717683Spst	atomset def = 0, use = 0, kill = 0;
42817683Spst	int atom;
42917683Spst
43017683Spst	for (s = b->stmts; s; s = s->next) {
43117683Spst		if (s->s.code == NOP)
43217683Spst			continue;
43317683Spst		atom = atomuse(&s->s);
43417683Spst		if (atom >= 0) {
43517683Spst			if (atom == AX_ATOM) {
43617683Spst				if (!ATOMELEM(def, X_ATOM))
43717683Spst					use |= ATOMMASK(X_ATOM);
43817683Spst				if (!ATOMELEM(def, A_ATOM))
43917683Spst					use |= ATOMMASK(A_ATOM);
44017683Spst			}
44117683Spst			else if (atom < N_ATOMS) {
44217683Spst				if (!ATOMELEM(def, atom))
44317683Spst					use |= ATOMMASK(atom);
44417683Spst			}
44517683Spst			else
44617683Spst				abort();
44717683Spst		}
44817683Spst		atom = atomdef(&s->s);
44917683Spst		if (atom >= 0) {
45017683Spst			if (!ATOMELEM(use, atom))
45117683Spst				kill |= ATOMMASK(atom);
45217683Spst			def |= ATOMMASK(atom);
45317683Spst		}
45417683Spst	}
45517683Spst	if (!ATOMELEM(def, A_ATOM) && BPF_CLASS(b->s.code) == BPF_JMP)
45617683Spst		use |= ATOMMASK(A_ATOM);
45717683Spst
45817683Spst	b->def = def;
45917683Spst	b->kill = kill;
46017683Spst	b->in_use = use;
46117683Spst}
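
/*
 * Example (ours): for a block containing "ld M[1]; add #4; st M[2]",
 * the scan above yields in_use = {M[1]} (read before any write),
 * kill = {A, M[2]} (written before any read), and def = {A, M[2]}.
 */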
46217683Spst
46317683Spst/*
46417683Spst * Assume graph is already leveled.
46517683Spst */
46617683Spststatic void
46717683Spstfind_ud(root)
46817683Spst	struct block *root;
46917683Spst{
47017683Spst	int i, maxlevel;
47117683Spst	struct block *p;
47217683Spst
47317683Spst	/*
47417683Spst	 * root->level is the highest level number found;
47517683Spst	 * count down from there.
47617683Spst	 */
47717683Spst	maxlevel = root->level;
47817683Spst	for (i = maxlevel; i >= 0; --i)
47917683Spst		for (p = levels[i]; p; p = p->link) {
48017683Spst			compute_local_ud(p);
48117683Spst			p->out_use = 0;
48217683Spst		}
48317683Spst
48417683Spst	for (i = 1; i <= maxlevel; ++i) {
48517683Spst		for (p = levels[i]; p; p = p->link) {
48617683Spst			p->out_use |= JT(p)->in_use | JF(p)->in_use;
48717683Spst			p->in_use |= p->out_use &~ p->kill;
48817683Spst		}
48917683Spst	}
49017683Spst}
49117683Spst
49217683Spst/*
49317683Spst * These data structures are used in a Cocke and Schwartz style
49417683Spst * value numbering scheme.  Since the flowgraph is acyclic,
49517683Spst * exit values can be propagated from a node's predecessors
49617683Spst * provided the value is uniquely defined.
49717683Spst */
49817683Spststruct valnode {
49917683Spst	int code;
50017683Spst	int v0, v1;
50117683Spst	int val;
50217683Spst	struct valnode *next;
50317683Spst};
50417683Spst
50517683Spst#define MODULUS 213
50617683Spststatic struct valnode *hashtbl[MODULUS];
50717683Spststatic int curval;
50817683Spststatic int maxval;
50917683Spst
51017683Spst/* Integer constants mapped with the load immediate opcode. */
51117683Spst#define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L)
51217683Spst
51317683Spststruct vmapinfo {
51417683Spst	int is_const;
51517683Spst	bpf_int32 const_val;
51617683Spst};
51717683Spst
51817683Spststruct vmapinfo *vmap;
51917683Spststruct valnode *vnode_base;
52017683Spststruct valnode *next_vnode;
52117683Spst
52217683Spststatic void
52317683Spstinit_val()
52417683Spst{
52517683Spst	curval = 0;
52617683Spst	next_vnode = vnode_base;
52717683Spst	memset((char *)vmap, 0, maxval * sizeof(*vmap));
52817683Spst	memset((char *)hashtbl, 0, sizeof hashtbl);
52917683Spst}
53017683Spst
53117683Spst/* Because we really don't have an IR, this stuff is a little messy. */
53217683Spststatic int
53317683SpstF(code, v0, v1)
53417683Spst	int code;
53517683Spst	int v0, v1;
53617683Spst{
53717683Spst	u_int hash;
53817683Spst	int val;
53917683Spst	struct valnode *p;
54017683Spst
54117683Spst	hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
54217683Spst	hash %= MODULUS;
54317683Spst
54417683Spst	for (p = hashtbl[hash]; p; p = p->next)
54517683Spst		if (p->code == code && p->v0 == v0 && p->v1 == v1)
54617683Spst			return p->val;
54717683Spst
54817683Spst	val = ++curval;
54917683Spst	if (BPF_MODE(code) == BPF_IMM &&
55017683Spst	    (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
55117683Spst		vmap[val].const_val = v0;
55217683Spst		vmap[val].is_const = 1;
55317683Spst	}
55417683Spst	p = next_vnode++;
55517683Spst	p->val = val;
55617683Spst	p->code = code;
55717683Spst	p->v0 = v0;
55817683Spst	p->v1 = v1;
55917683Spst	p->next = hashtbl[hash];
56017683Spst	hashtbl[hash] = p;
56117683Spst
56217683Spst	return val;
56317683Spst}
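
/*
 * Sketch (ours): identical computations intern to the same value
 * number, which is what lets later passes prove loads redundant.
 * Assumes opt_init() has already sized vmap and vnode_base.
 */
#if 0
static void
value_numbering_demo(void)
{
	int va, vb;

	init_val();
	va = K(2);	/* value number for "ld #2" */
	vb = K(2);	/* the hash lookup finds the existing node */
	/* now va == vb, vmap[va].is_const != 0, vmap[va].const_val == 2 */
}
#endif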
56417683Spst
56517683Spststatic inline void
56617683Spstvstore(s, valp, newval, alter)
56717683Spst	struct stmt *s;
56817683Spst	int *valp;
56917683Spst	int newval;
57017683Spst	int alter;
57117683Spst{
57217683Spst	if (alter && *valp == newval)
57317683Spst		s->code = NOP;
57417683Spst	else
57517683Spst		*valp = newval;
57617683Spst}
57717683Spst
57817683Spststatic void
57917683Spstfold_op(s, v0, v1)
58017683Spst	struct stmt *s;
58117683Spst	int v0, v1;
58217683Spst{
58317683Spst	bpf_int32 a, b;
58417683Spst
58517683Spst	a = vmap[v0].const_val;
58617683Spst	b = vmap[v1].const_val;
58717683Spst
58817683Spst	switch (BPF_OP(s->code)) {
58917683Spst	case BPF_ADD:
59017683Spst		a += b;
59117683Spst		break;
59217683Spst
59317683Spst	case BPF_SUB:
59417683Spst		a -= b;
59517683Spst		break;
59617683Spst
59717683Spst	case BPF_MUL:
59817683Spst		a *= b;
59917683Spst		break;
60017683Spst
60117683Spst	case BPF_DIV:
60217683Spst		if (b == 0)
60317683Spst			bpf_error("division by zero");
60417683Spst		a /= b;
60517683Spst		break;
60617683Spst
60717683Spst	case BPF_AND:
60817683Spst		a &= b;
60917683Spst		break;
61017683Spst
61117683Spst	case BPF_OR:
61217683Spst		a |= b;
61317683Spst		break;
61417683Spst
61517683Spst	case BPF_LSH:
61617683Spst		a <<= b;
61717683Spst		break;
61817683Spst
61917683Spst	case BPF_RSH:
62017683Spst		a >>= b;
62117683Spst		break;
62217683Spst
62317683Spst	case BPF_NEG:
62417683Spst		a = -a;
62517683Spst		break;
62617683Spst
62717683Spst	default:
62817683Spst		abort();
62917683Spst	}
63017683Spst	s->k = a;
63117683Spst	s->code = BPF_LD|BPF_IMM;
63217683Spst	done = 0;
63317683Spst}
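
/*
 * Example (ours): if value numbering shows A holds the constant 6
 * when "mul #7" is reached, opt_stmt() calls fold_op(), which
 * rewrites the statement to "ld #42" and clears 'done' so another
 * pass runs over the result.
 */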
63417683Spst
63517683Spststatic inline struct slist *
63617683Spstthis_op(s)
63717683Spst	struct slist *s;
63817683Spst{
63917683Spst	while (s != 0 && s->s.code == NOP)
64017683Spst		s = s->next;
64117683Spst	return s;
64217683Spst}
64317683Spst
64417683Spststatic void
64517683Spstopt_not(b)
64617683Spst	struct block *b;
64717683Spst{
64817683Spst	struct block *tmp = JT(b);
64917683Spst
65017683Spst	JT(b) = JF(b);
65117683Spst	JF(b) = tmp;
65217683Spst}
65317683Spst
65417683Spststatic void
65517683Spstopt_peep(b)
65617683Spst	struct block *b;
65717683Spst{
65817683Spst	struct slist *s;
65917683Spst	struct slist *next, *last;
66017683Spst	int val;
66117683Spst
66217683Spst	s = b->stmts;
66317683Spst	if (s == 0)
66417683Spst		return;
66517683Spst
66617683Spst	last = s;
66717683Spst	while (1) {
66817683Spst		s = this_op(s);
66917683Spst		if (s == 0)
67017683Spst			break;
67117683Spst		next = this_op(s->next);
67217683Spst		if (next == 0)
67317683Spst			break;
67417683Spst		last = next;
67517683Spst
67617683Spst		/*
67717683Spst		 * st  M[k]	-->	st  M[k]
67817683Spst		 * ldx M[k]		tax
67917683Spst		 */
68017683Spst		if (s->s.code == BPF_ST &&
68117683Spst		    next->s.code == (BPF_LDX|BPF_MEM) &&
68217683Spst		    s->s.k == next->s.k) {
68317683Spst			done = 0;
68417683Spst			next->s.code = BPF_MISC|BPF_TAX;
68517683Spst		}
68617683Spst		/*
68717683Spst		 * ld  #k	-->	ldx  #k
68817683Spst		 * tax			txa
68917683Spst		 */
69017683Spst		if (s->s.code == (BPF_LD|BPF_IMM) &&
69117683Spst		    next->s.code == (BPF_MISC|BPF_TAX)) {
69217683Spst			s->s.code = BPF_LDX|BPF_IMM;
69317683Spst			next->s.code = BPF_MISC|BPF_TXA;
69417683Spst			done = 0;
69517683Spst		}
69617683Spst		/*
69717683Spst		 * This is an ugly special case, but it happens
69817683Spst		 * when you say tcp[k] or udp[k] where k is a constant.
69917683Spst		 */
70017683Spst		if (s->s.code == (BPF_LD|BPF_IMM)) {
70117683Spst			struct slist *add, *tax, *ild;
70217683Spst
70317683Spst			/*
70417683Spst			 * Check that X isn't used on exit from this
70517683Spst			 * block (which the optimizer might cause).
70617683Spst			 * We know the code generator won't generate
70717683Spst			 * any local dependencies.
70817683Spst			 */
70917683Spst			if (ATOMELEM(b->out_use, X_ATOM))
71017683Spst				break;
71117683Spst
71217683Spst			if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
71317683Spst				add = next;
71417683Spst			else
71517683Spst				add = this_op(next->next);
71617683Spst			if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
71717683Spst				break;
71817683Spst
71917683Spst			tax = this_op(add->next);
72017683Spst			if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
72117683Spst				break;
72217683Spst
72317683Spst			ild = this_op(tax->next);
72417683Spst			if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
72517683Spst			    BPF_MODE(ild->s.code) != BPF_IND)
72617683Spst				break;
72717683Spst			/*
72817683Spst			 * XXX We need to check that X is not
72917683Spst			 * subsequently used.  We know we can eliminate the
73017683Spst			 * accumulator modifications since it is defined
73117683Spst			 * by the last stmt of this sequence.
73217683Spst			 *
73317683Spst			 * We want to turn this sequence:
73417683Spst			 *
73517683Spst			 * (004) ldi     #0x2		{s}
73617683Spst			 * (005) ldxms   [14]		{next}  -- optional
73717683Spst			 * (006) addx			{add}
73817683Spst			 * (007) tax			{tax}
73917683Spst			 * (008) ild     [x+0]		{ild}
74017683Spst			 *
74117683Spst			 * into this sequence:
74217683Spst			 *
74317683Spst			 * (004) nop
74417683Spst			 * (005) ldxms   [14]
74517683Spst			 * (006) nop
74617683Spst			 * (007) nop
74717683Spst			 * (008) ild     [x+2]
74817683Spst			 *
74917683Spst			 */
75017683Spst			ild->s.k += s->s.k;
75117683Spst			s->s.code = NOP;
75217683Spst			add->s.code = NOP;
75317683Spst			tax->s.code = NOP;
75417683Spst			done = 0;
75517683Spst		}
75617683Spst		s = next;
75717683Spst	}
75817683Spst	/*
75917683Spst	 * If we have a subtract to do a comparison, and the X register
76017683Spst	 * is a known constant, we can merge this value into the
76117683Spst	 * comparison.
76217683Spst	 */
763127664Sbms	if (BPF_OP(b->s.code) == BPF_JEQ) {
764127664Sbms		if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X) &&
765127664Sbms		    !ATOMELEM(b->out_use, A_ATOM)) {
766127664Sbms			val = b->val[X_ATOM];
767127664Sbms			if (vmap[val].is_const) {
768127664Sbms				/*
769127664Sbms				 * sub x  ->	nop
770127664Sbms				 * jeq #y	jeq #(x+y)
771127664Sbms				 */
772127664Sbms				b->s.k += vmap[val].const_val;
773127664Sbms				last->s.code = NOP;
774127664Sbms				done = 0;
775127664Sbms			} else if (b->s.k == 0) {
776127664Sbms				/*
777127664Sbms				 * sub x  ->	nop
778127664Sbms				 * jeq #0	jeq x
779127664Sbms				 */
780127664Sbms				last->s.code = NOP;
781127664Sbms				b->s.code = BPF_CLASS(b->s.code) |
782127664Sbms					BPF_OP(b->s.code) | BPF_X;
783127664Sbms				done = 0;
784127664Sbms			}
785127664Sbms		}
786127664Sbms		/*
787127664Sbms		 * Likewise, a constant subtract can be simplified.
788127664Sbms		 */
789127664Sbms		else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K) &&
790127664Sbms			 !ATOMELEM(b->out_use, A_ATOM)) {
79117683Spst
79217683Spst			last->s.code = NOP;
793127664Sbms			b->s.k += last->s.k;
79417683Spst			done = 0;
79517683Spst		}
79617683Spst	}
79717683Spst	/*
79817683Spst	 * and #k	nop
79917683Spst	 * jeq #0  ->	jset #k
80017683Spst	 */
80117683Spst	if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
80217683Spst	    !ATOMELEM(b->out_use, A_ATOM) && b->s.k == 0) {
80317683Spst		b->s.k = last->s.k;
80417683Spst		b->s.code = BPF_JMP|BPF_K|BPF_JSET;
80517683Spst		last->s.code = NOP;
80617683Spst		done = 0;
80717683Spst		opt_not(b);
80817683Spst	}
80917683Spst	/*
810127664Sbms	 * jset #0        ->   never
811127664Sbms	 * jset #ffffffff ->   always
812127664Sbms	 */
813127664Sbms	if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
814127664Sbms		if (b->s.k == 0)
815127664Sbms			JT(b) = JF(b);
816127664Sbms		if (b->s.k == 0xffffffff)
817127664Sbms			JF(b) = JT(b);
818127664Sbms	}
819127664Sbms	/*
82017683Spst	 * If the accumulator is a known constant, we can compute the
82117683Spst	 * comparison result.
82217683Spst	 */
82317683Spst	val = b->val[A_ATOM];
82417683Spst	if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
82517683Spst		bpf_int32 v = vmap[val].const_val;
82617683Spst		switch (BPF_OP(b->s.code)) {
82717683Spst
82817683Spst		case BPF_JEQ:
82917683Spst			v = v == b->s.k;
83017683Spst			break;
83117683Spst
83217683Spst		case BPF_JGT:
83317683Spst			v = (unsigned)v > b->s.k;
83417683Spst			break;
83517683Spst
83617683Spst		case BPF_JGE:
83717683Spst			v = (unsigned)v >= b->s.k;
83817683Spst			break;
83917683Spst
84017683Spst		case BPF_JSET:
84117683Spst			v &= b->s.k;
84217683Spst			break;
84317683Spst
84417683Spst		default:
84517683Spst			abort();
84617683Spst		}
84717683Spst		if (JF(b) != JT(b))
84817683Spst			done = 0;
84917683Spst		if (v)
85017683Spst			JF(b) = JT(b);
85117683Spst		else
85217683Spst			JT(b) = JF(b);
85317683Spst	}
85417683Spst}
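
/*
 * Example (ours): if the block ends in "jeq #2" and value numbering
 * proved A == 2 on every path here, the code above computes v == 1
 * and redirects JF(b) to JT(b), so both edges reach the true
 * successor and the dead test falls away in a later pass.
 */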
85517683Spst
85617683Spst/*
85717683Spst * Compute the symbolic value of the expression of 's', and update
85817683Spst * anything it defines in the value table 'val'.  If 'alter' is true,
85917683Spst * do various optimizations.  This code would be cleaner if symbolic
86017683Spst * evaluation and code transformations weren't folded together.
86117683Spst */
86217683Spststatic void
86317683Spstopt_stmt(s, val, alter)
86417683Spst	struct stmt *s;
86517683Spst	int val[];
86617683Spst	int alter;
86717683Spst{
86817683Spst	int op;
86917683Spst	int v;
87017683Spst
87117683Spst	switch (s->code) {
87217683Spst
87317683Spst	case BPF_LD|BPF_ABS|BPF_W:
87417683Spst	case BPF_LD|BPF_ABS|BPF_H:
87517683Spst	case BPF_LD|BPF_ABS|BPF_B:
87617683Spst		v = F(s->code, s->k, 0L);
87717683Spst		vstore(s, &val[A_ATOM], v, alter);
87817683Spst		break;
87917683Spst
88017683Spst	case BPF_LD|BPF_IND|BPF_W:
88117683Spst	case BPF_LD|BPF_IND|BPF_H:
88217683Spst	case BPF_LD|BPF_IND|BPF_B:
88317683Spst		v = val[X_ATOM];
88417683Spst		if (alter && vmap[v].is_const) {
88517683Spst			s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
88617683Spst			s->k += vmap[v].const_val;
88717683Spst			v = F(s->code, s->k, 0L);
88817683Spst			done = 0;
88917683Spst		}
89017683Spst		else
89117683Spst			v = F(s->code, s->k, v);
89217683Spst		vstore(s, &val[A_ATOM], v, alter);
89317683Spst		break;
89417683Spst
89517683Spst	case BPF_LD|BPF_LEN:
89617683Spst		v = F(s->code, 0L, 0L);
89717683Spst		vstore(s, &val[A_ATOM], v, alter);
89817683Spst		break;
89917683Spst
90017683Spst	case BPF_LD|BPF_IMM:
90117683Spst		v = K(s->k);
90217683Spst		vstore(s, &val[A_ATOM], v, alter);
90317683Spst		break;
90417683Spst
90517683Spst	case BPF_LDX|BPF_IMM:
90617683Spst		v = K(s->k);
90717683Spst		vstore(s, &val[X_ATOM], v, alter);
90817683Spst		break;
90917683Spst
91017683Spst	case BPF_LDX|BPF_MSH|BPF_B:
91117683Spst		v = F(s->code, s->k, 0L);
91217683Spst		vstore(s, &val[X_ATOM], v, alter);
91317683Spst		break;
91417683Spst
91517683Spst	case BPF_ALU|BPF_NEG:
91617683Spst		if (alter && vmap[val[A_ATOM]].is_const) {
91717683Spst			s->code = BPF_LD|BPF_IMM;
91817683Spst			s->k = -vmap[val[A_ATOM]].const_val;
91917683Spst			val[A_ATOM] = K(s->k);
92017683Spst		}
92117683Spst		else
92217683Spst			val[A_ATOM] = F(s->code, val[A_ATOM], 0L);
92317683Spst		break;
92417683Spst
92517683Spst	case BPF_ALU|BPF_ADD|BPF_K:
92617683Spst	case BPF_ALU|BPF_SUB|BPF_K:
92717683Spst	case BPF_ALU|BPF_MUL|BPF_K:
92817683Spst	case BPF_ALU|BPF_DIV|BPF_K:
92917683Spst	case BPF_ALU|BPF_AND|BPF_K:
93017683Spst	case BPF_ALU|BPF_OR|BPF_K:
93117683Spst	case BPF_ALU|BPF_LSH|BPF_K:
93217683Spst	case BPF_ALU|BPF_RSH|BPF_K:
93317683Spst		op = BPF_OP(s->code);
93417683Spst		if (alter) {
93517683Spst			if (s->k == 0) {
93698530Sfenner				/* don't optimize away "sub #0"
93798530Sfenner				 * as it may be needed later to
93898530Sfenner				 * fixup the generated math code */
93998530Sfenner				if (op == BPF_ADD ||
94017683Spst				    op == BPF_LSH || op == BPF_RSH ||
94117683Spst				    op == BPF_OR) {
94217683Spst					s->code = NOP;
94317683Spst					break;
94417683Spst				}
94517683Spst				if (op == BPF_MUL || op == BPF_AND) {
94617683Spst					s->code = BPF_LD|BPF_IMM;
94717683Spst					val[A_ATOM] = K(s->k);
94817683Spst					break;
94917683Spst				}
95017683Spst			}
95117683Spst			if (vmap[val[A_ATOM]].is_const) {
95217683Spst				fold_op(s, val[A_ATOM], K(s->k));
95317683Spst				val[A_ATOM] = K(s->k);
95417683Spst				break;
95517683Spst			}
95617683Spst		}
95717683Spst		val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k));
95817683Spst		break;
95917683Spst
96017683Spst	case BPF_ALU|BPF_ADD|BPF_X:
96117683Spst	case BPF_ALU|BPF_SUB|BPF_X:
96217683Spst	case BPF_ALU|BPF_MUL|BPF_X:
96317683Spst	case BPF_ALU|BPF_DIV|BPF_X:
96417683Spst	case BPF_ALU|BPF_AND|BPF_X:
96517683Spst	case BPF_ALU|BPF_OR|BPF_X:
96617683Spst	case BPF_ALU|BPF_LSH|BPF_X:
96717683Spst	case BPF_ALU|BPF_RSH|BPF_X:
96817683Spst		op = BPF_OP(s->code);
96917683Spst		if (alter && vmap[val[X_ATOM]].is_const) {
97017683Spst			if (vmap[val[A_ATOM]].is_const) {
97117683Spst				fold_op(s, val[A_ATOM], val[X_ATOM]);
97217683Spst				val[A_ATOM] = K(s->k);
97317683Spst			}
97417683Spst			else {
97517683Spst				s->code = BPF_ALU|BPF_K|op;
97617683Spst				s->k = vmap[val[X_ATOM]].const_val;
97717683Spst				done = 0;
97817683Spst				val[A_ATOM] =
97917683Spst					F(s->code, val[A_ATOM], K(s->k));
98017683Spst			}
98117683Spst			break;
98217683Spst		}
98317683Spst		/*
98417683Spst		 * Check if we're doing something to an accumulator
98517683Spst		 * that is 0, and simplify.  This may not seem like
98617683Spst		 * much of a simplification but it could open up further
98717683Spst		 * optimizations.
988127664Sbms		 * XXX We could also check for mul by 1, etc.
98917683Spst		 */
99017683Spst		if (alter && vmap[val[A_ATOM]].is_const
99117683Spst		    && vmap[val[A_ATOM]].const_val == 0) {
992127664Sbms			if (op == BPF_ADD || op == BPF_OR) {
99317683Spst				s->code = BPF_MISC|BPF_TXA;
99417683Spst				vstore(s, &val[A_ATOM], val[X_ATOM], alter);
99517683Spst				break;
99617683Spst			}
99717683Spst			else if (op == BPF_MUL || op == BPF_DIV ||
998127664Sbms				 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
99917683Spst				s->code = BPF_LD|BPF_IMM;
100017683Spst				s->k = 0;
100117683Spst				vstore(s, &val[A_ATOM], K(s->k), alter);
100217683Spst				break;
100317683Spst			}
100417683Spst			else if (op == BPF_NEG) {
100517683Spst				s->code = NOP;
100617683Spst				break;
100717683Spst			}
100817683Spst		}
100917683Spst		val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]);
101017683Spst		break;
101117683Spst
101217683Spst	case BPF_MISC|BPF_TXA:
101317683Spst		vstore(s, &val[A_ATOM], val[X_ATOM], alter);
101417683Spst		break;
101517683Spst
101617683Spst	case BPF_LD|BPF_MEM:
101717683Spst		v = val[s->k];
101817683Spst		if (alter && vmap[v].is_const) {
101917683Spst			s->code = BPF_LD|BPF_IMM;
102017683Spst			s->k = vmap[v].const_val;
102117683Spst			done = 0;
102217683Spst		}
102317683Spst		vstore(s, &val[A_ATOM], v, alter);
102417683Spst		break;
102517683Spst
102617683Spst	case BPF_MISC|BPF_TAX:
102717683Spst		vstore(s, &val[X_ATOM], val[A_ATOM], alter);
102817683Spst		break;
102917683Spst
103017683Spst	case BPF_LDX|BPF_MEM:
103117683Spst		v = val[s->k];
103217683Spst		if (alter && vmap[v].is_const) {
103317683Spst			s->code = BPF_LDX|BPF_IMM;
103417683Spst			s->k = vmap[v].const_val;
103517683Spst			done = 0;
103617683Spst		}
103717683Spst		vstore(s, &val[X_ATOM], v, alter);
103817683Spst		break;
103917683Spst
104017683Spst	case BPF_ST:
104117683Spst		vstore(s, &val[s->k], val[A_ATOM], alter);
104217683Spst		break;
104317683Spst
104417683Spst	case BPF_STX:
104517683Spst		vstore(s, &val[s->k], val[X_ATOM], alter);
104617683Spst		break;
104717683Spst	}
104817683Spst}
104917683Spst
105017683Spststatic void
105117683Spstdeadstmt(s, last)
105217683Spst	register struct stmt *s;
105317683Spst	register struct stmt *last[];
105417683Spst{
105517683Spst	register int atom;
105617683Spst
105717683Spst	atom = atomuse(s);
105817683Spst	if (atom >= 0) {
105917683Spst		if (atom == AX_ATOM) {
106017683Spst			last[X_ATOM] = 0;
106117683Spst			last[A_ATOM] = 0;
106217683Spst		}
106317683Spst		else
106417683Spst			last[atom] = 0;
106517683Spst	}
106617683Spst	atom = atomdef(s);
106717683Spst	if (atom >= 0) {
106817683Spst		if (last[atom]) {
106917683Spst			done = 0;
107017683Spst			last[atom]->code = NOP;
107117683Spst		}
107217683Spst		last[atom] = s;
107317683Spst	}
107417683Spst}
107517683Spst
107617683Spststatic void
107717683Spstopt_deadstores(b)
107817683Spst	register struct block *b;
107917683Spst{
108017683Spst	register struct slist *s;
108117683Spst	register int atom;
108217683Spst	struct stmt *last[N_ATOMS];
108317683Spst
108417683Spst	memset((char *)last, 0, sizeof last);
108517683Spst
108617683Spst	for (s = b->stmts; s != 0; s = s->next)
108717683Spst		deadstmt(&s->s, last);
108817683Spst	deadstmt(&b->s, last);
108917683Spst
109017683Spst	for (atom = 0; atom < N_ATOMS; ++atom)
109117683Spst		if (last[atom] && !ATOMELEM(b->out_use, atom)) {
109217683Spst			last[atom]->code = NOP;
109317683Spst			done = 0;
109417683Spst		}
109517683Spst}
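
/*
 * Example (ours): in "st M[3]; ld #1; st M[3]" the first store is
 * dead, so deadstmt() turns it into a NOP; if M[3] is not in
 * b->out_use, the loop above NOPs the second store as well.
 */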
109617683Spst
109717683Spststatic void
109817683Spstopt_blk(b, do_stmts)
109917683Spst	struct block *b;
110017683Spst	int do_stmts;
110117683Spst{
110217683Spst	struct slist *s;
110317683Spst	struct edge *p;
110417683Spst	int i;
110517683Spst	bpf_int32 aval;
110617683Spst
110756889Sfenner#if 0
110856889Sfenner	for (s = b->stmts; s && s->next; s = s->next)
110956889Sfenner		if (BPF_CLASS(s->s.code) == BPF_JMP) {
111056889Sfenner			do_stmts = 0;
111156889Sfenner			break;
111256889Sfenner		}
111356889Sfenner#endif
111456889Sfenner
111517683Spst	/*
111617683Spst	 * Initialize the atom values.
111717683Spst	 * If we have no predecessors, everything is undefined.
111817683Spst	 * Otherwise, we inherit our values from our predecessors.
111917683Spst	 * If any register has an ambiguous value (i.e. control paths are
112017683Spst	 * merging) give it the undefined value of 0.
112117683Spst	 */
112217683Spst	p = b->in_edges;
112317683Spst	if (p == 0)
112417683Spst		memset((char *)b->val, 0, sizeof(b->val));
112517683Spst	else {
112617683Spst		memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
112717683Spst		while ((p = p->next) != NULL) {
112817683Spst			for (i = 0; i < N_ATOMS; ++i)
112917683Spst				if (b->val[i] != p->pred->val[i])
113017683Spst					b->val[i] = 0;
113117683Spst		}
113217683Spst	}
113317683Spst	aval = b->val[A_ATOM];
113417683Spst	for (s = b->stmts; s; s = s->next)
113517683Spst		opt_stmt(&s->s, b->val, do_stmts);
113617683Spst
113717683Spst	/*
113817683Spst	 * This is a special case: if we don't use anything from this
113917683Spst	 * block, and we load the accumulator with a value that is
114017683Spst	 * already there, or if this block is a return,
114117683Spst	 * eliminate all the statements.
114217683Spst	 */
1143127664Sbms	if (do_stmts &&
114417683Spst	    ((b->out_use == 0 && aval != 0 && b->val[A_ATOM] == aval) ||
114517683Spst	     BPF_CLASS(b->s.code) == BPF_RET)) {
114617683Spst		if (b->stmts != 0) {
114717683Spst			b->stmts = 0;
114817683Spst			done = 0;
114917683Spst		}
115017683Spst	} else {
115117683Spst		opt_peep(b);
115217683Spst		opt_deadstores(b);
115317683Spst	}
115417683Spst	/*
115517683Spst	 * Set up values for branch optimizer.
115617683Spst	 */
115717683Spst	if (BPF_SRC(b->s.code) == BPF_K)
115817683Spst		b->oval = K(b->s.k);
115917683Spst	else
116017683Spst		b->oval = b->val[X_ATOM];
116117683Spst	b->et.code = b->s.code;
116217683Spst	b->ef.code = -b->s.code;
116317683Spst}
116417683Spst
116517683Spst/*
116617683Spst * Return true if any register that is used on exit from 'succ' has
116717683Spst * an exit value that is different from the corresponding exit value
116817683Spst * from 'b'.
116917683Spst */
117017683Spststatic int
117117683Spstuse_conflict(b, succ)
117217683Spst	struct block *b, *succ;
117317683Spst{
117417683Spst	int atom;
117517683Spst	atomset use = succ->out_use;
117617683Spst
117717683Spst	if (use == 0)
117817683Spst		return 0;
117917683Spst
118017683Spst	for (atom = 0; atom < N_ATOMS; ++atom)
118117683Spst		if (ATOMELEM(use, atom))
118217683Spst			if (b->val[atom] != succ->val[atom])
118317683Spst				return 1;
118417683Spst	return 0;
118517683Spst}
118617683Spst
118717683Spststatic struct block *
118817683Spstfold_edge(child, ep)
118917683Spst	struct block *child;
119017683Spst	struct edge *ep;
119117683Spst{
119217683Spst	int sense;
119317683Spst	int aval0, aval1, oval0, oval1;
119417683Spst	int code = ep->code;
119517683Spst
119617683Spst	if (code < 0) {
119717683Spst		code = -code;
119817683Spst		sense = 0;
119917683Spst	} else
120017683Spst		sense = 1;
120117683Spst
120217683Spst	if (child->s.code != code)
120317683Spst		return 0;
120417683Spst
120517683Spst	aval0 = child->val[A_ATOM];
120617683Spst	oval0 = child->oval;
120717683Spst	aval1 = ep->pred->val[A_ATOM];
120817683Spst	oval1 = ep->pred->oval;
120917683Spst
121017683Spst	if (aval0 != aval1)
121117683Spst		return 0;
121217683Spst
121317683Spst	if (oval0 == oval1)
121417683Spst		/*
121517683Spst		 * The operands are identical, so the
121617683Spst		 * result is true if a true branch was
121717683Spst		 * taken to get here, otherwise false.
121817683Spst		 */
121917683Spst		return sense ? JT(child) : JF(child);
122017683Spst
122117683Spst	if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
122217683Spst		/*
122317683Spst		 * At this point, we only know the comparison if we
122417683Spst		 * came down the true branch, and it was an equality
122517683Spst		 * comparison with a constant.  We rely on the fact that
122617683Spst		 * distinct constants have distinct value numbers.
122717683Spst		 */
122817683Spst		return JF(child);
122917683Spst
123017683Spst	return 0;
123117683Spst}
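
/*
 * Example (ours): suppose this edge is the true branch of
 * "jeq #0x800" and the child tests "jeq #0x800" against the same
 * A value number.  The operands match, so the child's outcome is
 * known: fold_edge() returns JT(child), and opt_j() can bypass the
 * redundant test.  Distinct constants give the complementary case,
 * returning JF(child) from the true branch of a BPF_JEQ|BPF_K test.
 */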
123217683Spst
123317683Spststatic void
123417683Spstopt_j(ep)
123517683Spst	struct edge *ep;
123617683Spst{
123717683Spst	register int i, k;
123817683Spst	register struct block *target;
123917683Spst
124017683Spst	if (JT(ep->succ) == 0)
124117683Spst		return;
124217683Spst
124317683Spst	if (JT(ep->succ) == JF(ep->succ)) {
124417683Spst		/*
124517683Spst		 * Common branch targets can be eliminated, provided
124617683Spst		 * there is no data dependency.
124717683Spst		 */
124817683Spst		if (!use_conflict(ep->pred, ep->succ->et.succ)) {
124917683Spst			done = 0;
125017683Spst			ep->succ = JT(ep->succ);
125117683Spst		}
125217683Spst	}
125317683Spst	/*
125417683Spst	 * For each edge dominator that matches the successor of this
125517683Spst	 * edge, promote the edge successor to its grandchild.
125617683Spst	 *
125717683Spst	 * XXX We violate the set abstraction here in favor of a reasonably
125817683Spst	 * efficient loop.
125917683Spst	 */
126017683Spst top:
126117683Spst	for (i = 0; i < edgewords; ++i) {
126217683Spst		register bpf_u_int32 x = ep->edom[i];
126317683Spst
126417683Spst		while (x != 0) {
126517683Spst			k = ffs(x) - 1;
126617683Spst			x &=~ (1 << k);
126717683Spst			k += i * BITS_PER_WORD;
126817683Spst
126917683Spst			target = fold_edge(ep->succ, edges[k]);
127017683Spst			/*
127117683Spst			 * Check that there is no data dependency between
127217683Spst			 * nodes that will be violated if we move the edge.
127317683Spst			 */
127417683Spst			if (target != 0 && !use_conflict(ep->pred, target)) {
127517683Spst				done = 0;
127617683Spst				ep->succ = target;
127717683Spst				if (JT(target) != 0)
127817683Spst					/*
127917683Spst					 * Start over unless we hit a leaf.
128017683Spst					 */
128117683Spst					goto top;
128217683Spst				return;
128317683Spst			}
128417683Spst		}
128517683Spst	}
128617683Spst}
128717683Spst
128817683Spst
128917683Spststatic void
129017683Spstor_pullup(b)
129117683Spst	struct block *b;
129217683Spst{
129317683Spst	int val, at_top;
129417683Spst	struct block *pull;
129517683Spst	struct block **diffp, **samep;
129617683Spst	struct edge *ep;
129717683Spst
129817683Spst	ep = b->in_edges;
129917683Spst	if (ep == 0)
130017683Spst		return;
130117683Spst
130217683Spst	/*
130317683Spst	 * Make sure each predecessor loads the same value.
130417683Spst	 * XXX why?
130517683Spst	 */
130617683Spst	val = ep->pred->val[A_ATOM];
130717683Spst	for (ep = ep->next; ep != 0; ep = ep->next)
130817683Spst		if (val != ep->pred->val[A_ATOM])
130917683Spst			return;
131017683Spst
131117683Spst	if (JT(b->in_edges->pred) == b)
131217683Spst		diffp = &JT(b->in_edges->pred);
131317683Spst	else
131417683Spst		diffp = &JF(b->in_edges->pred);
131517683Spst
131617683Spst	at_top = 1;
131717683Spst	while (1) {
131817683Spst		if (*diffp == 0)
131917683Spst			return;
132017683Spst
132117683Spst		if (JT(*diffp) != JT(b))
132217683Spst			return;
132317683Spst
132417683Spst		if (!SET_MEMBER((*diffp)->dom, b->id))
132517683Spst			return;
132617683Spst
132717683Spst		if ((*diffp)->val[A_ATOM] != val)
132817683Spst			break;
132917683Spst
133017683Spst		diffp = &JF(*diffp);
133117683Spst		at_top = 0;
133217683Spst	}
133317683Spst	samep = &JF(*diffp);
133417683Spst	while (1) {
133517683Spst		if (*samep == 0)
133617683Spst			return;
133717683Spst
133817683Spst		if (JT(*samep) != JT(b))
133917683Spst			return;
134017683Spst
134117683Spst		if (!SET_MEMBER((*samep)->dom, b->id))
134217683Spst			return;
134317683Spst
134417683Spst		if ((*samep)->val[A_ATOM] == val)
134517683Spst			break;
134617683Spst
134717683Spst		/* XXX Need to check that there are no data dependencies
134817683Spst		   between dp0 and dp1.  Currently, the code generator
134917683Spst		   will not produce such dependencies. */
135017683Spst		samep = &JF(*samep);
135117683Spst	}
135217683Spst#ifdef notdef
135317683Spst	/* XXX This doesn't cover everything. */
135417683Spst	for (i = 0; i < N_ATOMS; ++i)
135517683Spst		if ((*samep)->val[i] != pred->val[i])
135617683Spst			return;
135717683Spst#endif
135817683Spst	/* Pull up the node. */
135917683Spst	pull = *samep;
136017683Spst	*samep = JF(pull);
136117683Spst	JF(pull) = *diffp;
136217683Spst
136317683Spst	/*
136417683Spst	 * At the top of the chain, each predecessor needs to point at the
136517683Spst	 * pulled up node.  Inside the chain, there is only one predecessor
136617683Spst	 * to worry about.
136717683Spst	 */
136817683Spst	if (at_top) {
136917683Spst		for (ep = b->in_edges; ep != 0; ep = ep->next) {
137017683Spst			if (JT(ep->pred) == b)
137117683Spst				JT(ep->pred) = pull;
137217683Spst			else
137317683Spst				JF(ep->pred) = pull;
137417683Spst		}
137517683Spst	}
137617683Spst	else
137717683Spst		*diffp = pull;
137817683Spst
137917683Spst	done = 0;
138017683Spst}
138117683Spst
138217683Spststatic void
138317683Spstand_pullup(b)
138417683Spst	struct block *b;
138517683Spst{
138617683Spst	int val, at_top;
138717683Spst	struct block *pull;
138817683Spst	struct block **diffp, **samep;
138917683Spst	struct edge *ep;
139017683Spst
139117683Spst	ep = b->in_edges;
139217683Spst	if (ep == 0)
139317683Spst		return;
139417683Spst
139517683Spst	/*
139617683Spst	 * Make sure each predecessor loads the same value.
139717683Spst	 */
139817683Spst	val = ep->pred->val[A_ATOM];
139917683Spst	for (ep = ep->next; ep != 0; ep = ep->next)
140017683Spst		if (val != ep->pred->val[A_ATOM])
140117683Spst			return;
140217683Spst
140317683Spst	if (JT(b->in_edges->pred) == b)
140417683Spst		diffp = &JT(b->in_edges->pred);
140517683Spst	else
140617683Spst		diffp = &JF(b->in_edges->pred);
140717683Spst
140817683Spst	at_top = 1;
140917683Spst	while (1) {
141017683Spst		if (*diffp == 0)
141117683Spst			return;
141217683Spst
141317683Spst		if (JF(*diffp) != JF(b))
141417683Spst			return;
141517683Spst
141617683Spst		if (!SET_MEMBER((*diffp)->dom, b->id))
141717683Spst			return;
141817683Spst
141917683Spst		if ((*diffp)->val[A_ATOM] != val)
142017683Spst			break;
142117683Spst
142217683Spst		diffp = &JT(*diffp);
142317683Spst		at_top = 0;
142417683Spst	}
142517683Spst	samep = &JT(*diffp);
142617683Spst	while (1) {
142717683Spst		if (*samep == 0)
142817683Spst			return;
142917683Spst
143017683Spst		if (JF(*samep) != JF(b))
143117683Spst			return;
143217683Spst
143317683Spst		if (!SET_MEMBER((*samep)->dom, b->id))
143417683Spst			return;
143517683Spst
143617683Spst		if ((*samep)->val[A_ATOM] == val)
143717683Spst			break;
143817683Spst
143917683Spst		/* XXX Need to check that there are no data dependencies
144017683Spst		   between diffp and samep.  Currently, the code generator
144117683Spst		   will not produce such dependencies. */
144217683Spst		samep = &JT(*samep);
144317683Spst	}
144417683Spst#ifdef notdef
144517683Spst	/* XXX This doesn't cover everything. */
144617683Spst	for (i = 0; i < N_ATOMS; ++i)
144717683Spst		if ((*samep)->val[i] != pred->val[i])
144817683Spst			return;
144917683Spst#endif
145017683Spst	/* Pull up the node. */
145117683Spst	pull = *samep;
145217683Spst	*samep = JT(pull);
145317683Spst	JT(pull) = *diffp;
145417683Spst
145517683Spst	/*
145617683Spst	 * At the top of the chain, each predecessor needs to point at the
145717683Spst	 * pulled up node.  Inside the chain, there is only one predecessor
145817683Spst	 * to worry about.
145917683Spst	 */
146017683Spst	if (at_top) {
146117683Spst		for (ep = b->in_edges; ep != 0; ep = ep->next) {
146217683Spst			if (JT(ep->pred) == b)
146317683Spst				JT(ep->pred) = pull;
146417683Spst			else
146517683Spst				JF(ep->pred) = pull;
146617683Spst		}
146717683Spst	}
146817683Spst	else
146917683Spst		*diffp = pull;
147017683Spst
147117683Spst	done = 0;
147217683Spst}
147317683Spst
147417683Spststatic void
147517683Spstopt_blks(root, do_stmts)
147617683Spst	struct block *root;
147717683Spst	int do_stmts;
147817683Spst{
147917683Spst	int i, maxlevel;
148017683Spst	struct block *p;
148117683Spst
148217683Spst	init_val();
148317683Spst	maxlevel = root->level;
148475107Sfenner
148575107Sfenner	find_inedges(root);
148617683Spst	for (i = maxlevel; i >= 0; --i)
148717683Spst		for (p = levels[i]; p; p = p->link)
148817683Spst			opt_blk(p, do_stmts);
148917683Spst
149017683Spst	if (do_stmts)
149117683Spst		/*
149217683Spst		 * No point trying to move branches; it can't possibly
149317683Spst		 * make a difference at this point.
149417683Spst		 */
149517683Spst		return;
149617683Spst
149717683Spst	for (i = 1; i <= maxlevel; ++i) {
149817683Spst		for (p = levels[i]; p; p = p->link) {
149917683Spst			opt_j(&p->et);
150017683Spst			opt_j(&p->ef);
150117683Spst		}
150217683Spst	}
150375107Sfenner
150475107Sfenner	find_inedges(root);
150517683Spst	for (i = 1; i <= maxlevel; ++i) {
150617683Spst		for (p = levels[i]; p; p = p->link) {
150717683Spst			or_pullup(p);
150817683Spst			and_pullup(p);
150917683Spst		}
151017683Spst	}
151117683Spst}
151217683Spst
151317683Spststatic inline void
151417683Spstlink_inedge(parent, child)
151517683Spst	struct edge *parent;
151617683Spst	struct block *child;
151717683Spst{
151817683Spst	parent->next = child->in_edges;
151917683Spst	child->in_edges = parent;
152017683Spst}
152117683Spst
152217683Spststatic void
152317683Spstfind_inedges(root)
152417683Spst	struct block *root;
152517683Spst{
152617683Spst	int i;
152717683Spst	struct block *b;
152817683Spst
152917683Spst	for (i = 0; i < n_blocks; ++i)
153017683Spst		blocks[i]->in_edges = 0;
153117683Spst
153217683Spst	/*
153317683Spst	 * Traverse the graph, adding each edge to the predecessor
153417683Spst	 * list of its successors.  Skip the leaves (i.e. level 0).
153517683Spst	 */
153617683Spst	for (i = root->level; i > 0; --i) {
153717683Spst		for (b = levels[i]; b != 0; b = b->link) {
153817683Spst			link_inedge(&b->et, JT(b));
153917683Spst			link_inedge(&b->ef, JF(b));
154017683Spst		}
154117683Spst	}
154217683Spst}
154317683Spst
154417683Spststatic void
154517683Spstopt_root(b)
154617683Spst	struct block **b;
154717683Spst{
154817683Spst	struct slist *tmp, *s;
154917683Spst
155017683Spst	s = (*b)->stmts;
155117683Spst	(*b)->stmts = 0;
155217683Spst	while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
155317683Spst		*b = JT(*b);
155417683Spst
155517683Spst	tmp = (*b)->stmts;
155617683Spst	if (tmp != 0)
155717683Spst		sappend(s, tmp);
155817683Spst	(*b)->stmts = s;
155917683Spst
156017683Spst	/*
156117683Spst	 * If the root node is a return, then there is no
156217683Spst	 * point executing any statements (since the bpf machine
156317683Spst	 * has no side effects).
156417683Spst	 */
156517683Spst	if (BPF_CLASS((*b)->s.code) == BPF_RET)
156617683Spst		(*b)->stmts = 0;
156717683Spst}
156817683Spst
156917683Spststatic void
157017683Spstopt_loop(root, do_stmts)
157117683Spst	struct block *root;
157217683Spst	int do_stmts;
157317683Spst{
157417683Spst
157517683Spst#ifdef BDEBUG
157698530Sfenner	if (dflag > 1) {
157798530Sfenner		printf("opt_loop(root, %d) begin\n", do_stmts);
157817683Spst		opt_dump(root);
157998530Sfenner	}
158017683Spst#endif
158117683Spst	do {
158217683Spst		done = 1;
158317683Spst		find_levels(root);
158417683Spst		find_dom(root);
158517683Spst		find_closure(root);
158617683Spst		find_ud(root);
158717683Spst		find_edom(root);
158817683Spst		opt_blks(root, do_stmts);
158917683Spst#ifdef BDEBUG
159098530Sfenner		if (dflag > 1) {
159198530Sfenner			printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, done);
159217683Spst			opt_dump(root);
159398530Sfenner		}
159417683Spst#endif
159517683Spst	} while (!done);
159617683Spst}
159717683Spst
159817683Spst/*
159917683Spst * Optimize the filter code in its dag representation.
160017683Spst */
160117683Spstvoid
160217683Spstbpf_optimize(rootp)
160317683Spst	struct block **rootp;
160417683Spst{
160517683Spst	struct block *root;
160617683Spst
160717683Spst	root = *rootp;
160817683Spst
160917683Spst	opt_init(root);
161017683Spst	opt_loop(root, 0);
161117683Spst	opt_loop(root, 1);
161217683Spst	intern_blocks(root);
161398530Sfenner#ifdef BDEBUG
161498530Sfenner	if (dflag > 1) {
161598530Sfenner		printf("after intern_blocks()\n");
161698530Sfenner		opt_dump(root);
161798530Sfenner	}
161898530Sfenner#endif
161917683Spst	opt_root(rootp);
162098530Sfenner#ifdef BDEBUG
162198530Sfenner	if (dflag > 1) {
162298530Sfenner		printf("after opt_root()\n");
162398530Sfenner		opt_dump(root);
162498530Sfenner	}
162598530Sfenner#endif
162617683Spst	opt_cleanup();
162717683Spst}
162817683Spst
162917683Spststatic void
163017683Spstmake_marks(p)
163117683Spst	struct block *p;
163217683Spst{
163317683Spst	if (!isMarked(p)) {
163417683Spst		Mark(p);
163517683Spst		if (BPF_CLASS(p->s.code) != BPF_RET) {
163617683Spst			make_marks(JT(p));
163717683Spst			make_marks(JF(p));
163817683Spst		}
163917683Spst	}
164017683Spst}
164117683Spst
164217683Spst/*
164317683Spst * Mark code array such that isMarked(i) is true
164417683Spst * only for nodes that are alive.
164517683Spst */
164617683Spststatic void
164717683Spstmark_code(p)
164817683Spst	struct block *p;
164917683Spst{
165017683Spst	cur_mark += 1;
165117683Spst	make_marks(p);
165217683Spst}
165317683Spst
165417683Spst/*
165517683Spst * True iff the two stmt lists load the same value from the packet into
165617683Spst * the accumulator.
165717683Spst */
165817683Spststatic int
165917683Spsteq_slist(x, y)
166017683Spst	struct slist *x, *y;
166117683Spst{
166217683Spst	while (1) {
166317683Spst		while (x && x->s.code == NOP)
166417683Spst			x = x->next;
166517683Spst		while (y && y->s.code == NOP)
166617683Spst			y = y->next;
166717683Spst		if (x == 0)
166817683Spst			return y == 0;
166917683Spst		if (y == 0)
167017683Spst			return x == 0;
167117683Spst		if (x->s.code != y->s.code || x->s.k != y->s.k)
167217683Spst			return 0;
167317683Spst		x = x->next;
167417683Spst		y = y->next;
167517683Spst	}
167617683Spst}
167717683Spst
167817683Spststatic inline int
167917683Spsteq_blk(b0, b1)
168017683Spst	struct block *b0, *b1;
168117683Spst{
168217683Spst	if (b0->s.code == b1->s.code &&
168317683Spst	    b0->s.k == b1->s.k &&
168417683Spst	    b0->et.succ == b1->et.succ &&
168517683Spst	    b0->ef.succ == b1->ef.succ)
168617683Spst		return eq_slist(b0->stmts, b1->stmts);
168717683Spst	return 0;
168817683Spst}
168917683Spst
169017683Spststatic void
169117683Spstintern_blocks(root)
169217683Spst	struct block *root;
169317683Spst{
169417683Spst	struct block *p;
169517683Spst	int i, j;
169617683Spst	int done;
169717683Spst top:
169817683Spst	done = 1;
169917683Spst	for (i = 0; i < n_blocks; ++i)
170017683Spst		blocks[i]->link = 0;
170117683Spst
170217683Spst	mark_code(root);
170317683Spst
170417683Spst	for (i = n_blocks - 1; --i >= 0; ) {
170517683Spst		if (!isMarked(blocks[i]))
170617683Spst			continue;
170717683Spst		for (j = i + 1; j < n_blocks; ++j) {
170817683Spst			if (!isMarked(blocks[j]))
170917683Spst				continue;
171017683Spst			if (eq_blk(blocks[i], blocks[j])) {
171117683Spst				blocks[i]->link = blocks[j]->link ?
171217683Spst					blocks[j]->link : blocks[j];
171317683Spst				break;
171417683Spst			}
171517683Spst		}
171617683Spst	}
171717683Spst	for (i = 0; i < n_blocks; ++i) {
171817683Spst		p = blocks[i];
171917683Spst		if (JT(p) == 0)
172017683Spst			continue;
172117683Spst		if (JT(p)->link) {
172217683Spst			done = 0;
172317683Spst			JT(p) = JT(p)->link;
172417683Spst		}
172517683Spst		if (JF(p)->link) {
172617683Spst			done = 0;
172717683Spst			JF(p) = JF(p)->link;
172817683Spst		}
172917683Spst	}
173017683Spst	if (!done)
173117683Spst		goto top;
173217683Spst}
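
/*
 * Example (ours): two live leaves that both execute "ret #0" with
 * identical statement lists satisfy eq_blk(); the earlier block's
 * link is pointed at the later one, every jump to it is redirected,
 * and the loop restarts until no more blocks can be merged.
 */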
173317683Spst
173417683Spststatic void
173517683Spstopt_cleanup()
173617683Spst{
173717683Spst	free((void *)vnode_base);
173817683Spst	free((void *)vmap);
173917683Spst	free((void *)edges);
174017683Spst	free((void *)space);
174117683Spst	free((void *)levels);
174217683Spst	free((void *)blocks);
174317683Spst}
174417683Spst
174517683Spst/*
174617683Spst * Return the number of stmts in 's'.
174717683Spst */
174817683Spststatic int
174917683Spstslength(s)
175017683Spst	struct slist *s;
175117683Spst{
175217683Spst	int n = 0;
175317683Spst
175417683Spst	for (; s; s = s->next)
175517683Spst		if (s->s.code != NOP)
175617683Spst			++n;
175717683Spst	return n;
175817683Spst}
175917683Spst
176017683Spst/*
176117683Spst * Return the number of nodes reachable from 'p'.
176217683Spst * All nodes should be initially unmarked.
176317683Spst */
176417683Spststatic int
176517683Spstcount_blocks(p)
176617683Spst	struct block *p;
176717683Spst{
176817683Spst	if (p == 0 || isMarked(p))
176917683Spst		return 0;
177017683Spst	Mark(p);
177117683Spst	return count_blocks(JT(p)) + count_blocks(JF(p)) + 1;
177217683Spst}
177317683Spst
177417683Spst/*
177517683Spst * Do a depth first search on the flow graph, numbering the
177617683Spst * basic blocks, and entering them into the 'blocks' array.
177717683Spst */
177817683Spststatic void
177917683Spstnumber_blks_r(p)
178017683Spst	struct block *p;
178117683Spst{
178217683Spst	int n;
178317683Spst
178417683Spst	if (p == 0 || isMarked(p))
178517683Spst		return;
178617683Spst
178717683Spst	Mark(p);
178817683Spst	n = n_blocks++;
178917683Spst	p->id = n;
179017683Spst	blocks[n] = p;
179117683Spst
179217683Spst	number_blks_r(JT(p));
179317683Spst	number_blks_r(JF(p));
179417683Spst}
179517683Spst
179617683Spst/*
179717683Spst * Return the number of stmts in the flowgraph reachable from 'p'.
179817683Spst * The nodes should be unmarked before calling.
179975107Sfenner *
180075107Sfenner * Note that "stmts" means "instructions", and that this includes
180175107Sfenner *
180275107Sfenner *	side-effect statements in 'p' (slength(p->stmts));
180375107Sfenner *
180475107Sfenner *	statements in the true branch from 'p' (count_stmts(JT(p)));
180575107Sfenner *
180675107Sfenner *	statements in the false branch from 'p' (count_stmts(JF(p)));
180775107Sfenner *
180875107Sfenner *	the conditional jump itself (1);
180975107Sfenner *
181075107Sfenner *	an extra long jump if the true branch requires it (p->longjt);
181175107Sfenner *
181275107Sfenner *	an extra long jump if the false branch requires it (p->longjf).
181317683Spst */
181417683Spststatic int
181517683Spstcount_stmts(p)
181617683Spst	struct block *p;
181717683Spst{
181817683Spst	int n;
181917683Spst
182017683Spst	if (p == 0 || isMarked(p))
182117683Spst		return 0;
182217683Spst	Mark(p);
182317683Spst	n = count_stmts(JT(p)) + count_stmts(JF(p));
182475107Sfenner	return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
182517683Spst}
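/*
 * Example: a reachable block whose stmt list has three non-NOP entries,
 * with longjt == 1 and longjf == 0, contributes 3 + 1 + 1 = 5
 * instructions: the three stmts, the conditional jump itself, and one
 * extra BPF_JA for the oversized true branch.
 */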
182617683Spst
182717683Spst/*
182817683Spst * Allocate memory.  All allocation is done before optimization
182917683Spst * is begun.  A linear bound on the size of all data structures is computed
183017683Spst * from the total number of blocks and/or statements.
183117683Spst */
183217683Spststatic void
183317683Spstopt_init(root)
183417683Spst	struct block *root;
183517683Spst{
183617683Spst	bpf_u_int32 *p;
183717683Spst	int i, n, max_stmts;
183817683Spst
183917683Spst	/*
184017683Spst	 * First, count the blocks, so we can malloc an array to map
184117683Spst	 * block number to block.  Then, put the blocks into the array.
184217683Spst	 */
184317683Spst	unMarkAll();
184417683Spst	n = count_blocks(root);
184517683Spst	blocks = (struct block **)malloc(n * sizeof(*blocks));
1846127664Sbms	if (blocks == NULL)
1847127664Sbms		bpf_error("malloc");
184817683Spst	unMarkAll();
184917683Spst	n_blocks = 0;
185017683Spst	number_blks_r(root);
185117683Spst
185217683Spst	n_edges = 2 * n_blocks;
185317683Spst	edges = (struct edge **)malloc(n_edges * sizeof(*edges));
1854127664Sbms	if (edges == NULL)
1855127664Sbms		bpf_error("malloc");
185617683Spst
185717683Spst	/*
185817683Spst	 * The number of levels is bounded by the number of nodes.
185917683Spst	 */
186017683Spst	levels = (struct block **)malloc(n_blocks * sizeof(*levels));
1861127664Sbms	if (levels == NULL)
1862127664Sbms		bpf_error("malloc");
186317683Spst
186417683Spst	edgewords = n_edges / (8 * sizeof(bpf_u_int32)) + 1;
186517683Spst	nodewords = n_blocks / (8 * sizeof(bpf_u_int32)) + 1;
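	/*
	 * Example: with 100 blocks and 32-bit words, nodewords is
	 * 100/32 + 1 = 4 words per node set; the 200 edges give
	 * edgewords = 200/32 + 1 = 7 words per edge set.
	 */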
186617683Spst
186717683Spst	/* XXX */
186817683Spst	space = (bpf_u_int32 *)malloc(2 * n_blocks * nodewords * sizeof(*space)
186917683Spst				 + n_edges * edgewords * sizeof(*space));
1870127664Sbms	if (space == NULL)
1871127664Sbms		bpf_error("malloc");
187217683Spst	p = space;
187317683Spst	all_dom_sets = p;
187417683Spst	for (i = 0; i < n; ++i) {
187517683Spst		blocks[i]->dom = p;
187617683Spst		p += nodewords;
187717683Spst	}
187817683Spst	all_closure_sets = p;
187917683Spst	for (i = 0; i < n; ++i) {
188017683Spst		blocks[i]->closure = p;
188117683Spst		p += nodewords;
188217683Spst	}
188317683Spst	all_edge_sets = p;
188417683Spst	for (i = 0; i < n; ++i) {
188517683Spst		register struct block *b = blocks[i];
188617683Spst
188717683Spst		b->et.edom = p;
188817683Spst		p += edgewords;
188917683Spst		b->ef.edom = p;
189017683Spst		p += edgewords;
189117683Spst		b->et.id = i;
189217683Spst		edges[i] = &b->et;
189317683Spst		b->ef.id = n_blocks + i;
189417683Spst		edges[n_blocks + i] = &b->ef;
189517683Spst		b->et.pred = b;
189617683Spst		b->ef.pred = b;
189717683Spst	}
189817683Spst	max_stmts = 0;
189917683Spst	for (i = 0; i < n; ++i)
190017683Spst		max_stmts += slength(blocks[i]->stmts) + 1;
190117683Spst	/*
190217683Spst	 * We allocate at most 3 value numbers per statement,
190317683Spst	 * so this is an upper bound on the number of valnodes
190417683Spst	 * we'll need.
190517683Spst	 */
190617683Spst	maxval = 3 * max_stmts;
190717683Spst	vmap = (struct vmapinfo *)malloc(maxval * sizeof(*vmap));
190875107Sfenner	vnode_base = (struct valnode *)malloc(maxval * sizeof(*vnode_base));
1909127664Sbms	if (vmap == NULL || vnode_base == NULL)
1910127664Sbms		bpf_error("malloc");
191117683Spst}
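/*
 * Layout of the single 'space' allocation carved up above:
 *
 *	all_dom_sets:	  n_blocks sets of nodewords words each
 *	all_closure_sets: n_blocks sets of nodewords words each
 *	all_edge_sets:	  n_edges sets of edgewords words each
 *			  (two per block: one true edge, one false edge)
 */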
191217683Spst
191317683Spst/*
191417683Spst * Some pointers used to convert the basic block form of the code
191517683Spst * into the array form that BPF requires.  'fstart' will point to
191617683Spst * the malloc'd array while 'ftail' is used during the recursive traversal.
191717683Spst */
191817683Spststatic struct bpf_insn *fstart;
191917683Spststatic struct bpf_insn *ftail;
192017683Spst
192117683Spst#ifdef BDEBUG
192217683Spstint bids[1000];
192317683Spst#endif
192417683Spst
192517683Spst/*
192617683Spst * Returns true if successful.  Returns false if a branch has
192717683Spst * an offset that is too large.  If so, we have marked that
192817683Spst * branch so that on a subsequent iteration, it will be treated
192917683Spst * properly.
193017683Spst */
193117683Spststatic int
193217683Spstconvert_code_r(p)
193317683Spst	struct block *p;
193417683Spst{
193517683Spst	struct bpf_insn *dst;
193617683Spst	struct slist *src;
193717683Spst	int slen;
193817683Spst	u_int off;
193917683Spst	int extrajmps;		/* number of extra jumps inserted */
194056889Sfenner	struct slist **offset = NULL;
194117683Spst
194217683Spst	if (p == 0 || isMarked(p))
194317683Spst		return (1);
194417683Spst	Mark(p);
194517683Spst
194617683Spst	if (convert_code_r(JF(p)) == 0)
194717683Spst		return (0);
194817683Spst	if (convert_code_r(JT(p)) == 0)
194917683Spst		return (0);
195017683Spst
195117683Spst	slen = slength(p->stmts);
195217683Spst	dst = ftail -= (slen + 1 + p->longjt + p->longjf);
195317683Spst		/* inflate length by any extra jumps */
195417683Spst
195517683Spst	p->offset = dst - fstart;
195617683Spst
195756889Sfenner	/* generate offset[] for convenience */
195856889Sfenner	if (slen) {
1959127664Sbms		offset = (struct slist **)calloc(slen, sizeof(struct slist *));
196056889Sfenner		if (!offset) {
196156889Sfenner			bpf_error("not enough core");
196256889Sfenner			/*NOTREACHED*/
196356889Sfenner		}
196456889Sfenner	}
196556889Sfenner	src = p->stmts;
196656889Sfenner	for (off = 0; off < slen && src; off++) {
196756889Sfenner#if 0
196856889Sfenner		printf("off=%d src=%x\n", off, src);
196956889Sfenner#endif
197056889Sfenner		offset[off] = src;
197156889Sfenner		src = src->next;
197256889Sfenner	}
197356889Sfenner
197456889Sfenner	off = 0;
197517683Spst	for (src = p->stmts; src; src = src->next) {
197617683Spst		if (src->s.code == NOP)
197717683Spst			continue;
197817683Spst		dst->code = (u_short)src->s.code;
197917683Spst		dst->k = src->s.k;
198056889Sfenner
198156889Sfenner		/* fill block-local relative jump */
198275107Sfenner		if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
198356889Sfenner#if 0
198456889Sfenner			if (src->s.jt || src->s.jf) {
198556889Sfenner				bpf_error("illegal jmp destination");
198656889Sfenner				/*NOTREACHED*/
198756889Sfenner			}
198856889Sfenner#endif
198956889Sfenner			goto filled;
199056889Sfenner		}
199156889Sfenner		if (off == slen - 2)	/*???*/
199256889Sfenner			goto filled;
199356889Sfenner
199456889Sfenner	    {
199556889Sfenner		int i;
199656889Sfenner		int jt, jf;
199756889Sfenner		char *ljerr = "%s for block-local relative jump: off=%d";
199856889Sfenner
199956889Sfenner#if 0
200056889Sfenner		printf("code=%x off=%d %x %x\n", src->s.code,
200156889Sfenner			off, src->s.jt, src->s.jf);
200256889Sfenner#endif
200356889Sfenner
200456889Sfenner		if (!src->s.jt || !src->s.jf) {
200556889Sfenner			bpf_error(ljerr, "no jmp destination", off);
200656889Sfenner			/*NOTREACHED*/
200756889Sfenner		}
200856889Sfenner
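		/*
		 * BPF conditional-jump offsets are relative to the next
		 * instruction, so a jump at local index 'off' whose target
		 * sits at local index 'i' gets the offset i - off - 1.
		 */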
200956889Sfenner		jt = jf = 0;
201056889Sfenner		for (i = 0; i < slen; i++) {
201156889Sfenner			if (offset[i] == src->s.jt) {
201256889Sfenner				if (jt) {
201356889Sfenner					bpf_error(ljerr, "multiple matches", off);
201456889Sfenner					/*NOTREACHED*/
201556889Sfenner				}
201656889Sfenner
201756889Sfenner				dst->jt = i - off - 1;
201856889Sfenner				jt++;
201956889Sfenner			}
202056889Sfenner			if (offset[i] == src->s.jf) {
202156889Sfenner				if (jf) {
202256889Sfenner					bpf_error(ljerr, "multiple matches", off);
202356889Sfenner					/*NOTREACHED*/
202456889Sfenner				}
202556889Sfenner				dst->jf = i - off - 1;
202656889Sfenner				jf++;
202756889Sfenner			}
202856889Sfenner		}
202956889Sfenner		if (!jt || !jf) {
203056889Sfenner			bpf_error(ljerr, "no destination found", off);
203156889Sfenner			/*NOTREACHED*/
203256889Sfenner		}
203356889Sfenner	    }
203456889Sfennerfilled:
203517683Spst		++dst;
203656889Sfenner		++off;
203717683Spst	}
203856889Sfenner	if (offset)
203956889Sfenner		free(offset);
204056889Sfenner
204117683Spst#ifdef BDEBUG
204217683Spst	bids[dst - fstart] = p->id + 1;
204317683Spst#endif
204417683Spst	dst->code = (u_short)p->s.code;
204517683Spst	dst->k = p->s.k;
204617683Spst	if (JT(p)) {
204717683Spst		extrajmps = 0;
204817683Spst		off = JT(p)->offset - (p->offset + slen) - 1;
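		/*
		 * The jt/jf fields are only 8 bits wide; an offset of 256
		 * or more must be reached through an appended BPF_JA,
		 * whose 32-bit k is reduced by the number of extra jumps
		 * now sitting between the branch and the target.
		 */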
204917683Spst		if (off >= 256) {
205017683Spst		    /* offset too large for branch, must add a jump */
205117683Spst		    if (p->longjt == 0) {
205217683Spst		    	/* mark this instruction and retry */
205317683Spst			p->longjt++;
205417683Spst			return(0);
205517683Spst		    }
205617683Spst		    /* branch if T to following jump */
205717683Spst		    dst->jt = extrajmps;
205817683Spst		    extrajmps++;
205917683Spst		    dst[extrajmps].code = BPF_JMP|BPF_JA;
206017683Spst		    dst[extrajmps].k = off - extrajmps;
206117683Spst		}
206217683Spst		else
206317683Spst		    dst->jt = off;
206417683Spst		off = JF(p)->offset - (p->offset + slen) - 1;
206517683Spst		if (off >= 256) {
206617683Spst		    /* offset too large for branch, must add a jump */
206717683Spst		    if (p->longjf == 0) {
206817683Spst		    	/* mark this instruction and retry */
206917683Spst			p->longjf++;
207017683Spst			return(0);
207117683Spst		    }
207217683Spst		    /* branch if F to following jump */
207317683Spst		    /* if two jumps are inserted, F goes to second one */
207417683Spst		    dst->jf = extrajmps;
207517683Spst		    extrajmps++;
207617683Spst		    dst[extrajmps].code = BPF_JMP|BPF_JA;
207717683Spst		    dst[extrajmps].k = off - extrajmps;
207817683Spst		}
207917683Spst		else
208017683Spst		    dst->jf = off;
208117683Spst	}
208217683Spst	return (1);
208317683Spst}
208417683Spst
208517683Spst
208617683Spst/*
208717683Spst * Convert flowgraph intermediate representation to the
208817683Spst * BPF array representation.  Set *lenp to the number of instructions.
208917683Spst */
209017683Spststruct bpf_insn *
209117683Spsticode_to_fcode(root, lenp)
209217683Spst	struct block *root;
209317683Spst	int *lenp;
209417683Spst{
209517683Spst	int n;
209617683Spst	struct bpf_insn *fp;
209717683Spst
209817683Spst	/*
209998530Sfenner	 * Loop doing convert_code_r() until no branches remain
210017683Spst	 * with too-large offsets.
210117683Spst	 */
210217683Spst	while (1) {
210317683Spst	    unMarkAll();
210417683Spst	    n = *lenp = count_stmts(root);
2105127664Sbms
210617683Spst	    fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
2107127664Sbms	    if (fp == NULL)
2108127664Sbms		    bpf_error("malloc");
210917683Spst	    memset((char *)fp, 0, sizeof(*fp) * n);
211017683Spst	    fstart = fp;
211117683Spst	    ftail = fp + n;
2112127664Sbms
211317683Spst	    unMarkAll();
211417683Spst	    if (convert_code_r(root))
211517683Spst		break;
211617683Spst	    free(fp);
211717683Spst	}
211817683Spst
211917683Spst	return fp;
212017683Spst}
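/*
 * Illustrative sketch (not compiled; opt_dump() below does this for
 * real): the caller owns the returned array and must free it.
 */
#if 0
	struct bpf_program f;

	f.bf_insns = icode_to_fcode(root, &f.bf_len);
	/* ... hand f to the kernel, bpf_filter(), or bpf_dump() ... */
	free((char *)f.bf_insns);
#endif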
212117683Spst
212275107Sfenner/*
212375107Sfenner * Make a copy of a BPF program and put it in the "fcode" member of
212475107Sfenner * a "pcap_t".
212575107Sfenner *
212675107Sfenner * If we fail to allocate memory for the copy, fill in the "errbuf"
212775107Sfenner * member of the "pcap_t" with an error message, and return -1;
212875107Sfenner * otherwise, return 0.
212975107Sfenner */
213075107Sfennerint
213175107Sfennerinstall_bpf_program(pcap_t *p, struct bpf_program *fp)
213275107Sfenner{
213375107Sfenner	size_t prog_size;
213475107Sfenner
213575107Sfenner	/*
213675107Sfenner	 * Free up any already installed program.
213775107Sfenner	 */
213875107Sfenner	pcap_freecode(&p->fcode);
213975107Sfenner
214075107Sfenner	prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
214175107Sfenner	p->fcode.bf_len = fp->bf_len;
214275107Sfenner	p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
214375107Sfenner	if (p->fcode.bf_insns == NULL) {
214475107Sfenner		snprintf(p->errbuf, sizeof(p->errbuf),
214575107Sfenner			 "malloc: %s", pcap_strerror(errno));
214675107Sfenner		return (-1);
214775107Sfenner	}
214875107Sfenner	memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
214975107Sfenner	return (0);
215075107Sfenner}
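/*
 * Illustrative sketch (not compiled): a pcap_setfilter()-style caller,
 * where 'fp' holds a program produced by pcap_compile().
 */
#if 0
	if (install_bpf_program(p, fp) < 0) {
		fprintf(stderr, "install_bpf_program: %s\n", pcap_geterr(p));
		return (-1);
	}
#endif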
215175107Sfenner
215217683Spst#ifdef BDEBUG
215317683Spststatic void
215417683Spstopt_dump(root)
215517683Spst	struct block *root;
215617683Spst{
215717683Spst	struct bpf_program f;
215817683Spst
215917683Spst	memset(bids, 0, sizeof bids);
216017683Spst	f.bf_insns = icode_to_fcode(root, &f.bf_len);
216117683Spst	bpf_dump(&f, 1);
216217683Spst	putchar('\n');
216317683Spst	free((char *)f.bf_insns);
216417683Spst}
216517683Spst#endif
2166