/*
 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that: (1) source code distributions
 * retain the above copyright notice and this paragraph in its entirety, (2)
 * distributions including binary code include the above copyright notice and
 * this paragraph in its entirety in the documentation or other materials
 * provided with the distribution, and (3) all advertising materials mentioning
 * features or use of this software display the following acknowledgement:
 * ``This product includes software developed by the University of California,
 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
 * the University nor the names of its contributors may be used to endorse
 * or promote products derived from this software without specific prior
 * written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 *  Optimization module for tcpdump intermediate representation.
 */
#ifndef lint
static const char rcsid[] =
    "@(#) $Header: /tcpdump/master/libpcap/optimize.c,v 1.61 1999/10/19 15:18:30 itojun Exp $ (LBL)";
#endif

#include <sys/types.h>
#include <sys/time.h>

#include <stdio.h>
#include <stdlib.h>
#include <memory.h>

#include "pcap-int.h"

#include "gencode.h"

#include "gnuc.h"
#ifdef HAVE_OS_PROTO_H
#include "os-proto.h"
#endif

#ifdef BDEBUG
extern int dflag;
#endif

#define A_ATOM BPF_MEMWORDS
#define X_ATOM (BPF_MEMWORDS+1)

#define NOP -1

/*
 * This define is used to represent *both* the accumulator and
 * x register in use-def computations.
 * Currently, the use-def code assumes only one definition per instruction.
 */
#define AX_ATOM N_ATOMS

/*
 * A flag to indicate that further optimization is needed.
 * Iterative passes are continued until a given pass yields no
 * branch movement.
 */
static int done;

/*
 * A block is marked if and only if its mark equals the current mark.
 * Rather than traverse the code array, marking each item, 'cur_mark' is
 * incremented.  This automatically makes each element unmarked.
 */
static int cur_mark;
#define isMarked(p) ((p)->mark == cur_mark)
#define unMarkAll() cur_mark += 1
#define Mark(p) ((p)->mark = cur_mark)

static void opt_init(struct block *);
static void opt_cleanup(void);

static void make_marks(struct block *);
static void mark_code(struct block *);

static void intern_blocks(struct block *);

static int eq_slist(struct slist *, struct slist *);

static void find_levels_r(struct block *);

static void find_levels(struct block *);
static void find_dom(struct block *);
static void propedom(struct edge *);
static void find_edom(struct block *);
static void find_closure(struct block *);
static int atomuse(struct stmt *);
static int atomdef(struct stmt *);
static void compute_local_ud(struct block *);
static void find_ud(struct block *);
static void init_val(void);
static int F(int, int, int);
static inline void vstore(struct stmt *, int *, int, int);
static void opt_blk(struct block *, int);
static int use_conflict(struct block *, struct block *);
static void opt_j(struct edge *);
static void or_pullup(struct block *);
static void and_pullup(struct block *);
static void opt_blks(struct block *, int);
static inline void link_inedge(struct edge *, struct block *);
static void find_inedges(struct block *);
static void opt_root(struct block **);
static void opt_loop(struct block *, int);
static void fold_op(struct stmt *, int, int);
static inline struct slist *this_op(struct slist *);
static void opt_not(struct block *);
static void opt_peep(struct block *);
static void opt_stmt(struct stmt *, int[], int);
static void deadstmt(struct stmt *, struct stmt *[]);
static void opt_deadstores(struct block *);
static struct block *fold_edge(struct block *, struct edge *);
static inline int eq_blk(struct block *, struct block *);
static int slength(struct slist *);
static int count_blocks(struct block *);
static void number_blks_r(struct block *);
static int count_stmts(struct block *);
static int convert_code_r(struct block *);
#ifdef BDEBUG
static void opt_dump(struct block *);
#endif

static int n_blocks;
struct block **blocks;
static int n_edges;
struct edge **edges;

/*
 * A bit vector set representation of the dominators.
 * We round up the set size to the next word boundary.
 */
static int nodewords;
static int edgewords;
struct block **levels;
bpf_u_int32 *space;
#define BITS_PER_WORD (8*sizeof(bpf_u_int32))
/*
 * True if 'a' is in uset 'p'.
 */
#define SET_MEMBER(p, a) \
((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))

/*
 * Add 'a' to uset p.
 */
#define SET_INSERT(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))

/*
 * Delete 'a' from uset p.
 */
#define SET_DELETE(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))

/*
 * a := a intersect b
 */
#define SET_INTERSECT(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &= *_y++;\
}

/*
 * a := a - b
 */
#define SET_SUBTRACT(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &=~ *_y++;\
}

/*
 * a := a union b
 */
#define SET_UNION(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ |= *_y++;\
}
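
/*
 * For illustration (hypothetical fragment, never compiled): with
 * 32-bit words, element 33 of a uset lives in bit 1 of word 1, so
 * the macros above reduce to single-word operations.
 */
#ifdef notdef
static void
uset_example(p)
	bpf_u_int32 *p;
{
	SET_INSERT(p, 33);		/* p[1] |= 0x2 */
	if (SET_MEMBER(p, 33))		/* tests that same bit */
		SET_DELETE(p, 33);	/* p[1] &= ~0x2 */
}
#endif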

static uset all_dom_sets;
static uset all_closure_sets;
static uset all_edge_sets;

#ifndef MAX
#define MAX(a,b) ((a)>(b)?(a):(b))
#endif

static void
find_levels_r(b)
	struct block *b;
{
	int level;

	if (isMarked(b))
		return;

	Mark(b);
	b->link = 0;

	if (JT(b)) {
		find_levels_r(JT(b));
		find_levels_r(JF(b));
		level = MAX(JT(b)->level, JF(b)->level) + 1;
	} else
		level = 0;
	b->level = level;
	b->link = levels[level];
	levels[level] = b;
}

/*
 * Level graph.  The levels go from 0 at the leaves to
 * N_LEVELS at the root.  The levels[] array points to the
 * first node of the level list, whose elements are linked
 * with the 'link' field of the struct block.
 */
static void
find_levels(root)
	struct block *root;
{
	memset((char *)levels, 0, n_blocks * sizeof(*levels));
	unMarkAll();
	find_levels_r(root);
}
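
/*
 * For illustration: if both branches of B2 go to B1 and both branches
 * of B1 go to the return block B0, then B0 lands at level 0, B1 at
 * level 1 and B2 at level 2; each levels[i] list here holds a single
 * block, so every 'link' field is 0.
 */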

/*
 * Find dominator relationships.
 * Assumes graph has been leveled.
 */
static void
find_dom(root)
	struct block *root;
{
	int i;
	struct block *b;
	bpf_u_int32 *x;

	/*
	 * Initialize sets to contain all nodes.
	 */
	x = all_dom_sets;
	i = n_blocks * nodewords;
	while (--i >= 0)
		*x++ = ~0;
	/* Root starts off empty. */
	for (i = nodewords; --i >= 0;)
		root->dom[i] = 0;

	/* root->level is the highest level number found. */
	for (i = root->level; i >= 0; --i) {
		for (b = levels[i]; b; b = b->link) {
			SET_INSERT(b->dom, b->id);
			if (JT(b) == 0)
				continue;
			SET_INTERSECT(JT(b)->dom, b->dom, nodewords);
			SET_INTERSECT(JF(b)->dom, b->dom, nodewords);
		}
	}
}
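
/*
 * For illustration, in a diamond where B3 branches to B2 and B1 and
 * both rejoin at B0, the intersections above leave dom(B0) = {B3, B0}:
 * B2 and B1 drop out because neither lies on every root-to-B0 path.
 */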

static void
propedom(ep)
	struct edge *ep;
{
	SET_INSERT(ep->edom, ep->id);
	if (ep->succ) {
		SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords);
		SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords);
	}
}

/*
 * Compute edge dominators.
 * Assumes graph has been leveled and predecessors established.
 */
static void
find_edom(root)
	struct block *root;
{
	int i;
	uset x;
	struct block *b;

	x = all_edge_sets;
	for (i = n_edges * edgewords; --i >= 0; )
		x[i] = ~0;

	/* root->level is the highest level number found. */
	memset(root->et.edom, 0, edgewords * sizeof(*(uset)0));
	memset(root->ef.edom, 0, edgewords * sizeof(*(uset)0));
	for (i = root->level; i >= 0; --i) {
		for (b = levels[i]; b != 0; b = b->link) {
			propedom(&b->et);
			propedom(&b->ef);
		}
	}
}

/*
 * Find the backwards transitive closure of the flow graph.  These sets
 * are backwards in the sense that we find the set of nodes that reach
 * a given node, not the set of nodes that can be reached by a node.
 *
 * Assumes graph has been leveled.
 */
static void
find_closure(root)
	struct block *root;
{
	int i;
	struct block *b;

	/*
	 * Initialize sets to contain no nodes.
	 */
	memset((char *)all_closure_sets, 0,
	      n_blocks * nodewords * sizeof(*all_closure_sets));

	/* root->level is the highest level number found. */
	for (i = root->level; i >= 0; --i) {
		for (b = levels[i]; b; b = b->link) {
			SET_INSERT(b->closure, b->id);
			if (JT(b) == 0)
				continue;
			SET_UNION(JT(b)->closure, b->closure, nodewords);
			SET_UNION(JF(b)->closure, b->closure, nodewords);
		}
	}
}
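
/*
 * For illustration, in the same diamond (B3 to B2 and B1, rejoining
 * at B0), the unions above yield closure(B0) = {B3, B2, B1, B0}:
 * every block reaches B0, even though only B3 dominates it.
 */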

/*
 * Return the register number that is used by s.  If A and X are both
 * used, return AX_ATOM.  If no register is used, return -1.
 *
 * The implementation should probably change to an array access.
 */
static int
atomuse(s)
	struct stmt *s;
{
	register int c = s->code;

	if (c == NOP)
		return -1;

	switch (BPF_CLASS(c)) {

	case BPF_RET:
		return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
			(BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;

	case BPF_LD:
	case BPF_LDX:
		return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
			(BPF_MODE(c) == BPF_MEM) ? s->k : -1;

	case BPF_ST:
		return A_ATOM;

	case BPF_STX:
		return X_ATOM;

	case BPF_JMP:
	case BPF_ALU:
		if (BPF_SRC(c) == BPF_X)
			return AX_ATOM;
		return A_ATOM;

	case BPF_MISC:
		return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
	}
	abort();
	/* NOTREACHED */
}

/*
 * Return the register number that is defined by 's'.  We assume that
 * a single stmt cannot define more than one register.  If no register
 * is defined, return -1.
 *
 * The implementation should probably change to an array access.
 */
static int
atomdef(s)
	struct stmt *s;
{
	if (s->code == NOP)
		return -1;

	switch (BPF_CLASS(s->code)) {

	case BPF_LD:
	case BPF_ALU:
		return A_ATOM;

	case BPF_LDX:
		return X_ATOM;

	case BPF_ST:
	case BPF_STX:
		return s->k;

	case BPF_MISC:
		return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
	}
	return -1;
}
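
/*
 * For illustration: for "ld M[3]" (BPF_LD|BPF_MEM, k == 3), atomuse()
 * returns 3 (the scratch cell read) and atomdef() returns A_ATOM (the
 * accumulator written); for "tax" (BPF_MISC|BPF_TAX), atomuse()
 * returns A_ATOM and atomdef() returns X_ATOM.
 */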

static void
compute_local_ud(b)
	struct block *b;
{
	struct slist *s;
	atomset def = 0, use = 0, kill = 0;
	int atom;

	for (s = b->stmts; s; s = s->next) {
		if (s->s.code == NOP)
			continue;
		atom = atomuse(&s->s);
		if (atom >= 0) {
			if (atom == AX_ATOM) {
				if (!ATOMELEM(def, X_ATOM))
					use |= ATOMMASK(X_ATOM);
				if (!ATOMELEM(def, A_ATOM))
					use |= ATOMMASK(A_ATOM);
			}
			else if (atom < N_ATOMS) {
				if (!ATOMELEM(def, atom))
					use |= ATOMMASK(atom);
			}
			else
				abort();
		}
		atom = atomdef(&s->s);
		if (atom >= 0) {
			if (!ATOMELEM(use, atom))
				kill |= ATOMMASK(atom);
			def |= ATOMMASK(atom);
		}
	}
	if (!ATOMELEM(def, A_ATOM) && BPF_CLASS(b->s.code) == BPF_JMP)
		use |= ATOMMASK(A_ATOM);

	b->def = def;
	b->kill = kill;
	b->in_use = use;
}
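
/*
 * For illustration, a block "ldx M[1]; st M[2]" that ends in a
 * conditional jump computes use = {1, A_ATOM}, def = {X_ATOM, 2} and
 * kill = {2}: cell 1 and the accumulator are read before any
 * definition in the block, while cell 2 is written before being read.
 */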

/*
 * Assume graph is already leveled.
 */
static void
find_ud(root)
	struct block *root;
{
	int i, maxlevel;
	struct block *p;

	/*
	 * root->level is the highest level number found;
	 * count down from there.
	 */
	maxlevel = root->level;
	for (i = maxlevel; i >= 0; --i)
		for (p = levels[i]; p; p = p->link) {
			compute_local_ud(p);
			p->out_use = 0;
		}

	for (i = 1; i <= maxlevel; ++i) {
		for (p = levels[i]; p; p = p->link) {
			p->out_use |= JT(p)->in_use | JF(p)->in_use;
			p->in_use |= p->out_use &~ p->kill;
		}
	}
}

/*
 * These data structures are used in a Cocke and Schwartz style
 * value numbering scheme.  Since the flowgraph is acyclic,
 * exit values can be propagated from a node's predecessors
 * provided it is uniquely defined.
 */
struct valnode {
	int code;
	int v0, v1;
	int val;
	struct valnode *next;
};

#define MODULUS 213
static struct valnode *hashtbl[MODULUS];
static int curval;
static int maxval;

/* Integer constants mapped with the load immediate opcode. */
#define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L)

struct vmapinfo {
	int is_const;
	bpf_int32 const_val;
};

struct vmapinfo *vmap;
struct valnode *vnode_base;
struct valnode *next_vnode;

static void
init_val()
{
	curval = 0;
	next_vnode = vnode_base;
	memset((char *)vmap, 0, maxval * sizeof(*vmap));
	memset((char *)hashtbl, 0, sizeof hashtbl);
}

/* Because we really don't have an IR, this stuff is a little messy. */
static int
F(code, v0, v1)
	int code;
	int v0, v1;
{
	u_int hash;
	int val;
	struct valnode *p;

	hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
	hash %= MODULUS;

	for (p = hashtbl[hash]; p; p = p->next)
		if (p->code == code && p->v0 == v0 && p->v1 == v1)
			return p->val;

	val = ++curval;
	if (BPF_MODE(code) == BPF_IMM &&
	    (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
		vmap[val].const_val = v0;
		vmap[val].is_const = 1;
	}
	p = next_vnode++;
	p->val = val;
	p->code = code;
	p->v0 = v0;
	p->v1 = v1;
	p->next = hashtbl[hash];
	hashtbl[hash] = p;

	return val;
}
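
/*
 * For illustration (hypothetical fragment, never compiled): identical
 * computations receive identical value numbers, which is what lets
 * the rest of the optimizer detect redundancy.
 */
#ifdef notdef
	int v1 = F(BPF_LD|BPF_ABS|BPF_W, 14, 0L);
	int v2 = F(BPF_LD|BPF_ABS|BPF_W, 14, 0L);	/* v2 == v1 */
	int k3 = K(3);		/* vmap[k3].is_const is set */
#endif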

static inline void
vstore(s, valp, newval, alter)
	struct stmt *s;
	int *valp;
	int newval;
	int alter;
{
	if (alter && *valp == newval)
		s->code = NOP;
	else
		*valp = newval;
}

static void
fold_op(s, v0, v1)
	struct stmt *s;
	int v0, v1;
{
	bpf_int32 a, b;

	a = vmap[v0].const_val;
	b = vmap[v1].const_val;

	switch (BPF_OP(s->code)) {
	case BPF_ADD:
		a += b;
		break;

	case BPF_SUB:
		a -= b;
		break;

	case BPF_MUL:
		a *= b;
		break;

	case BPF_DIV:
		if (b == 0)
			bpf_error("division by zero");
		a /= b;
		break;

	case BPF_AND:
		a &= b;
		break;

	case BPF_OR:
		a |= b;
		break;

	case BPF_LSH:
		a <<= b;
		break;

	case BPF_RSH:
		a >>= b;
		break;

	case BPF_NEG:
		a = -a;
		break;

	default:
		abort();
	}
	s->k = a;
	s->code = BPF_LD|BPF_IMM;
	done = 0;
}
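
/*
 * For illustration: if the accumulator holds the known constant 2 and
 * the statement is "add #3", fold_op() computes 2 + 3 and rewrites
 * the statement in place as "ld #5" (BPF_LD|BPF_IMM, k == 5), then
 * clears 'done' so another pass runs over the result.
 */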

static inline struct slist *
this_op(s)
	struct slist *s;
{
	while (s != 0 && s->s.code == NOP)
		s = s->next;
	return s;
}

static void
opt_not(b)
	struct block *b;
{
	struct block *tmp = JT(b);

	JT(b) = JF(b);
	JF(b) = tmp;
}

static void
opt_peep(b)
	struct block *b;
{
	struct slist *s;
	struct slist *next, *last;
	int val;

	s = b->stmts;
	if (s == 0)
		return;

	last = s;
	while (1) {
		s = this_op(s);
		if (s == 0)
			break;
		next = this_op(s->next);
		if (next == 0)
			break;
		last = next;

		/*
		 * st  M[k]	-->	st  M[k]
		 * ldx M[k]		tax
		 */
		if (s->s.code == BPF_ST &&
		    next->s.code == (BPF_LDX|BPF_MEM) &&
		    s->s.k == next->s.k) {
			done = 0;
			next->s.code = BPF_MISC|BPF_TAX;
		}
		/*
		 * ld  #k	-->	ldx  #k
		 * tax			txa
		 */
		if (s->s.code == (BPF_LD|BPF_IMM) &&
		    next->s.code == (BPF_MISC|BPF_TAX)) {
			s->s.code = BPF_LDX|BPF_IMM;
			next->s.code = BPF_MISC|BPF_TXA;
			done = 0;
		}
		/*
		 * This is an ugly special case, but it happens
		 * when you say tcp[k] or udp[k] where k is a constant.
		 */
		if (s->s.code == (BPF_LD|BPF_IMM)) {
			struct slist *add, *tax, *ild;

			/*
			 * Check that X isn't used on exit from this
			 * block (which the optimizer might cause).
			 * We know the code generator won't generate
			 * any local dependencies.
			 */
			if (ATOMELEM(b->out_use, X_ATOM))
				break;

			if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
				add = next;
			else
				add = this_op(next->next);
			if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
				break;

			tax = this_op(add->next);
			if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
				break;

			ild = this_op(tax->next);
			if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
			    BPF_MODE(ild->s.code) != BPF_IND)
				break;
			/*
			 * XXX We need to check that X is not
			 * subsequently used.  We know we can eliminate the
			 * accumulator modifications since it is defined
			 * by the last stmt of this sequence.
			 *
			 * We want to turn this sequence:
			 *
			 * (004) ldi     #0x2		{s}
			 * (005) ldxms   [14]		{next}  -- optional
			 * (006) addx			{add}
			 * (007) tax			{tax}
			 * (008) ild     [x+0]		{ild}
			 *
			 * into this sequence:
			 *
			 * (004) nop
			 * (005) ldxms   [14]
			 * (006) nop
			 * (007) nop
			 * (008) ild     [x+2]
			 *
			 */
			ild->s.k += s->s.k;
			s->s.code = NOP;
			add->s.code = NOP;
			tax->s.code = NOP;
			done = 0;
		}
		s = next;
	}
	/*
	 * If we have a subtract to do a comparison, and the X register
	 * is a known constant, we can merge this value into the
	 * comparison.
	 */
	if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X) &&
	    !ATOMELEM(b->out_use, A_ATOM)) {
		val = b->val[X_ATOM];
		if (vmap[val].is_const) {
			int op;

			b->s.k += vmap[val].const_val;
			op = BPF_OP(b->s.code);
			if (op == BPF_JGT || op == BPF_JGE) {
				struct block *t = JT(b);
				JT(b) = JF(b);
				JF(b) = t;
				b->s.k += 0x80000000;
			}
			last->s.code = NOP;
			done = 0;
		} else if (b->s.k == 0) {
			/*
			 * sub x  ->	nop
			 * j  #0	j  x
			 */
			last->s.code = NOP;
			b->s.code = BPF_CLASS(b->s.code) | BPF_OP(b->s.code) |
				BPF_X;
			done = 0;
		}
	}
	/*
	 * Likewise, a constant subtract can be simplified.
	 */
	else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K) &&
		 !ATOMELEM(b->out_use, A_ATOM)) {
		int op;

		b->s.k += last->s.k;
		last->s.code = NOP;
		op = BPF_OP(b->s.code);
		if (op == BPF_JGT || op == BPF_JGE) {
			struct block *t = JT(b);
			JT(b) = JF(b);
			JF(b) = t;
			b->s.k += 0x80000000;
		}
		done = 0;
	}
	/*
	 * and #k	nop
	 * jeq #0  ->	jset #k
	 */
	if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
	    !ATOMELEM(b->out_use, A_ATOM) && b->s.k == 0) {
		b->s.k = last->s.k;
		b->s.code = BPF_JMP|BPF_K|BPF_JSET;
		last->s.code = NOP;
		done = 0;
		opt_not(b);
	}
	/*
	 * If the accumulator is a known constant, we can compute the
	 * comparison result.
	 */
	val = b->val[A_ATOM];
	if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
		bpf_int32 v = vmap[val].const_val;
		switch (BPF_OP(b->s.code)) {

		case BPF_JEQ:
			v = v == b->s.k;
			break;

		case BPF_JGT:
			v = (unsigned)v > b->s.k;
			break;

		case BPF_JGE:
			v = (unsigned)v >= b->s.k;
			break;

		case BPF_JSET:
			v &= b->s.k;
			break;

		default:
			abort();
		}
		if (JF(b) != JT(b))
			done = 0;
		if (v)
			JF(b) = JT(b);
		else
			JT(b) = JF(b);
	}
}
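
/*
 * For illustration, the and/jeq rewrite above turns
 *
 *	and #0xf0
 *	jeq #0, Lt, Lf
 *
 * into "jset #0xf0, Lf, Lt": the mask moves into the jump's k field,
 * the and becomes a NOP, and opt_not() swaps the branches because
 * jset succeeds exactly when jeq #0 would have failed.
 */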

/*
 * Compute the symbolic value of the expression of 's', and update
 * anything it defines in the value table 'val'.  If 'alter' is true,
 * do various optimizations.  This code would be cleaner if symbolic
 * evaluation and code transformations weren't folded together.
 */
static void
opt_stmt(s, val, alter)
	struct stmt *s;
	int val[];
	int alter;
{
	int op;
	int v;

	switch (s->code) {

	case BPF_LD|BPF_ABS|BPF_W:
	case BPF_LD|BPF_ABS|BPF_H:
	case BPF_LD|BPF_ABS|BPF_B:
		v = F(s->code, s->k, 0L);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_IND|BPF_W:
	case BPF_LD|BPF_IND|BPF_H:
	case BPF_LD|BPF_IND|BPF_B:
		v = val[X_ATOM];
		if (alter && vmap[v].is_const) {
			s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
			s->k += vmap[v].const_val;
			v = F(s->code, s->k, 0L);
			done = 0;
		}
		else
			v = F(s->code, s->k, v);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_LEN:
		v = F(s->code, 0L, 0L);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_IMM:
		v = K(s->k);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LDX|BPF_IMM:
		v = K(s->k);
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_LDX|BPF_MSH|BPF_B:
		v = F(s->code, s->k, 0L);
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_ALU|BPF_NEG:
		if (alter && vmap[val[A_ATOM]].is_const) {
			s->code = BPF_LD|BPF_IMM;
			s->k = -vmap[val[A_ATOM]].const_val;
			val[A_ATOM] = K(s->k);
		}
		else
			val[A_ATOM] = F(s->code, val[A_ATOM], 0L);
		break;

	case BPF_ALU|BPF_ADD|BPF_K:
	case BPF_ALU|BPF_SUB|BPF_K:
	case BPF_ALU|BPF_MUL|BPF_K:
	case BPF_ALU|BPF_DIV|BPF_K:
	case BPF_ALU|BPF_AND|BPF_K:
	case BPF_ALU|BPF_OR|BPF_K:
	case BPF_ALU|BPF_LSH|BPF_K:
	case BPF_ALU|BPF_RSH|BPF_K:
		op = BPF_OP(s->code);
		if (alter) {
			if (s->k == 0) {
				if (op == BPF_ADD || op == BPF_SUB ||
				    op == BPF_LSH || op == BPF_RSH ||
				    op == BPF_OR) {
					s->code = NOP;
					break;
				}
				if (op == BPF_MUL || op == BPF_AND) {
					s->code = BPF_LD|BPF_IMM;
					val[A_ATOM] = K(s->k);
					break;
				}
			}
			if (vmap[val[A_ATOM]].is_const) {
				fold_op(s, val[A_ATOM], K(s->k));
				val[A_ATOM] = K(s->k);
				break;
			}
		}
		val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k));
		break;

	case BPF_ALU|BPF_ADD|BPF_X:
	case BPF_ALU|BPF_SUB|BPF_X:
	case BPF_ALU|BPF_MUL|BPF_X:
	case BPF_ALU|BPF_DIV|BPF_X:
	case BPF_ALU|BPF_AND|BPF_X:
	case BPF_ALU|BPF_OR|BPF_X:
	case BPF_ALU|BPF_LSH|BPF_X:
	case BPF_ALU|BPF_RSH|BPF_X:
		op = BPF_OP(s->code);
		if (alter && vmap[val[X_ATOM]].is_const) {
			if (vmap[val[A_ATOM]].is_const) {
				fold_op(s, val[A_ATOM], val[X_ATOM]);
				val[A_ATOM] = K(s->k);
			}
			else {
				s->code = BPF_ALU|BPF_K|op;
				s->k = vmap[val[X_ATOM]].const_val;
				done = 0;
				val[A_ATOM] =
					F(s->code, val[A_ATOM], K(s->k));
			}
			break;
		}
		/*
		 * Check if we're doing something to an accumulator
		 * that is 0, and simplify.  This may not seem like
		 * much of a simplification but it could open up further
		 * optimizations.
		 * Note that 0 - X is not X, and 0 shifted by X is
		 * still 0, so only add and or can become txa, while
		 * mul, div, and, lsh and rsh all yield 0.
		 * XXX We could also check for mul by 1, and -1, etc.
		 */
		if (alter && vmap[val[A_ATOM]].is_const
		    && vmap[val[A_ATOM]].const_val == 0) {
			if (op == BPF_ADD || op == BPF_OR) {
				s->code = BPF_MISC|BPF_TXA;
				vstore(s, &val[A_ATOM], val[X_ATOM], alter);
				break;
			}
			else if (op == BPF_MUL || op == BPF_DIV ||
				 op == BPF_AND ||
				 op == BPF_LSH || op == BPF_RSH) {
				s->code = BPF_LD|BPF_IMM;
				s->k = 0;
				vstore(s, &val[A_ATOM], K(s->k), alter);
				break;
			}
			else if (op == BPF_NEG) {
				s->code = NOP;
				break;
			}
		}
		val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]);
		break;

	case BPF_MISC|BPF_TXA:
		vstore(s, &val[A_ATOM], val[X_ATOM], alter);
		break;

	case BPF_LD|BPF_MEM:
		v = val[s->k];
		if (alter && vmap[v].is_const) {
			s->code = BPF_LD|BPF_IMM;
			s->k = vmap[v].const_val;
			done = 0;
		}
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_MISC|BPF_TAX:
		vstore(s, &val[X_ATOM], val[A_ATOM], alter);
		break;

	case BPF_LDX|BPF_MEM:
		v = val[s->k];
		if (alter && vmap[v].is_const) {
			s->code = BPF_LDX|BPF_IMM;
			s->k = vmap[v].const_val;
			done = 0;
		}
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_ST:
		vstore(s, &val[s->k], val[A_ATOM], alter);
		break;

	case BPF_STX:
		vstore(s, &val[s->k], val[X_ATOM], alter);
		break;
	}
}
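
/*
 * For illustration of the BPF_LD|BPF_IND case above: if X holds the
 * known constant 14, "ldx #14; ld [x+6]" has its load rewritten in
 * place to the absolute form "ld [20]"; if X is not otherwise used,
 * the now-dead ldx is later turned into a NOP by opt_deadstores().
 */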

static void
deadstmt(s, last)
	register struct stmt *s;
	register struct stmt *last[];
{
	register int atom;

	atom = atomuse(s);
	if (atom >= 0) {
		if (atom == AX_ATOM) {
			last[X_ATOM] = 0;
			last[A_ATOM] = 0;
		}
		else
			last[atom] = 0;
	}
	atom = atomdef(s);
	if (atom >= 0) {
		if (last[atom]) {
			done = 0;
			last[atom]->code = NOP;
		}
		last[atom] = s;
	}
}

static void
opt_deadstores(b)
	register struct block *b;
{
	register struct slist *s;
	register int atom;
	struct stmt *last[N_ATOMS];

	memset((char *)last, 0, sizeof last);

	for (s = b->stmts; s != 0; s = s->next)
		deadstmt(&s->s, last);
	deadstmt(&b->s, last);

	for (atom = 0; atom < N_ATOMS; ++atom)
		if (last[atom] && !ATOMELEM(b->out_use, atom)) {
			last[atom]->code = NOP;
			done = 0;
		}
}

static void
opt_blk(b, do_stmts)
	struct block *b;
	int do_stmts;
{
	struct slist *s;
	struct edge *p;
	int i;
	bpf_int32 aval;

#if 0
	for (s = b->stmts; s && s->next; s = s->next)
		if (BPF_CLASS(s->s.code) == BPF_JMP) {
			do_stmts = 0;
			break;
		}
#endif

	/*
	 * Initialize the atom values.
	 * If we have no predecessors, everything is undefined.
	 * Otherwise, we inherit our values from our predecessors.
	 * If any register has an ambiguous value (i.e. control paths are
	 * merging) give it the undefined value of 0.
	 */
	p = b->in_edges;
	if (p == 0)
		memset((char *)b->val, 0, sizeof(b->val));
	else {
		memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
		while ((p = p->next) != NULL) {
			for (i = 0; i < N_ATOMS; ++i)
				if (b->val[i] != p->pred->val[i])
					b->val[i] = 0;
		}
	}
	aval = b->val[A_ATOM];
	for (s = b->stmts; s; s = s->next)
		opt_stmt(&s->s, b->val, do_stmts);

	/*
	 * This is a special case: if we don't use anything from this
	 * block, and we load the accumulator with a value that is
	 * already there, or if this block is a return,
	 * eliminate all the statements.
	 */
	if (do_stmts &&
	    ((b->out_use == 0 && aval != 0 && b->val[A_ATOM] == aval) ||
	     BPF_CLASS(b->s.code) == BPF_RET)) {
		if (b->stmts != 0) {
			b->stmts = 0;
			done = 0;
		}
	} else {
		opt_peep(b);
		opt_deadstores(b);
	}
	/*
	 * Set up values for branch optimizer.
	 */
	if (BPF_SRC(b->s.code) == BPF_K)
		b->oval = K(b->s.k);
	else
		b->oval = b->val[X_ATOM];
	b->et.code = b->s.code;
	b->ef.code = -b->s.code;
}

/*
 * Return true if any register that is used on exit from 'succ' has
 * an exit value that is different from the corresponding exit value
 * from 'b'.
 */
static int
use_conflict(b, succ)
	struct block *b, *succ;
{
	int atom;
	atomset use = succ->out_use;

	if (use == 0)
		return 0;

	for (atom = 0; atom < N_ATOMS; ++atom)
		if (ATOMELEM(use, atom))
			if (b->val[atom] != succ->val[atom])
				return 1;
	return 0;
}

static struct block *
fold_edge(child, ep)
	struct block *child;
	struct edge *ep;
{
	int sense;
	int aval0, aval1, oval0, oval1;
	int code = ep->code;

	if (code < 0) {
		code = -code;
		sense = 0;
	} else
		sense = 1;

	if (child->s.code != code)
		return 0;

	aval0 = child->val[A_ATOM];
	oval0 = child->oval;
	aval1 = ep->pred->val[A_ATOM];
	oval1 = ep->pred->oval;

	if (aval0 != aval1)
		return 0;

	if (oval0 == oval1)
		/*
		 * The operands are identical, so the
		 * result is true if a true branch was
		 * taken to get here, otherwise false.
		 */
		return sense ? JT(child) : JF(child);

	if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
		/*
		 * At this point, we only know the comparison if we
		 * came down the true branch, and it was an equality
		 * comparison with a constant.  We rely on the fact that
		 * distinct constants have distinct value numbers.
		 */
		return JF(child);

	return 0;
}
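
/*
 * For illustration: if an edge dominator performed the identical test
 * on the same A value, the child's outcome is forced, so we return
 * JT(child) if we reached it along the true branch and JF(child)
 * along the false one.  With different operands, only one case is
 * decidable: having taken the true branch of "jeq #0x800", a child
 * testing "jeq #0x86dd" on the same A must fail, since distinct
 * constants receive distinct value numbers.
 */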

static void
opt_j(ep)
	struct edge *ep;
{
	register int i, k;
	register struct block *target;

	if (JT(ep->succ) == 0)
		return;

	if (JT(ep->succ) == JF(ep->succ)) {
		/*
		 * Common branch targets can be eliminated, provided
		 * there is no data dependency.
		 */
		if (!use_conflict(ep->pred, ep->succ->et.succ)) {
			done = 0;
			ep->succ = JT(ep->succ);
		}
	}
	/*
	 * For each edge dominator that matches the successor of this
	 * edge, promote the edge successor to its grandchild.
	 *
	 * XXX We violate the set abstraction here in favor of a
	 * reasonably efficient loop.
	 */
 top:
	for (i = 0; i < edgewords; ++i) {
		register bpf_u_int32 x = ep->edom[i];

		while (x != 0) {
			k = ffs(x) - 1;
			x &=~ (1 << k);
			k += i * BITS_PER_WORD;

			target = fold_edge(ep->succ, edges[k]);
			/*
			 * Check that there is no data dependency between
			 * the nodes that would be violated if we move
			 * the edge.
			 */
			if (target != 0 && !use_conflict(ep->pred, target)) {
				done = 0;
				ep->succ = target;
				if (JT(target) != 0)
					/*
					 * Start over unless we hit a leaf.
					 */
					goto top;
				return;
			}
		}
	}
}


static void
or_pullup(b)
	struct block *b;
{
	int val, at_top;
	struct block *pull;
	struct block **diffp, **samep;
	struct edge *ep;

	ep = b->in_edges;
	if (ep == 0)
		return;

	/*
	 * Make sure each predecessor loads the same value.
	 * XXX why?
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])
			return;

	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

	at_top = 1;
	while (1) {
		if (*diffp == 0)
			return;

		if (JT(*diffp) != JT(b))
			return;

		if (!SET_MEMBER((*diffp)->dom, b->id))
			return;

		if ((*diffp)->val[A_ATOM] != val)
			break;

		diffp = &JF(*diffp);
		at_top = 0;
	}
	samep = &JF(*diffp);
	while (1) {
		if (*samep == 0)
			return;

		if (JT(*samep) != JT(b))
			return;

		if (!SET_MEMBER((*samep)->dom, b->id))
			return;

		if ((*samep)->val[A_ATOM] == val)
			break;

		/* XXX Need to check that there are no data dependencies
		   between dp0 and dp1.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JF(*samep);
	}
#ifdef notdef
	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])
			return;
#endif
	/* Pull up the node. */
	pull = *samep;
	*samep = JF(pull);
	JF(pull) = *diffp;

	/*
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
	 */
	if (at_top) {
		for (ep = b->in_edges; ep != 0; ep = ep->next) {
			if (JT(ep->pred) == b)
				JT(ep->pred) = pull;
			else
				JF(ep->pred) = pull;
		}
	}
	else
		*diffp = pull;

	done = 0;
}

static void
and_pullup(b)
	struct block *b;
{
	int val, at_top;
	struct block *pull;
	struct block **diffp, **samep;
	struct edge *ep;

	ep = b->in_edges;
	if (ep == 0)
		return;

	/*
	 * Make sure each predecessor loads the same value.
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])
			return;

	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

	at_top = 1;
	while (1) {
		if (*diffp == 0)
			return;

		if (JF(*diffp) != JF(b))
			return;

		if (!SET_MEMBER((*diffp)->dom, b->id))
			return;

		if ((*diffp)->val[A_ATOM] != val)
			break;

		diffp = &JT(*diffp);
		at_top = 0;
	}
	samep = &JT(*diffp);
	while (1) {
		if (*samep == 0)
			return;

		if (JF(*samep) != JF(b))
			return;

		if (!SET_MEMBER((*samep)->dom, b->id))
			return;

		if ((*samep)->val[A_ATOM] == val)
			break;

		/* XXX Need to check that there are no data dependencies
		   between diffp and samep.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JT(*samep);
	}
#ifdef notdef
	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])
			return;
#endif
	/* Pull up the node. */
	pull = *samep;
	*samep = JT(pull);
	JT(pull) = *diffp;

	/*
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
	 */
	if (at_top) {
		for (ep = b->in_edges; ep != 0; ep = ep->next) {
			if (JT(ep->pred) == b)
				JT(ep->pred) = pull;
			else
				JF(ep->pred) = pull;
		}
	}
	else
		*diffp = pull;

	done = 0;
}

static void
opt_blks(root, do_stmts)
	struct block *root;
	int do_stmts;
{
	int i, maxlevel;
	struct block *p;

	init_val();
	maxlevel = root->level;
	for (i = maxlevel; i >= 0; --i)
		for (p = levels[i]; p; p = p->link)
			opt_blk(p, do_stmts);

	if (do_stmts)
		/*
		 * No point trying to move branches; it can't possibly
		 * make a difference at this point.
		 */
		return;

	for (i = 1; i <= maxlevel; ++i) {
		for (p = levels[i]; p; p = p->link) {
			opt_j(&p->et);
			opt_j(&p->ef);
		}
	}
	for (i = 1; i <= maxlevel; ++i) {
		for (p = levels[i]; p; p = p->link) {
			or_pullup(p);
			and_pullup(p);
		}
	}
}

static inline void
link_inedge(parent, child)
	struct edge *parent;
	struct block *child;
{
	parent->next = child->in_edges;
	child->in_edges = parent;
}

static void
find_inedges(root)
	struct block *root;
{
	int i;
	struct block *b;

	for (i = 0; i < n_blocks; ++i)
		blocks[i]->in_edges = 0;

	/*
	 * Traverse the graph, adding each edge to the predecessor
	 * list of its successors.  Skip the leaves (i.e. level 0).
	 */
	for (i = root->level; i > 0; --i) {
		for (b = levels[i]; b != 0; b = b->link) {
			link_inedge(&b->et, JT(b));
			link_inedge(&b->ef, JF(b));
		}
	}
}

static void
opt_root(b)
	struct block **b;
{
	struct slist *tmp, *s;

	s = (*b)->stmts;
	(*b)->stmts = 0;
	while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
		*b = JT(*b);

	tmp = (*b)->stmts;
	if (tmp != 0)
		sappend(s, tmp);
	(*b)->stmts = s;

	/*
	 * If the root node is a return, then there is no
	 * point executing any statements (since the bpf machine
	 * has no side effects).
	 */
	if (BPF_CLASS((*b)->s.code) == BPF_RET)
		(*b)->stmts = 0;
}

static void
opt_loop(root, do_stmts)
	struct block *root;
	int do_stmts;
{

#ifdef BDEBUG
	if (dflag > 1)
		opt_dump(root);
#endif
	do {
		done = 1;
		find_levels(root);
		find_dom(root);
		find_closure(root);
		find_inedges(root);
		find_ud(root);
		find_edom(root);
		opt_blks(root, do_stmts);
#ifdef BDEBUG
		if (dflag > 1)
			opt_dump(root);
#endif
	} while (!done);
}

/*
 * Optimize the filter code in its dag representation.
 */
void
bpf_optimize(rootp)
	struct block **rootp;
{
	struct block *root;

	root = *rootp;

	opt_init(root);
	opt_loop(root, 0);
	opt_loop(root, 1);
	intern_blocks(root);
	opt_root(rootp);
	opt_cleanup();
}
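
/*
 * For illustration: this is the module's only external entry point.
 * The caller (pcap_compile() in gencode.c) hands in the root of the
 * dag built by the code generator; a struct block ** is taken because
 * opt_root() may replace the root block itself.
 */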

static void
make_marks(p)
	struct block *p;
{
	if (!isMarked(p)) {
		Mark(p);
		if (BPF_CLASS(p->s.code) != BPF_RET) {
			make_marks(JT(p));
			make_marks(JF(p));
		}
	}
}

/*
 * Mark code array such that isMarked(i) is true
 * only for nodes that are alive.
 */
static void
mark_code(p)
	struct block *p;
{
	cur_mark += 1;
	make_marks(p);
}

/*
 * True iff the two stmt lists load the same value from the packet into
 * the accumulator.
 */
static int
eq_slist(x, y)
	struct slist *x, *y;
{
	while (1) {
		while (x && x->s.code == NOP)
			x = x->next;
		while (y && y->s.code == NOP)
			y = y->next;
		if (x == 0)
			return y == 0;
		if (y == 0)
			return x == 0;
		if (x->s.code != y->s.code || x->s.k != y->s.k)
			return 0;
		x = x->next;
		y = y->next;
	}
}

static inline int
eq_blk(b0, b1)
	struct block *b0, *b1;
{
	if (b0->s.code == b1->s.code &&
	    b0->s.k == b1->s.k &&
	    b0->et.succ == b1->et.succ &&
	    b0->ef.succ == b1->ef.succ)
		return eq_slist(b0->stmts, b1->stmts);
	return 0;
}

static void
intern_blocks(root)
	struct block *root;
{
	struct block *p;
	int i, j;
	int done;
 top:
	done = 1;
	for (i = 0; i < n_blocks; ++i)
		blocks[i]->link = 0;

	mark_code(root);

	for (i = n_blocks - 1; --i >= 0; ) {
		if (!isMarked(blocks[i]))
			continue;
		for (j = i + 1; j < n_blocks; ++j) {
			if (!isMarked(blocks[j]))
				continue;
			if (eq_blk(blocks[i], blocks[j])) {
				blocks[i]->link = blocks[j]->link ?
					blocks[j]->link : blocks[j];
				break;
			}
		}
	}
	for (i = 0; i < n_blocks; ++i) {
		p = blocks[i];
		if (JT(p) == 0)
			continue;
		if (JT(p)->link) {
			done = 0;
			JT(p) = JT(p)->link;
		}
		if (JF(p)->link) {
			done = 0;
			JF(p) = JF(p)->link;
		}
	}
	if (!done)
		goto top;
}

static void
opt_cleanup()
{
	free((void *)vnode_base);
	free((void *)vmap);
	free((void *)edges);
	free((void *)space);
	free((void *)levels);
	free((void *)blocks);
}
172517683Spst
172617683Spst/*
172717683Spst * Return the number of stmts in 's'.
172817683Spst */
172917683Spststatic int
173017683Spstslength(s)
173117683Spst	struct slist *s;
173217683Spst{
173317683Spst	int n = 0;
173417683Spst
173517683Spst	for (; s; s = s->next)
173617683Spst		if (s->s.code != NOP)
173717683Spst			++n;
173817683Spst	return n;
173917683Spst}
174017683Spst
/*
 * Return the number of nodes reachable by 'p'.
 * All nodes should be initially unmarked.
 */
static int
count_blocks(p)
	struct block *p;
{
	if (p == 0 || isMarked(p))
		return 0;
	Mark(p);
	return count_blocks(JT(p)) + count_blocks(JF(p)) + 1;
}

/*
 * Do a depth first search on the flow graph, numbering the
 * basic blocks, and entering them into the 'blocks' array.
 */
static void
number_blks_r(p)
	struct block *p;
{
	int n;

	if (p == 0 || isMarked(p))
		return;

	Mark(p);
	n = n_blocks++;
	p->id = n;
	blocks[n] = p;

	number_blks_r(JT(p));
	number_blks_r(JF(p));
}

/*
 * Return the number of stmts in the flowgraph reachable by 'p'.
 * The nodes should be unmarked before calling.
 *
 * The count includes the branch or return that terminates each block,
 * plus any extra unconditional jumps that convert_code_r() will insert
 * for branches whose offsets are too large, so an array sized from
 * this count cannot be overrun on a retry pass.
 */
static int
count_stmts(p)
	struct block *p;
{
	int n;

	if (p == 0 || isMarked(p))
		return 0;
	Mark(p);
	n = count_stmts(JT(p)) + count_stmts(JF(p));
	return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
}

/*
 * Allocate memory.  All allocation is done before optimization
 * is begun.  A linear bound on the size of all data structures is computed
 * from the total number of blocks and/or statements.
 */
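/*
 * For example, a graph of n blocks has exactly 2*n edges (every block
 * has one true and one false edge), so the edge arrays can be sized up
 * front; likewise each block contributes slength(stmts)+1 instructions
 * to the max_stmts bound computed below.
 */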
static void
opt_init(root)
	struct block *root;
{
	bpf_u_int32 *p;
	int i, n, max_stmts;

	/*
	 * First, count the blocks, so we can malloc an array to map
	 * block number to block.  Then, put the blocks into the array.
	 */
	unMarkAll();
	n = count_blocks(root);
	blocks = (struct block **)malloc(n * sizeof(*blocks));
	if (blocks == NULL)
		bpf_error("malloc");
	unMarkAll();
	n_blocks = 0;
	number_blks_r(root);

	n_edges = 2 * n_blocks;
	edges = (struct edge **)malloc(n_edges * sizeof(*edges));
	if (edges == NULL)
		bpf_error("malloc");

	/*
	 * The number of levels is bounded by the number of nodes.
	 */
	levels = (struct block **)malloc(n_blocks * sizeof(*levels));
	if (levels == NULL)
		bpf_error("malloc");

	/*
	 * Enough 32-bit words to hold one bit per edge, and one bit
	 * per node, respectively.
	 */
	edgewords = n_edges / (8 * sizeof(bpf_u_int32)) + 1;
	nodewords = n_blocks / (8 * sizeof(bpf_u_int32)) + 1;

	/* XXX */
	space = (bpf_u_int32 *)malloc(2 * n_blocks * nodewords * sizeof(*space)
				 + n_edges * edgewords * sizeof(*space));
	if (space == NULL)
		bpf_error("malloc");
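	/*
	 * Carve the single 'space' allocation up, in order: one
	 * dominator set per block, one closure set per block, then two
	 * edge-dominator sets per block (one for the true edge, one
	 * for the false edge).
	 */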
	p = space;
	all_dom_sets = p;
	for (i = 0; i < n; ++i) {
		blocks[i]->dom = p;
		p += nodewords;
	}
	all_closure_sets = p;
	for (i = 0; i < n; ++i) {
		blocks[i]->closure = p;
		p += nodewords;
	}
	all_edge_sets = p;
	for (i = 0; i < n; ++i) {
		register struct block *b = blocks[i];

		b->et.edom = p;
		p += edgewords;
		b->ef.edom = p;
		p += edgewords;
		b->et.id = i;
		edges[i] = &b->et;
		b->ef.id = n_blocks + i;
		edges[n_blocks + i] = &b->ef;
		b->et.pred = b;
		b->ef.pred = b;
	}
	max_stmts = 0;
	for (i = 0; i < n; ++i)
		max_stmts += slength(blocks[i]->stmts) + 1;
	/*
	 * We allocate at most 3 value numbers per statement,
	 * so this is an upper bound on the number of valnodes
	 * we'll need.
	 */
	maxval = 3 * max_stmts;
	vmap = (struct vmapinfo *)malloc(maxval * sizeof(*vmap));
	vnode_base = (struct valnode *)malloc(maxval * sizeof(*vnode_base));
	if (vmap == NULL || vnode_base == NULL)
		bpf_error("malloc");
}

/*
 * Some pointers used to convert the basic block form of the code
 * into the array form that BPF requires.  'fstart' points to
 * the malloc'd array while 'ftail' is used during the recursive traversal.
 */
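/*
 * Blocks are emitted back to front: each convert_code_r() call moves
 * 'ftail' down by the block's length and writes the block there, so
 * 'p->offset = dst - fstart' is the block's final instruction index.
 */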
static struct bpf_insn *fstart;
static struct bpf_insn *ftail;

#ifdef BDEBUG
int bids[1000];		/* block id at each instruction offset, for the debug dump */
#endif

/*
 * Returns true if successful.  Returns false if a branch has
 * an offset that is too large.  If so, we have marked that
 * branch so that on a subsequent iteration, it will be treated
 * properly.
 */
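/*
 * The conditional's jt/jf fields are unsigned 8-bit offsets, so a
 * target more than 255 instructions away cannot be encoded directly.
 * In that case the branch instead targets an unconditional jump
 * emitted right after it, roughly:
 *
 *	jeq #k   jt 0   jf ...		; 0 = the "ja" just below
 *	ja  <far target>
 *
 * The longjt/longjf counters make room for these extra jumps on the
 * retry pass.
 */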
static int
convert_code_r(p)
	struct block *p;
{
	struct bpf_insn *dst;
	struct slist *src;
	int slen;
	u_int off;
	int extrajmps;		/* number of extra jumps inserted */
	struct slist **offset = NULL;

	if (p == 0 || isMarked(p))
		return (1);
	Mark(p);

	if (convert_code_r(JF(p)) == 0)
		return (0);
	if (convert_code_r(JT(p)) == 0)
		return (0);

	slen = slength(p->stmts);
	dst = ftail -= (slen + 1 + p->longjt + p->longjf);
		/* inflate length by any extra jumps */

	p->offset = dst - fstart;

	/* generate offset[] for convenience */
	if (slen) {
		offset = (struct slist **)calloc(slen, sizeof(struct slist *));
		if (!offset) {
			bpf_error("not enough core");
			/*NOTREACHED*/
		}
	}
	src = p->stmts;
	for (off = 0; off < slen && src; off++) {
#if 0
		printf("off=%d src=%x\n", off, src);
#endif
		offset[off] = src;
		src = src->next;
	}

	off = 0;
	for (src = p->stmts; src; src = src->next) {
		if (src->s.code == NOP)
			continue;
		dst->code = (u_short)src->s.code;
		dst->k = src->s.k;

		/* fill block-local relative jump */
		if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
#if 0
			if (src->s.jt || src->s.jf) {
				bpf_error("illegal jmp destination");
				/*NOTREACHED*/
			}
#endif
			goto filled;
		}
		if (off == slen - 2)	/*???*/
			goto filled;

	    {
		int i;
		int jt, jf;
		char *ljerr = "%s for block-local relative jump: off=%d";

#if 0
		printf("code=%x off=%d %x %x\n", src->s.code,
			off, src->s.jt, src->s.jf);
#endif

		if (!src->s.jt || !src->s.jf) {
			bpf_error(ljerr, "no jmp destination", off);
			/*NOTREACHED*/
		}

		jt = jf = 0;
		for (i = 0; i < slen; i++) {
			if (offset[i] == src->s.jt) {
				if (jt) {
					bpf_error(ljerr, "multiple matches", off);
					/*NOTREACHED*/
				}

				/* BPF jumps are relative to the next insn */
				dst->jt = i - off - 1;
				jt++;
			}
			if (offset[i] == src->s.jf) {
				if (jf) {
					bpf_error(ljerr, "multiple matches", off);
					/*NOTREACHED*/
				}
				dst->jf = i - off - 1;
				jf++;
			}
		}
		if (!jt || !jf) {
			bpf_error(ljerr, "no destination found", off);
			/*NOTREACHED*/
		}
	    }
filled:
		++dst;
		++off;
	}
	if (offset)
		free(offset);

#ifdef BDEBUG
	bids[dst - fstart] = p->id + 1;
#endif
	dst->code = (u_short)p->s.code;
	dst->k = p->s.k;
	if (JT(p)) {
		extrajmps = 0;
		off = JT(p)->offset - (p->offset + slen) - 1;
		if (off >= 256) {
		    /* offset doesn't fit in the 8-bit branch field,
		       must add a jump */
		    if (p->longjt == 0) {
		    	/* mark this instruction and retry */
			p->longjt++;
			return(0);
		    }
		    /* branch if T to following jump */
		    dst->jt = extrajmps;
		    extrajmps++;
		    dst[extrajmps].code = BPF_JMP|BPF_JA;
		    dst[extrajmps].k = off - extrajmps;
		}
		else
		    dst->jt = off;
		off = JF(p)->offset - (p->offset + slen) - 1;
		if (off >= 256) {
		    /* offset doesn't fit in the 8-bit branch field,
		       must add a jump */
		    if (p->longjf == 0) {
		    	/* mark this instruction and retry */
			p->longjf++;
			return(0);
		    }
		    /* branch if F to following jump */
		    /* if two jumps are inserted, F goes to second one */
		    dst->jf = extrajmps;
		    extrajmps++;
		    dst[extrajmps].code = BPF_JMP|BPF_JA;
		    dst[extrajmps].k = off - extrajmps;
		}
		else
		    dst->jf = off;
	}
	return (1);
}


/*
 * Convert flowgraph intermediate representation to the
 * BPF array representation.  Set *lenp to the number of instructions.
 */
struct bpf_insn *
icode_to_fcode(root, lenp)
	struct block *root;
	int *lenp;
{
	int n;
	struct bpf_insn *fp;

	/*
	 * Loop doing convert_code_r() until no branches remain
	 * with too-large offsets.
	 */
	while (1) {
	    unMarkAll();
	    n = *lenp = count_stmts(root);

	    fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
	    if (fp == NULL)
		bpf_error("malloc");
	    memset((char *)fp, 0, sizeof(*fp) * n);
	    fstart = fp;
	    ftail = fp + n;

	    unMarkAll();
	    if (convert_code_r(root))
		break;
	    free(fp);
	}

	return fp;
}

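/*
 * Typical use (a sketch; see opt_dump() below): run the optimizer over
 * the block graph, then
 *
 *	struct bpf_program f;
 *	f.bf_insns = icode_to_fcode(root, &f.bf_len);
 *
 * The caller owns the returned array and must free it.
 */
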
#ifdef BDEBUG
static void
opt_dump(root)
	struct block *root;
{
	struct bpf_program f;

	memset(bids, 0, sizeof bids);
	f.bf_insns = icode_to_fcode(root, &f.bf_len);
	bpf_dump(&f, 1);
	putchar('\n');
	free((char *)f.bf_insns);
}
#endif