/*
 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that: (1) source code distributions
 * retain the above copyright notice and this paragraph in its entirety, (2)
 * distributions including binary code include the above copyright notice and
 * this paragraph in its entirety in the documentation or other materials
 * provided with the distribution, and (3) all advertising materials mentioning
 * features or use of this software display the following acknowledgement:
 * ``This product includes software developed by the University of California,
 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
 * the University nor the names of its contributors may be used to endorse
 * or promote products derived from this software without specific prior
 * written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 *  Optimization module for tcpdump intermediate representation.
 */
#ifndef lint
static char rcsid[] =
    "@(#) $Header: optimize.c,v 1.59 96/07/15 00:48:49 leres Exp $ (LBL)";
#endif

#include <sys/types.h>
#include <sys/time.h>

#include <stdio.h>
#include <stdlib.h>
#include <memory.h>

#include "pcap-int.h"

#include "gencode.h"

#include "gnuc.h"
#ifdef HAVE_OS_PROTO_H
#include "os-proto.h"
#endif

#ifdef BDEBUG
extern int dflag;
#endif

#define A_ATOM BPF_MEMWORDS
#define X_ATOM (BPF_MEMWORDS+1)

#define NOP -1

/*
 * This define is used to represent *both* the accumulator and
 * x register in use-def computations.
 * Currently, the use-def code assumes only one definition per instruction.
 */
#define AX_ATOM N_ATOMS

/*
 * A flag to indicate that further optimization is needed.
 * Iterative passes are continued until a given pass yields no
 * branch movement.
 */
static int done;

/*
 * A block is marked if and only if its mark equals the current mark.
 * Rather than traverse the code array, marking each item, 'cur_mark' is
 * incremented.  This automatically makes each element unmarked.
 */
static int cur_mark;
#define isMarked(p) ((p)->mark == cur_mark)
#define unMarkAll() cur_mark += 1
#define Mark(p) ((p)->mark = cur_mark)
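
/*
 * Illustrative sketch (not part of the original source): the epoch
 * trick makes clearing all marks O(1).
 *
 *	unMarkAll();		every node is now "unmarked"
 *	if (!isMarked(b)) {
 *		Mark(b);	b->mark = cur_mark; isMarked(b) is now true
 *	}
 */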

static void opt_init(struct block *);
static void opt_cleanup(void);

static void make_marks(struct block *);
static void mark_code(struct block *);

static void intern_blocks(struct block *);

static int eq_slist(struct slist *, struct slist *);

static void find_levels_r(struct block *);

static void find_levels(struct block *);
static void find_dom(struct block *);
static void propedom(struct edge *);
static void find_edom(struct block *);
static void find_closure(struct block *);
static int atomuse(struct stmt *);
static int atomdef(struct stmt *);
static void compute_local_ud(struct block *);
static void find_ud(struct block *);
static void init_val(void);
static int F(int, int, int);
static inline void vstore(struct stmt *, int *, int, int);
static void opt_blk(struct block *, int);
static int use_conflict(struct block *, struct block *);
static void opt_j(struct edge *);
static void or_pullup(struct block *);
static void and_pullup(struct block *);
static void opt_blks(struct block *, int);
static inline void link_inedge(struct edge *, struct block *);
static void find_inedges(struct block *);
static void opt_root(struct block **);
static void opt_loop(struct block *, int);
static void fold_op(struct stmt *, int, int);
static inline struct slist *this_op(struct slist *);
static void opt_not(struct block *);
static void opt_peep(struct block *);
static void opt_stmt(struct stmt *, int[], int);
static void deadstmt(struct stmt *, struct stmt *[]);
static void opt_deadstores(struct block *);
static struct block *fold_edge(struct block *, struct edge *);
static inline int eq_blk(struct block *, struct block *);
static int slength(struct slist *);
static int count_blocks(struct block *);
static void number_blks_r(struct block *);
static int count_stmts(struct block *);
static int convert_code_r(struct block *);
#ifdef BDEBUG
static void opt_dump(struct block *);
#endif

static int n_blocks;
struct block **blocks;
static int n_edges;
struct edge **edges;

/*
 * A bit vector set representation of the dominators.
 * We round up the set size to the next multiple of the word size.
 */
static int nodewords;
static int edgewords;
struct block **levels;
bpf_u_int32 *space;
#define BITS_PER_WORD (8*sizeof(bpf_u_int32))
/*
 * True if 'a' is in uset 'p'.
 */
#define SET_MEMBER(p, a) \
((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))

/*
 * Add 'a' to uset p.
 */
#define SET_INSERT(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))

/*
 * Delete 'a' from uset p.
 */
#define SET_DELETE(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))

/*
 * a := a intersect b
 */
#define SET_INTERSECT(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &= *_y++;\
}

/*
 * a := a - b
 */
#define SET_SUBTRACT(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &=~ *_y++;\
}

/*
 * a := a union b
 */
#define SET_UNION(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ |= *_y++;\
}
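
/*
 * Worked example (not in the original source): with 32-bit words,
 * element 37 lives in word 37 / 32 = 1, at bit 37 % 32 = 5.
 *
 *	bpf_u_int32 set[2] = { 0, 0 };
 *	SET_INSERT(set, 37);		set[1] |= 1 << 5
 *	if (SET_MEMBER(set, 37))	nonzero: bit 5 of word 1 is on
 *		SET_DELETE(set, 37);	set[1] &= ~(1 << 5)
 */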

static uset all_dom_sets;
static uset all_closure_sets;
static uset all_edge_sets;

#ifndef MAX
#define MAX(a,b) ((a)>(b)?(a):(b))
#endif

static void
find_levels_r(b)
	struct block *b;
{
	int level;

	if (isMarked(b))
		return;

	Mark(b);
	b->link = 0;

	if (JT(b)) {
		find_levels_r(JT(b));
		find_levels_r(JF(b));
		level = MAX(JT(b)->level, JF(b)->level) + 1;
	} else
		level = 0;
	b->level = level;
	b->link = levels[level];
	levels[level] = b;
}

/*
 * Level graph.  The levels go from 0 at the leaves to
 * N_LEVELS at the root.  The levels[] array points to the
 * first node of the level list, whose elements are linked
 * with the 'link' field of the struct block.
 */
static void
find_levels(root)
	struct block *root;
{
	memset((char *)levels, 0, n_blocks * sizeof(*levels));
	unMarkAll();
	find_levels_r(root);
}
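
/*
 * Illustrative sketch (not in the original source): for the graph
 *
 *	A -> B -> ret	A: level = MAX(1, 0) + 1 = 2
 *	A ------> ret	B: level = 0 + 1 = 1; ret: level 0
 *
 * levels[0] chains the leaves, levels[1] holds B and levels[2] holds A,
 * so counting down from root->level visits the graph top-down.
 */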

/*
 * Find dominator relationships.
 * Assumes graph has been leveled.
 */
static void
find_dom(root)
	struct block *root;
{
	int i;
	struct block *b;
	bpf_u_int32 *x;

	/*
	 * Initialize sets to contain all nodes.
	 */
	x = all_dom_sets;
	i = n_blocks * nodewords;
	while (--i >= 0)
		*x++ = ~0;
	/* Root starts off empty. */
	for (i = nodewords; --i >= 0;)
		root->dom[i] = 0;

	/* root->level is the highest level node found. */
	for (i = root->level; i >= 0; --i) {
		for (b = levels[i]; b; b = b->link) {
			SET_INSERT(b->dom, b->id);
			if (JT(b) == 0)
				continue;
			SET_INTERSECT(JT(b)->dom, b->dom, nodewords);
			SET_INTERSECT(JF(b)->dom, b->dom, nodewords);
		}
	}
}
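
/*
 * Worked example (not in the original source): all sets start full and
 * the root's set starts empty; one top-down sweep suffices because the
 * flow graph is acyclic.  For a root A whose two branches both lead to B:
 *
 *	A->dom = {A}			root is dominated only by itself
 *	B->dom = full & {A}, + {B}	= {A, B}
 *
 * i.e. every path to B passes through A, so A dominates B.
 */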

static void
propedom(ep)
	struct edge *ep;
{
	SET_INSERT(ep->edom, ep->id);
	if (ep->succ) {
		SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords);
		SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords);
	}
}

/*
 * Compute edge dominators.
 * Assumes graph has been leveled and predecessors established.
 */
static void
find_edom(root)
	struct block *root;
{
	int i;
	uset x;
	struct block *b;

	x = all_edge_sets;
	for (i = n_edges * edgewords; --i >= 0; )
		x[i] = ~0;

	/* root->level is the highest level node found. */
	memset(root->et.edom, 0, edgewords * sizeof(*(uset)0));
	memset(root->ef.edom, 0, edgewords * sizeof(*(uset)0));
	for (i = root->level; i >= 0; --i) {
		for (b = levels[i]; b != 0; b = b->link) {
			propedom(&b->et);
			propedom(&b->ef);
		}
	}
}

/*
 * Find the backwards transitive closure of the flow graph.  These sets
 * are backwards in the sense that we find the set of nodes that reach
 * a given node, not the set of nodes that can be reached by a node.
 *
 * Assumes graph has been leveled.
 */
static void
find_closure(root)
	struct block *root;
{
	int i;
	struct block *b;

	/*
	 * Initialize sets to contain no nodes.
	 */
	memset((char *)all_closure_sets, 0,
	      n_blocks * nodewords * sizeof(*all_closure_sets));

	/* root->level is the highest level node found. */
	for (i = root->level; i >= 0; --i) {
		for (b = levels[i]; b; b = b->link) {
			SET_INSERT(b->closure, b->id);
			if (JT(b) == 0)
				continue;
			SET_UNION(JT(b)->closure, b->closure, nodewords);
			SET_UNION(JF(b)->closure, b->closure, nodewords);
		}
	}
}

/*
 * Return the register number that is used by s.  If A and X are both
 * used, return AX_ATOM.  If no register is used, return -1.
 *
 * The implementation should probably change to an array access.
 */
static int
atomuse(s)
	struct stmt *s;
{
	register int c = s->code;

	if (c == NOP)
		return -1;

	switch (BPF_CLASS(c)) {

	case BPF_RET:
		return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
			(BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;

	case BPF_LD:
	case BPF_LDX:
		return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
			(BPF_MODE(c) == BPF_MEM) ? s->k : -1;

	case BPF_ST:
		return A_ATOM;

	case BPF_STX:
		return X_ATOM;

	case BPF_JMP:
	case BPF_ALU:
		if (BPF_SRC(c) == BPF_X)
			return AX_ATOM;
		return A_ATOM;

	case BPF_MISC:
		return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
	}
	abort();
	/* NOTREACHED */
}
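
/*
 * Illustrative examples (not in the original source):
 *
 *	ld  [x + 14]	BPF_LD|BPF_IND		uses X		-> X_ATOM
 *	ld  M[3]	BPF_LD|BPF_MEM		uses mem word 3	-> 3
 *	add x		BPF_ALU with BPF_X	uses A and X	-> AX_ATOM
 *	tax		BPF_MISC		uses A		-> A_ATOM
 */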

/*
 * Return the register number that is defined by 's'.  We assume that
 * a single stmt cannot define more than one register.  If no register
 * is defined, return -1.
 *
 * The implementation should probably change to an array access.
 */
static int
atomdef(s)
	struct stmt *s;
{
	if (s->code == NOP)
		return -1;

	switch (BPF_CLASS(s->code)) {

	case BPF_LD:
	case BPF_ALU:
		return A_ATOM;

	case BPF_LDX:
		return X_ATOM;

	case BPF_ST:
	case BPF_STX:
		return s->k;

	case BPF_MISC:
		return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
	}
	return -1;
}

static void
compute_local_ud(b)
	struct block *b;
{
	struct slist *s;
	atomset def = 0, use = 0, kill = 0;
	int atom;

	for (s = b->stmts; s; s = s->next) {
		if (s->s.code == NOP)
			continue;
		atom = atomuse(&s->s);
		if (atom >= 0) {
			if (atom == AX_ATOM) {
				if (!ATOMELEM(def, X_ATOM))
					use |= ATOMMASK(X_ATOM);
				if (!ATOMELEM(def, A_ATOM))
					use |= ATOMMASK(A_ATOM);
			}
			else if (atom < N_ATOMS) {
				if (!ATOMELEM(def, atom))
					use |= ATOMMASK(atom);
			}
			else
				abort();
		}
		atom = atomdef(&s->s);
		if (atom >= 0) {
			if (!ATOMELEM(use, atom))
				kill |= ATOMMASK(atom);
			def |= ATOMMASK(atom);
		}
	}
	if (!ATOMELEM(def, A_ATOM) && BPF_CLASS(b->s.code) == BPF_JMP)
		use |= ATOMMASK(A_ATOM);

	b->def = def;
	b->kill = kill;
	b->in_use = use;
}
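
/*
 * Worked example (not in the original source) for the block
 *
 *	ldx M[2]	use += {M[2]}; def += {X}; X not yet used: kill += {X}
 *	txa		X already in def; def += {A}; kill += {A}
 *	st  M[2]	A already in def; M[2] is in use, so it is not killed
 *
 * so in_use = {M[2]} (upward-exposed) and kill = {A, X} (overwritten
 * before any use in this block).
 */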

/*
 * Assume graph is already leveled.
 */
static void
find_ud(root)
	struct block *root;
{
	int i, maxlevel;
	struct block *p;

	/*
	 * root->level is the highest level node found;
	 * count down from there.
	 */
	maxlevel = root->level;
	for (i = maxlevel; i >= 0; --i)
		for (p = levels[i]; p; p = p->link) {
			compute_local_ud(p);
			p->out_use = 0;
		}

	for (i = 1; i <= maxlevel; ++i) {
		for (p = levels[i]; p; p = p->link) {
			p->out_use |= JT(p)->in_use | JF(p)->in_use;
			p->in_use |= p->out_use &~ p->kill;
		}
	}
}

/*
 * These data structures are used in a Cocke and Schwartz style
 * value numbering scheme.  Since the flowgraph is acyclic,
 * exit values can be propagated from a node's predecessors
 * provided it is uniquely defined.
 */
struct valnode {
	int code;
	int v0, v1;
	int val;
	struct valnode *next;
};

#define MODULUS 213
static struct valnode *hashtbl[MODULUS];
static int curval;
static int maxval;

/* Integer constants mapped with the load immediate opcode. */
#define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L)

struct vmapinfo {
	int is_const;
	bpf_int32 const_val;
};

struct vmapinfo *vmap;
struct valnode *vnode_base;
struct valnode *next_vnode;

static void
init_val()
{
	curval = 0;
	next_vnode = vnode_base;
	memset((char *)vmap, 0, maxval * sizeof(*vmap));
	memset((char *)hashtbl, 0, sizeof hashtbl);
}

/* Because we really don't have an IR, this stuff is a little messy. */
static int
F(code, v0, v1)
	int code;
	int v0, v1;
{
	u_int hash;
	int val;
	struct valnode *p;

	hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
	hash %= MODULUS;

	for (p = hashtbl[hash]; p; p = p->next)
		if (p->code == code && p->v0 == v0 && p->v1 == v1)
			return p->val;

	val = ++curval;
	if (BPF_MODE(code) == BPF_IMM &&
	    (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
		vmap[val].const_val = v0;
		vmap[val].is_const = 1;
	}
	p = next_vnode++;
	p->val = val;
	p->code = code;
	p->v0 = v0;
	p->v1 = v1;
	p->next = hashtbl[hash];
	hashtbl[hash] = p;

	return val;
}
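
/*
 * Illustrative sketch (not in the original source): F() hash-conses
 * (code, v0, v1) triples, so structurally identical computations get
 * the same value number, and constants are remembered in vmap[]:
 *
 *	int v1 = K(2);		new value number; vmap[v1].is_const = 1
 *	int v2 = K(2);		same triple, so v2 == v1
 *	int v3 = F(BPF_LD|BPF_ABS|BPF_W, 14, 0L);
 *				distinct triple, fresh value number
 */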

static inline void
vstore(s, valp, newval, alter)
	struct stmt *s;
	int *valp;
	int newval;
	int alter;
{
	if (alter && *valp == newval)
		s->code = NOP;
	else
		*valp = newval;
}

static void
fold_op(s, v0, v1)
	struct stmt *s;
	int v0, v1;
{
	bpf_int32 a, b;

	a = vmap[v0].const_val;
	b = vmap[v1].const_val;

	switch (BPF_OP(s->code)) {
	case BPF_ADD:
		a += b;
		break;

	case BPF_SUB:
		a -= b;
		break;

	case BPF_MUL:
		a *= b;
		break;

	case BPF_DIV:
		if (b == 0)
			bpf_error("division by zero");
		a /= b;
		break;

	case BPF_AND:
		a &= b;
		break;

	case BPF_OR:
		a |= b;
		break;

	case BPF_LSH:
		a <<= b;
		break;

	case BPF_RSH:
		a >>= b;
		break;

	case BPF_NEG:
		a = -a;
		break;

	default:
		abort();
	}
	s->k = a;
	s->code = BPF_LD|BPF_IMM;
	done = 0;
}
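
/*
 * Illustrative example (not in the original source): if value numbering
 * has proved that A holds the constant 6, opt_stmt() hands "add #2" to
 * fold_op(), which rewrites the instruction in place:
 *
 *	add #2		->	ld #8		(BPF_LD|BPF_IMM, k = 8)
 *
 * and clears 'done' so a later pass can exploit the new constant.
 */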

static inline struct slist *
this_op(s)
	struct slist *s;
{
	while (s != 0 && s->s.code == NOP)
		s = s->next;
	return s;
}

static void
opt_not(b)
	struct block *b;
{
	struct block *tmp = JT(b);

	JT(b) = JF(b);
	JF(b) = tmp;
}

static void
opt_peep(b)
	struct block *b;
{
	struct slist *s;
	struct slist *next, *last;
	int val;

	s = b->stmts;
	if (s == 0)
		return;

	last = s;
	while (1) {
		s = this_op(s);
		if (s == 0)
			break;
		next = this_op(s->next);
		if (next == 0)
			break;
		last = next;

		/*
		 * st  M[k]	-->	st  M[k]
		 * ldx M[k]		tax
		 */
		if (s->s.code == BPF_ST &&
		    next->s.code == (BPF_LDX|BPF_MEM) &&
		    s->s.k == next->s.k) {
			done = 0;
			next->s.code = BPF_MISC|BPF_TAX;
		}
		/*
		 * ld  #k	-->	ldx  #k
		 * tax			txa
		 */
		if (s->s.code == (BPF_LD|BPF_IMM) &&
		    next->s.code == (BPF_MISC|BPF_TAX)) {
			s->s.code = BPF_LDX|BPF_IMM;
			next->s.code = BPF_MISC|BPF_TXA;
			done = 0;
		}
		/*
		 * This is an ugly special case, but it happens
		 * when you say tcp[k] or udp[k] where k is a constant.
		 */
		if (s->s.code == (BPF_LD|BPF_IMM)) {
			struct slist *add, *tax, *ild;

			/*
			 * Check that X isn't used on exit from this
			 * block (which the optimizer might cause).
			 * We know the code generator won't generate
			 * any local dependencies.
			 */
			if (ATOMELEM(b->out_use, X_ATOM))
				break;

			if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
				add = next;
			else
				add = this_op(next->next);
			if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
				break;

			tax = this_op(add->next);
			if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
				break;

			ild = this_op(tax->next);
			if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
			    BPF_MODE(ild->s.code) != BPF_IND)
				break;
			/*
			 * XXX We need to check that X is not
			 * subsequently used.  We know we can eliminate the
			 * accumulator modifications since it is defined
			 * by the last stmt of this sequence.
			 *
			 * We want to turn this sequence:
			 *
			 * (004) ldi     #0x2		{s}
			 * (005) ldxms   [14]		{next}  -- optional
			 * (006) addx			{add}
			 * (007) tax			{tax}
			 * (008) ild     [x+0]		{ild}
			 *
			 * into this sequence:
			 *
			 * (004) nop
			 * (005) ldxms   [14]
			 * (006) nop
			 * (007) nop
			 * (008) ild     [x+2]
			 *
			 */
			ild->s.k += s->s.k;
			s->s.code = NOP;
			add->s.code = NOP;
			tax->s.code = NOP;
			done = 0;
		}
		s = next;
	}
	/*
	 * If we have a subtract to do a comparison, and the X register
	 * is a known constant, we can merge this value into the
	 * comparison.
	 */
	if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X) &&
	    !ATOMELEM(b->out_use, A_ATOM)) {
		val = b->val[X_ATOM];
		if (vmap[val].is_const) {
			int op;

			b->s.k += vmap[val].const_val;
			op = BPF_OP(b->s.code);
			if (op == BPF_JGT || op == BPF_JGE) {
				struct block *t = JT(b);
				JT(b) = JF(b);
				JF(b) = t;
				b->s.k += 0x80000000;
			}
			last->s.code = NOP;
			done = 0;
		} else if (b->s.k == 0) {
			/*
			 * sub x  ->	nop
			 * j  #0	j  x
			 */
			last->s.code = NOP;
			b->s.code = BPF_CLASS(b->s.code) | BPF_OP(b->s.code) |
				BPF_X;
			done = 0;
		}
	}
	/*
	 * Likewise, a constant subtract can be simplified.
	 */
	else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K) &&
		 !ATOMELEM(b->out_use, A_ATOM)) {
		int op;

		b->s.k += last->s.k;
		last->s.code = NOP;
		op = BPF_OP(b->s.code);
		if (op == BPF_JGT || op == BPF_JGE) {
			struct block *t = JT(b);
			JT(b) = JF(b);
			JF(b) = t;
			b->s.k += 0x80000000;
		}
		done = 0;
	}
	/*
	 * and #k	nop
	 * jeq #0  ->	jset #k
	 */
	if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
	    !ATOMELEM(b->out_use, A_ATOM) && b->s.k == 0) {
		b->s.k = last->s.k;
		b->s.code = BPF_JMP|BPF_K|BPF_JSET;
		last->s.code = NOP;
		done = 0;
		opt_not(b);
	}
	/*
	 * If the accumulator is a known constant, we can compute the
	 * comparison result.
	 */
	val = b->val[A_ATOM];
	if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
		bpf_int32 v = vmap[val].const_val;
		switch (BPF_OP(b->s.code)) {

		case BPF_JEQ:
			v = v == b->s.k;
			break;

		case BPF_JGT:
			v = (unsigned)v > b->s.k;
			break;

		case BPF_JGE:
			v = (unsigned)v >= b->s.k;
			break;

		case BPF_JSET:
			v &= b->s.k;
			break;

		default:
			abort();
		}
		if (JF(b) != JT(b))
			done = 0;
		if (v)
			JF(b) = JT(b);
		else
			JT(b) = JF(b);
	}
}

/*
 * Compute the symbolic value of the expression of 's', and update
 * anything it defines in the value table 'val'.  If 'alter' is true,
 * do various optimizations.  This code would be cleaner if symbolic
 * evaluation and code transformations weren't folded together.
 */
static void
opt_stmt(s, val, alter)
	struct stmt *s;
	int val[];
	int alter;
{
	int op;
	int v;

	switch (s->code) {

	case BPF_LD|BPF_ABS|BPF_W:
	case BPF_LD|BPF_ABS|BPF_H:
	case BPF_LD|BPF_ABS|BPF_B:
		v = F(s->code, s->k, 0L);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_IND|BPF_W:
	case BPF_LD|BPF_IND|BPF_H:
	case BPF_LD|BPF_IND|BPF_B:
		v = val[X_ATOM];
		if (alter && vmap[v].is_const) {
			s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
			s->k += vmap[v].const_val;
			v = F(s->code, s->k, 0L);
			done = 0;
		}
		else
			v = F(s->code, s->k, v);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_LEN:
		v = F(s->code, 0L, 0L);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_IMM:
		v = K(s->k);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LDX|BPF_IMM:
		v = K(s->k);
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_LDX|BPF_MSH|BPF_B:
		v = F(s->code, s->k, 0L);
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_ALU|BPF_NEG:
		if (alter && vmap[val[A_ATOM]].is_const) {
			s->code = BPF_LD|BPF_IMM;
			s->k = -vmap[val[A_ATOM]].const_val;
			val[A_ATOM] = K(s->k);
		}
		else
			val[A_ATOM] = F(s->code, val[A_ATOM], 0L);
		break;

	case BPF_ALU|BPF_ADD|BPF_K:
	case BPF_ALU|BPF_SUB|BPF_K:
	case BPF_ALU|BPF_MUL|BPF_K:
	case BPF_ALU|BPF_DIV|BPF_K:
	case BPF_ALU|BPF_AND|BPF_K:
	case BPF_ALU|BPF_OR|BPF_K:
	case BPF_ALU|BPF_LSH|BPF_K:
	case BPF_ALU|BPF_RSH|BPF_K:
		op = BPF_OP(s->code);
		if (alter) {
			if (s->k == 0) {
				if (op == BPF_ADD || op == BPF_SUB ||
				    op == BPF_LSH || op == BPF_RSH ||
				    op == BPF_OR) {
					s->code = NOP;
					break;
				}
				if (op == BPF_MUL || op == BPF_AND) {
					s->code = BPF_LD|BPF_IMM;
					val[A_ATOM] = K(s->k);
					break;
				}
			}
			if (vmap[val[A_ATOM]].is_const) {
				fold_op(s, val[A_ATOM], K(s->k));
				val[A_ATOM] = K(s->k);
				break;
			}
		}
		val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k));
		break;

	case BPF_ALU|BPF_ADD|BPF_X:
	case BPF_ALU|BPF_SUB|BPF_X:
	case BPF_ALU|BPF_MUL|BPF_X:
	case BPF_ALU|BPF_DIV|BPF_X:
	case BPF_ALU|BPF_AND|BPF_X:
	case BPF_ALU|BPF_OR|BPF_X:
	case BPF_ALU|BPF_LSH|BPF_X:
	case BPF_ALU|BPF_RSH|BPF_X:
		op = BPF_OP(s->code);
		if (alter && vmap[val[X_ATOM]].is_const) {
			if (vmap[val[A_ATOM]].is_const) {
				fold_op(s, val[A_ATOM], val[X_ATOM]);
				val[A_ATOM] = K(s->k);
			}
			else {
				s->code = BPF_ALU|BPF_K|op;
				s->k = vmap[val[X_ATOM]].const_val;
				done = 0;
				val[A_ATOM] =
					F(s->code, val[A_ATOM], K(s->k));
			}
			break;
		}
		/*
		 * Check if we're doing something to an accumulator
		 * that is 0, and simplify.  This may not seem like
		 * much of a simplification but it could open up further
		 * optimizations.
		 * XXX We could also check for mul by 1, and -1, etc.
		 */
		if (alter && vmap[val[A_ATOM]].is_const
		    && vmap[val[A_ATOM]].const_val == 0) {
			/*
			 * 0 op X is only X for ADD and OR; 0 shifted,
			 * multiplied, divided or masked stays 0; and
			 * 0 - X is -X, so SUB must fall through to the
			 * generic case below.
			 */
			if (op == BPF_ADD || op == BPF_OR) {
				s->code = BPF_MISC|BPF_TXA;
				vstore(s, &val[A_ATOM], val[X_ATOM], alter);
				break;
			}
			else if (op == BPF_MUL || op == BPF_DIV ||
				 op == BPF_AND || op == BPF_LSH ||
				 op == BPF_RSH) {
				s->code = BPF_LD|BPF_IMM;
				s->k = 0;
				vstore(s, &val[A_ATOM], K(s->k), alter);
				break;
			}
			else if (op == BPF_NEG) {
				s->code = NOP;
				break;
			}
		}
		val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]);
		break;

	case BPF_MISC|BPF_TXA:
		vstore(s, &val[A_ATOM], val[X_ATOM], alter);
		break;

	case BPF_LD|BPF_MEM:
		v = val[s->k];
		if (alter && vmap[v].is_const) {
			s->code = BPF_LD|BPF_IMM;
			s->k = vmap[v].const_val;
			done = 0;
		}
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_MISC|BPF_TAX:
		vstore(s, &val[X_ATOM], val[A_ATOM], alter);
		break;

	case BPF_LDX|BPF_MEM:
		v = val[s->k];
		if (alter && vmap[v].is_const) {
			s->code = BPF_LDX|BPF_IMM;
			s->k = vmap[v].const_val;
			done = 0;
		}
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_ST:
		vstore(s, &val[s->k], val[A_ATOM], alter);
		break;

	case BPF_STX:
		vstore(s, &val[s->k], val[X_ATOM], alter);
		break;
	}
}
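
/*
 * Illustrative example (not in the original source): with 'alter' set,
 * a constant X turns an indirect load into an absolute one,
 *
 *	ldx #4
 *	ld  [x + 10]	->	ld  [14]	(BPF_LD|BPF_ABS)
 *
 * and the rewritten load gets a fresh value number from F().
 */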

static void
deadstmt(s, last)
	register struct stmt *s;
	register struct stmt *last[];
{
	register int atom;

	atom = atomuse(s);
	if (atom >= 0) {
		if (atom == AX_ATOM) {
			last[X_ATOM] = 0;
			last[A_ATOM] = 0;
		}
		else
			last[atom] = 0;
	}
	atom = atomdef(s);
	if (atom >= 0) {
		if (last[atom]) {
			done = 0;
			last[atom]->code = NOP;
		}
		last[atom] = s;
	}
}

static void
opt_deadstores(b)
	register struct block *b;
{
	register struct slist *s;
	register int atom;
	struct stmt *last[N_ATOMS];

	memset((char *)last, 0, sizeof last);

	for (s = b->stmts; s != 0; s = s->next)
		deadstmt(&s->s, last);
	deadstmt(&b->s, last);

	for (atom = 0; atom < N_ATOMS; ++atom)
		if (last[atom] && !ATOMELEM(b->out_use, atom)) {
			last[atom]->code = NOP;
			done = 0;
		}
}

static void
opt_blk(b, do_stmts)
	struct block *b;
	int do_stmts;
{
	struct slist *s;
	struct edge *p;
	int i;
	bpf_int32 aval;

	/*
	 * Initialize the atom values.
	 * If we have no predecessors, everything is undefined.
	 * Otherwise, we inherit our values from our predecessors.
	 * If any register has an ambiguous value (i.e. control paths are
	 * merging) give it the undefined value of 0.
	 */
	p = b->in_edges;
	if (p == 0)
		memset((char *)b->val, 0, sizeof(b->val));
	else {
		memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
		while ((p = p->next) != NULL) {
			for (i = 0; i < N_ATOMS; ++i)
				if (b->val[i] != p->pred->val[i])
					b->val[i] = 0;
		}
	}
	aval = b->val[A_ATOM];
	for (s = b->stmts; s; s = s->next)
		opt_stmt(&s->s, b->val, do_stmts);

	/*
	 * This is a special case: if we don't use anything from this
	 * block, and we load the accumulator with a value that is
	 * already there, or if this block is a return,
	 * eliminate all the statements.
	 */
	if (do_stmts &&
	    ((b->out_use == 0 && aval != 0 && b->val[A_ATOM] == aval) ||
	     BPF_CLASS(b->s.code) == BPF_RET)) {
		if (b->stmts != 0) {
			b->stmts = 0;
			done = 0;
		}
	} else {
		opt_peep(b);
		opt_deadstores(b);
	}
	/*
	 * Set up values for branch optimizer.
	 */
	if (BPF_SRC(b->s.code) == BPF_K)
		b->oval = K(b->s.k);
	else
		b->oval = b->val[X_ATOM];
	b->et.code = b->s.code;
	b->ef.code = -b->s.code;
}

/*
 * Return true if any register that is used on exit from 'succ' has
 * an exit value that is different from the corresponding exit value
 * from 'b'.
 */
static int
use_conflict(b, succ)
	struct block *b, *succ;
{
	int atom;
	atomset use = succ->out_use;

	if (use == 0)
		return 0;

	for (atom = 0; atom < N_ATOMS; ++atom)
		if (ATOMELEM(use, atom))
			if (b->val[atom] != succ->val[atom])
				return 1;
	return 0;
}

static struct block *
fold_edge(child, ep)
	struct block *child;
	struct edge *ep;
{
	int sense;
	int aval0, aval1, oval0, oval1;
	int code = ep->code;

	if (code < 0) {
		code = -code;
		sense = 0;
	} else
		sense = 1;

	if (child->s.code != code)
		return 0;

	aval0 = child->val[A_ATOM];
	oval0 = child->oval;
	aval1 = ep->pred->val[A_ATOM];
	oval1 = ep->pred->oval;

	if (aval0 != aval1)
		return 0;

	if (oval0 == oval1)
		/*
		 * The operands are identical, so the
		 * result is true if a true branch was
		 * taken to get here, otherwise false.
		 */
		return sense ? JT(child) : JF(child);

	if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
		/*
		 * At this point, we only know the comparison if we
		 * came down the true branch, and it was an equality
		 * comparison with a constant.  We rely on the fact that
		 * distinct constants have distinct value numbers.
		 */
		return JF(child);

	return 0;
}
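
/*
 * Illustrative example (not in the original source): suppose a dominating
 * edge is the true branch of "jeq #0x800" and 'child' performs
 * "jeq #0x806" on the same A value.  Distinct constants have distinct
 * value numbers, so oval0 != oval1 and fold_edge() returns JF(child):
 * A is known to be 0x800, so it cannot equal 0x806.
 */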

static void
opt_j(ep)
	struct edge *ep;
{
	register int i, k;
	register struct block *target;

	if (JT(ep->succ) == 0)
		return;

	if (JT(ep->succ) == JF(ep->succ)) {
		/*
		 * Common branch targets can be eliminated, provided
		 * there is no data dependency.
		 */
		if (!use_conflict(ep->pred, ep->succ->et.succ)) {
			done = 0;
			ep->succ = JT(ep->succ);
		}
	}
	/*
	 * For each edge dominator that matches the successor of this
	 * edge, promote the edge successor to its grandchild.
	 *
	 * XXX We violate the set abstraction here in favor of a reasonably
	 * efficient loop.
	 */
 top:
	for (i = 0; i < edgewords; ++i) {
		register bpf_u_int32 x = ep->edom[i];

		while (x != 0) {
			k = ffs(x) - 1;
			x &=~ (1 << k);
			k += i * BITS_PER_WORD;

			target = fold_edge(ep->succ, edges[k]);
			/*
			 * Check that there is no data dependency between
			 * nodes that will be violated if we move the edge.
			 */
			if (target != 0 && !use_conflict(ep->pred, target)) {
				done = 0;
				ep->succ = target;
				if (JT(target) != 0)
					/*
					 * Start over unless we hit a leaf.
					 */
					goto top;
				return;
			}
		}
	}
}


static void
or_pullup(b)
	struct block *b;
{
	int val, at_top;
	struct block *pull;
	struct block **diffp, **samep;
	struct edge *ep;

	ep = b->in_edges;
	if (ep == 0)
		return;

	/*
	 * Make sure each predecessor loads the same value.
	 * XXX why?
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])
			return;

	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

	at_top = 1;
	while (1) {
		if (*diffp == 0)
			return;

		if (JT(*diffp) != JT(b))
			return;

		if (!SET_MEMBER((*diffp)->dom, b->id))
			return;

		if ((*diffp)->val[A_ATOM] != val)
			break;

		diffp = &JF(*diffp);
		at_top = 0;
	}
	samep = &JF(*diffp);
	while (1) {
		if (*samep == 0)
			return;

		if (JT(*samep) != JT(b))
			return;

		if (!SET_MEMBER((*samep)->dom, b->id))
			return;

		if ((*samep)->val[A_ATOM] == val)
			break;

		/* XXX Need to check that there are no data dependencies
		   between *diffp and *samep.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JF(*samep);
	}
#ifdef notdef
	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])
			return;
#endif
	/* Pull up the node. */
	pull = *samep;
	*samep = JF(pull);
	JF(pull) = *diffp;

	/*
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
	 */
	if (at_top) {
		for (ep = b->in_edges; ep != 0; ep = ep->next) {
			if (JT(ep->pred) == b)
				JT(ep->pred) = pull;
			else
				JF(ep->pred) = pull;
		}
	}
	else
		*diffp = pull;

	done = 0;
}

static void
and_pullup(b)
	struct block *b;
{
	int val, at_top;
	struct block *pull;
	struct block **diffp, **samep;
	struct edge *ep;

	ep = b->in_edges;
	if (ep == 0)
		return;

	/*
	 * Make sure each predecessor loads the same value.
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])
			return;

	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

	at_top = 1;
	while (1) {
		if (*diffp == 0)
			return;

		if (JF(*diffp) != JF(b))
			return;

		if (!SET_MEMBER((*diffp)->dom, b->id))
			return;

		if ((*diffp)->val[A_ATOM] != val)
			break;

		diffp = &JT(*diffp);
		at_top = 0;
	}
	samep = &JT(*diffp);
	while (1) {
		if (*samep == 0)
			return;

		if (JF(*samep) != JF(b))
			return;

		if (!SET_MEMBER((*samep)->dom, b->id))
			return;

		if ((*samep)->val[A_ATOM] == val)
			break;

		/* XXX Need to check that there are no data dependencies
		   between *diffp and *samep.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JT(*samep);
	}
#ifdef notdef
	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])
			return;
#endif
	/* Pull up the node. */
	pull = *samep;
	*samep = JT(pull);
	JT(pull) = *diffp;

	/*
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
	 */
	if (at_top) {
		for (ep = b->in_edges; ep != 0; ep = ep->next) {
			if (JT(ep->pred) == b)
				JT(ep->pred) = pull;
			else
				JF(ep->pred) = pull;
		}
	}
	else
		*diffp = pull;

	done = 0;
}

static void
opt_blks(root, do_stmts)
	struct block *root;
	int do_stmts;
{
	int i, maxlevel;
	struct block *p;

	init_val();
	maxlevel = root->level;
	for (i = maxlevel; i >= 0; --i)
		for (p = levels[i]; p; p = p->link)
			opt_blk(p, do_stmts);

	if (do_stmts)
		/*
		 * No point trying to move branches; it can't possibly
		 * make a difference at this point.
		 */
		return;

	for (i = 1; i <= maxlevel; ++i) {
		for (p = levels[i]; p; p = p->link) {
			opt_j(&p->et);
			opt_j(&p->ef);
		}
	}
	for (i = 1; i <= maxlevel; ++i) {
		for (p = levels[i]; p; p = p->link) {
			or_pullup(p);
			and_pullup(p);
		}
	}
}

static inline void
link_inedge(parent, child)
	struct edge *parent;
	struct block *child;
{
	parent->next = child->in_edges;
	child->in_edges = parent;
}

static void
find_inedges(root)
	struct block *root;
{
	int i;
	struct block *b;

	for (i = 0; i < n_blocks; ++i)
		blocks[i]->in_edges = 0;

	/*
	 * Traverse the graph, adding each edge to the predecessor
	 * list of its successors.  Skip the leaves (i.e. level 0).
	 */
	for (i = root->level; i > 0; --i) {
		for (b = levels[i]; b != 0; b = b->link) {
			link_inedge(&b->et, JT(b));
			link_inedge(&b->ef, JF(b));
		}
	}
}

static void
opt_root(b)
	struct block **b;
{
	struct slist *tmp, *s;

	s = (*b)->stmts;
	(*b)->stmts = 0;
	while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
		*b = JT(*b);

	tmp = (*b)->stmts;
	if (tmp != 0)
		sappend(s, tmp);
	(*b)->stmts = s;

	/*
	 * If the root node is a return, then there is no
	 * point executing any statements (since the bpf machine
	 * has no side effects).
	 */
	if (BPF_CLASS((*b)->s.code) == BPF_RET)
		(*b)->stmts = 0;
}

static void
opt_loop(root, do_stmts)
	struct block *root;
	int do_stmts;
{

#ifdef BDEBUG
	if (dflag > 1)
		opt_dump(root);
#endif
	do {
		done = 1;
		find_levels(root);
		find_dom(root);
		find_closure(root);
		find_inedges(root);
		find_ud(root);
		find_edom(root);
		opt_blks(root, do_stmts);
#ifdef BDEBUG
		if (dflag > 1)
			opt_dump(root);
#endif
	} while (!done);
}

/*
 * Optimize the filter code in its dag representation.
 */
void
bpf_optimize(rootp)
	struct block **rootp;
{
	struct block *root;

	root = *rootp;

	opt_init(root);
	opt_loop(root, 0);
	opt_loop(root, 1);
	intern_blocks(root);
	opt_root(rootp);
	opt_cleanup();
}
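
/*
 * Usage sketch (not in the original source): the code generator builds
 * a DAG of struct blocks and, when optimization is requested, hands its
 * root here before the DAG is flattened into bpf_insns.  Note that
 * bpf_optimize() may replace the root block itself:
 *
 *	struct block *root = ...;	built by gencode.c
 *	bpf_optimize(&root);		two opt_loop() passes, then interning
 *	...				root now heads the optimized DAG
 */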

static void
make_marks(p)
	struct block *p;
{
	if (!isMarked(p)) {
		Mark(p);
		if (BPF_CLASS(p->s.code) != BPF_RET) {
			make_marks(JT(p));
			make_marks(JF(p));
		}
	}
}

/*
 * Mark code array such that isMarked(i) is true
 * only for nodes that are alive.
 */
static void
mark_code(p)
	struct block *p;
{
	cur_mark += 1;
	make_marks(p);
}

/*
 * True iff the two stmt lists load the same value from the packet into
 * the accumulator.
 */
static int
eq_slist(x, y)
	struct slist *x, *y;
{
	while (1) {
		while (x && x->s.code == NOP)
			x = x->next;
		while (y && y->s.code == NOP)
			y = y->next;
		if (x == 0)
			return y == 0;
		if (y == 0)
			return x == 0;
		if (x->s.code != y->s.code || x->s.k != y->s.k)
			return 0;
		x = x->next;
		y = y->next;
	}
}

static inline int
eq_blk(b0, b1)
	struct block *b0, *b1;
{
	if (b0->s.code == b1->s.code &&
	    b0->s.k == b1->s.k &&
	    b0->et.succ == b1->et.succ &&
	    b0->ef.succ == b1->ef.succ)
		return eq_slist(b0->stmts, b1->stmts);
	return 0;
}

static void
intern_blocks(root)
	struct block *root;
{
	struct block *p;
	int i, j;
	int done;
 top:
	done = 1;
	for (i = 0; i < n_blocks; ++i)
		blocks[i]->link = 0;

	mark_code(root);

	for (i = n_blocks - 1; --i >= 0; ) {
		if (!isMarked(blocks[i]))
			continue;
		for (j = i + 1; j < n_blocks; ++j) {
			if (!isMarked(blocks[j]))
				continue;
			if (eq_blk(blocks[i], blocks[j])) {
				blocks[i]->link = blocks[j]->link ?
					blocks[j]->link : blocks[j];
				break;
			}
		}
	}
	for (i = 0; i < n_blocks; ++i) {
		p = blocks[i];
		if (JT(p) == 0)
			continue;
		if (JT(p)->link) {
			done = 0;
			JT(p) = JT(p)->link;
		}
		if (JF(p)->link) {
			done = 0;
			JF(p) = JF(p)->link;
		}
	}
	if (!done)
		goto top;
}

static void
opt_cleanup()
{
	free((void *)vnode_base);
	free((void *)vmap);
	free((void *)edges);
	free((void *)space);
	free((void *)levels);
	free((void *)blocks);
}

/*
 * Return the number of stmts in 's'.
 */
static int
slength(s)
	struct slist *s;
{
	int n = 0;

	for (; s; s = s->next)
		if (s->s.code != NOP)
			++n;
	return n;
}

/*
 * Return the number of nodes reachable by 'p'.
 * All nodes should be initially unmarked.
 */
static int
count_blocks(p)
	struct block *p;
{
	if (p == 0 || isMarked(p))
		return 0;
	Mark(p);
	return count_blocks(JT(p)) + count_blocks(JF(p)) + 1;
}

/*
 * Do a depth first search on the flow graph, numbering the
 * basic blocks, and entering them into the 'blocks' array.
 */
175117683Spststatic void
175217683Spstnumber_blks_r(p)
175317683Spst	struct block *p;
175417683Spst{
175517683Spst	int n;
175617683Spst
175717683Spst	if (p == 0 || isMarked(p))
175817683Spst		return;
175917683Spst
176017683Spst	Mark(p);
176117683Spst	n = n_blocks++;
176217683Spst	p->id = n;
176317683Spst	blocks[n] = p;
176417683Spst
176517683Spst	number_blks_r(JT(p));
176617683Spst	number_blks_r(JF(p));
176717683Spst}
176817683Spst
/*
 * Return the number of stmts in the flowgraph reachable by 'p'.
 * The nodes should be unmarked before calling.
 *
 * Note that convert_code_r() lays out each block with room for its
 * long-jump expansions, so longjt and longjf must be counted here;
 * otherwise the array allocated by icode_to_fcode() is too small on
 * a retry pass and ftail underruns it.
 */
static int
count_stmts(p)
	struct block *p;
{
	int n;

	if (p == 0 || isMarked(p))
		return 0;
	Mark(p);
	n = count_stmts(JT(p)) + count_stmts(JF(p));
	return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
}

/*
 * Allocate memory.  All allocation is done before optimization
 * is begun.  A linear bound on the size of all data structures is computed
 * from the total number of blocks and/or statements.
 */
static void
opt_init(root)
	struct block *root;
{
	bpf_u_int32 *p;
	int i, n, max_stmts;

	/*
	 * First, count the blocks, so we can malloc an array to map
	 * block number to block.  Then, put the blocks into the array.
	 */
	unMarkAll();
	n = count_blocks(root);
	blocks = (struct block **)malloc(n * sizeof(*blocks));
	unMarkAll();
	n_blocks = 0;
	number_blks_r(root);

	n_edges = 2 * n_blocks;
	edges = (struct edge **)malloc(n_edges * sizeof(*edges));

	/*
	 * The number of levels is bounded by the number of nodes.
	 */
	levels = (struct block **)malloc(n_blocks * sizeof(*levels));

	edgewords = n_edges / (8 * sizeof(bpf_u_int32)) + 1;
	nodewords = n_blocks / (8 * sizeof(bpf_u_int32)) + 1;

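	/*
	 * One contiguous arena holds all the bit sets; it is carved up
	 * below in this order:
	 *
	 *	n_blocks * nodewords	dominator sets (b->dom)
	 *	n_blocks * nodewords	closure sets (b->closure)
	 *	n_edges * edgewords	edge dominator sets (et.edom, ef.edom)
	 */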
	/* XXX */
	space = (bpf_u_int32 *)malloc(2 * n_blocks * nodewords * sizeof(*space)
				 + n_edges * edgewords * sizeof(*space));
	p = space;
	all_dom_sets = p;
	for (i = 0; i < n; ++i) {
		blocks[i]->dom = p;
		p += nodewords;
	}
	all_closure_sets = p;
	for (i = 0; i < n; ++i) {
		blocks[i]->closure = p;
		p += nodewords;
	}
	all_edge_sets = p;
	for (i = 0; i < n; ++i) {
		register struct block *b = blocks[i];

		b->et.edom = p;
		p += edgewords;
		b->ef.edom = p;
		p += edgewords;
		b->et.id = i;
		edges[i] = &b->et;
		b->ef.id = n_blocks + i;
		edges[n_blocks + i] = &b->ef;
		b->et.pred = b;
		b->ef.pred = b;
	}
	max_stmts = 0;
	for (i = 0; i < n; ++i)
		max_stmts += slength(blocks[i]->stmts) + 1;
	/*
	 * We allocate at most 3 value numbers per statement,
	 * so this is an upper bound on the number of valnodes
	 * we'll need.
	 */
	maxval = 3 * max_stmts;
	vmap = (struct vmapinfo *)malloc(maxval * sizeof(*vmap));
	vnode_base = (struct valnode *)malloc(maxval * sizeof(*vnode_base));
}

/*
 * Some pointers used to convert the basic block form of the code
 * into the array form that BPF requires.  'fstart' will point to
 * the malloc'd array while 'ftail' is used during the recursive traversal.
 */
static struct bpf_insn *fstart;
static struct bpf_insn *ftail;

#ifdef BDEBUG
int bids[1000];
#endif

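/*
 * A sketch of the long-branch rewrite done below (the targets are
 * illustrative, not from any real filter): a conditional branch can
 * only encode an 8-bit offset, so a true target more than 255
 * instructions away,
 *
 *	jeq #0x800, Lfar, Lnear
 *
 * is emitted on the retry pass as
 *
 *	jeq #0x800, 0, 1	; T falls into the ja, F skips it
 *	ja  Lfar		; unconditional jump has a 32-bit k
 *	...			; Lnear follows
 *
 * which is why the block's length is inflated by longjt/longjf.
 */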
/*
 * Returns true if successful.  Returns false if a branch has
 * an offset that is too large.  If so, we have marked that
 * branch so that on a subsequent iteration, it will be treated
 * properly.
 */
static int
convert_code_r(p)
	struct block *p;
{
	struct bpf_insn *dst;
	struct slist *src;
	int slen;
	u_int off;
	int extrajmps;		/* number of extra jumps inserted */

	if (p == 0 || isMarked(p))
		return (1);
	Mark(p);

	if (convert_code_r(JF(p)) == 0)
		return (0);
	if (convert_code_r(JT(p)) == 0)
		return (0);

	slen = slength(p->stmts);
	dst = ftail -= (slen + 1 + p->longjt + p->longjf);
		/* inflate length by any extra jumps */

	p->offset = dst - fstart;

	for (src = p->stmts; src; src = src->next) {
		if (src->s.code == NOP)
			continue;
		dst->code = (u_short)src->s.code;
		dst->k = src->s.k;
		++dst;
	}
#ifdef BDEBUG
	bids[dst - fstart] = p->id + 1;
#endif
	dst->code = (u_short)p->s.code;
	dst->k = p->s.k;
	if (JT(p)) {
		extrajmps = 0;
		off = JT(p)->offset - (p->offset + slen) - 1;
		if (off >= 256) {
		    /* offset too large for branch, must add a jump */
		    if (p->longjt == 0) {
		    	/* mark this instruction and retry */
			p->longjt++;
			return(0);
		    }
		    /* branch if T to following jump */
		    dst->jt = extrajmps;
		    extrajmps++;
		    dst[extrajmps].code = BPF_JMP|BPF_JA;
		    dst[extrajmps].k = off - extrajmps;
		}
		else
		    dst->jt = off;
		off = JF(p)->offset - (p->offset + slen) - 1;
		if (off >= 256) {
		    /* offset too large for branch, must add a jump */
		    if (p->longjf == 0) {
		    	/* mark this instruction and retry */
			p->longjf++;
			return(0);
		    }
		    /* branch if F to following jump */
		    /* if two jumps are inserted, F goes to second one */
		    dst->jf = extrajmps;
		    extrajmps++;
		    dst[extrajmps].code = BPF_JMP|BPF_JA;
		    dst[extrajmps].k = off - extrajmps;
		}
		else
		    dst->jf = off;
	}
	return (1);
}

/*
 * Convert flowgraph intermediate representation to the
 * BPF array representation.  Set *lenp to the number of instructions.
 */
struct bpf_insn *
icode_to_fcode(root, lenp)
	struct block *root;
	int *lenp;
{
	int n;
	struct bpf_insn *fp;

	/*
	 * Loop doing convert_code_r() until no branches remain
	 * with too-large offsets.
	 */
	while (1) {
	    unMarkAll();
	    n = *lenp = count_stmts(root);

	    fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
	    memset((char *)fp, 0, sizeof(*fp) * n);
	    fstart = fp;
	    ftail = fp + n;

	    unMarkAll();
	    if (convert_code_r(root))
		break;
	    free(fp);
	}

	return fp;
}
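
/*
 * A sketch of typical use (pcap_compile() in gencode.c is the real
 * caller; opt_dump() below does the same under BDEBUG).  The returned
 * array is malloc'd and the caller is expected to free it:
 *
 *	struct bpf_program prog;
 *
 *	prog.bf_insns = icode_to_fcode(root, &prog.bf_len);
 *	...
 *	free((char *)prog.bf_insns);
 */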

#ifdef BDEBUG
static void
opt_dump(root)
	struct block *root;
{
	struct bpf_program f;

	memset(bids, 0, sizeof bids);
	f.bf_insns = icode_to_fcode(root, &f.bf_len);
	bpf_dump(&f, 1);
	putchar('\n');
	free((char *)f.bf_insns);
}
#endif