/*
 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that: (1) source code distributions
 * retain the above copyright notice and this paragraph in its entirety, (2)
 * distributions including binary code include the above copyright notice and
 * this paragraph in its entirety in the documentation or other materials
 * provided with the distribution, and (3) all advertising materials mentioning
 * features or use of this software display the following acknowledgement:
 * ``This product includes software developed by the University of California,
 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
 * the University nor the names of its contributors may be used to endorse
 * or promote products derived from this software without specific prior
 * written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 *  Optimization module for tcpdump intermediate representation.
 */
#ifndef lint
static const char rcsid[] _U_ =
    "@(#) $Header: /tcpdump/master/libpcap/optimize.c,v 1.91 2008-01-02 04:16:46 guy Exp $ (LBL)";
#endif

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#ifdef WIN32
#include <pcap-stdinc.h>
#else /* WIN32 */
#if HAVE_INTTYPES_H
#include <inttypes.h>
#elif HAVE_STDINT_H
#include <stdint.h>
#endif
#ifdef HAVE_SYS_BITYPES_H
#include <sys/bitypes.h>
#endif
#include <sys/types.h>
#endif /* WIN32 */

#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <string.h>

#include <errno.h>

#include "pcap-int.h"

#include "gencode.h"

#ifdef HAVE_OS_PROTO_H
#include "os-proto.h"
#endif

#ifdef BDEBUG
extern int dflag;
#endif

#if defined(MSDOS) && !defined(__DJGPP__)
extern int _w32_ffs (int mask);
#define ffs _w32_ffs
#endif

#if defined(WIN32) && defined (_MSC_VER)
int ffs(int mask);
#endif

/*
 * Represents a deleted instruction.
 */
#define NOP -1

/*
 * Register numbers for use-def values.
 * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
 * location.  A_ATOM is the accumulator and X_ATOM is the index
 * register.
 */
#define A_ATOM BPF_MEMWORDS
#define X_ATOM (BPF_MEMWORDS+1)

/*
 * This define is used to represent *both* the accumulator and
 * x register in use-def computations.
 * Currently, the use-def code assumes only one definition per instruction.
 */
#define AX_ATOM N_ATOMS

/*
 * A flag to indicate that further optimization is needed.
 * Iterative passes are continued until a given pass yields no
 * branch movement.
 */
static int done;

/*
 * A block is marked if and only if its mark equals the current mark.
 * Rather than traverse the code array, marking each item, 'cur_mark' is
 * incremented.  This automatically makes each element unmarked.
 */
static int cur_mark;
#define isMarked(p) ((p)->mark == cur_mark)
#define unMarkAll() cur_mark += 1
#define Mark(p) ((p)->mark = cur_mark)
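
/*
 * Illustrative usage sketch (not in the original source): a traversal
 * using these macros visits each block exactly once, and "clearing"
 * every mark costs a single increment of cur_mark:
 *
 *	unMarkAll();
 *	if (!isMarked(b)) {
 *		Mark(b);
 *		...visit b, recurse on JT(b) and JF(b)...
 *	}
 */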

static void opt_init(struct block *);
static void opt_cleanup(void);

static void intern_blocks(struct block *);

static void find_inedges(struct block *);
#ifdef BDEBUG
static void opt_dump(struct block *);
#endif

static int n_blocks;
struct block **blocks;
static int n_edges;
struct edge **edges;

/*
 * A bit vector set representation of the dominators.
 * We round up the set size to the next power of two.
 */
static int nodewords;
static int edgewords;
struct block **levels;
bpf_u_int32 *space;
#define BITS_PER_WORD (8*sizeof(bpf_u_int32))
/*
 * True if 'a' is in uset p.
 */
#define SET_MEMBER(p, a) \
((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))

/*
 * Add 'a' to uset p.
 */
#define SET_INSERT(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))

/*
 * Delete 'a' from uset p.
 */
#define SET_DELETE(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))

/*
 * a := a intersect b
 */
#define SET_INTERSECT(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &= *_y++;\
}

/*
 * a := a - b
 */
#define SET_SUBTRACT(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &=~ *_y++;\
}

/*
 * a := a union b
 */
#define SET_UNION(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ |= *_y++;\
}
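
/*
 * Illustrative sketch (not in the original source; values hypothetical):
 * with 32-bit words, element 37 lives at bit 37 % 32 == 5 of word
 * 37 / 32 == 1, so SET_INSERT(p, 37) sets bit 5 of p[1] and
 * SET_MEMBER(p, 37) tests the same bit.  The n-word operators just work
 * pairwise on the underlying words:
 *
 *	SET_INSERT(a, 37);
 *	if (SET_MEMBER(a, 37))
 *		SET_INTERSECT(a, b, nodewords);
 */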

static uset all_dom_sets;
static uset all_closure_sets;
static uset all_edge_sets;

#ifndef MAX
#define MAX(a,b) ((a)>(b)?(a):(b))
#endif

static void
find_levels_r(struct block *b)
{
	int level;

	if (isMarked(b))
		return;

	Mark(b);
	b->link = 0;

	if (JT(b)) {
		find_levels_r(JT(b));
		find_levels_r(JF(b));
		level = MAX(JT(b)->level, JF(b)->level) + 1;
	} else
		level = 0;
	b->level = level;
	b->link = levels[level];
	levels[level] = b;
}

/*
 * Level graph.  The levels go from 0 at the leaves to
 * N_LEVELS at the root.  The levels[] array points to the
 * first node of the level list, whose elements are linked
 * with the 'link' field of the struct block.
 */
static void
find_levels(struct block *root)
{
	memset((char *)levels, 0, n_blocks * sizeof(*levels));
	unMarkAll();
	find_levels_r(root);
}
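
/*
 * Example (hypothetical graph, not in the original source): if the two
 * "ret" leaves of a filter are L1 and L2, find_levels_r() gives them
 * level 0; a block branching to L1/L2 gets MAX(0, 0) + 1 == 1; a root
 * branching to that block and to L1 gets MAX(1, 0) + 1 == 2.  Walking
 * levels[i] from root->level down to 0 therefore visits every block
 * before its successors, which is the order the passes below rely on.
 */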

/*
 * Find dominator relationships.
 * Assumes graph has been leveled.
 */
static void
find_dom(struct block *root)
{
	int i;
	struct block *b;
	bpf_u_int32 *x;

	/*
	 * Initialize sets to contain all nodes.
	 */
	x = all_dom_sets;
	i = n_blocks * nodewords;
	while (--i >= 0)
		*x++ = ~0;
	/* Root starts off empty. */
	for (i = nodewords; --i >= 0;)
		root->dom[i] = 0;

	/* root->level is the highest level number found. */
	for (i = root->level; i >= 0; --i) {
		for (b = levels[i]; b; b = b->link) {
			SET_INSERT(b->dom, b->id);
			if (JT(b) == 0)
				continue;
			SET_INTERSECT(JT(b)->dom, b->dom, nodewords);
			SET_INTERSECT(JF(b)->dom, b->dom, nodewords);
		}
	}
}

static void
propedom(struct edge *ep)
{
	SET_INSERT(ep->edom, ep->id);
	if (ep->succ) {
		SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords);
		SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords);
	}
}

/*
 * Compute edge dominators.
 * Assumes graph has been leveled and predecessors established.
 */
static void
find_edom(struct block *root)
{
	int i;
	uset x;
	struct block *b;

	x = all_edge_sets;
	for (i = n_edges * edgewords; --i >= 0; )
		x[i] = ~0;

	/* root->level is the highest level number found. */
	memset(root->et.edom, 0, edgewords * sizeof(*(uset)0));
	memset(root->ef.edom, 0, edgewords * sizeof(*(uset)0));
	for (i = root->level; i >= 0; --i) {
		for (b = levels[i]; b != 0; b = b->link) {
			propedom(&b->et);
			propedom(&b->ef);
		}
	}
}

/*
 * Find the backwards transitive closure of the flow graph.  These sets
 * are backwards in the sense that we find the set of nodes that reach
 * a given node, not the set of nodes that can be reached by a node.
 *
 * Assumes graph has been leveled.
 */
static void
find_closure(struct block *root)
{
	int i;
	struct block *b;

	/*
	 * Initialize sets to contain no nodes.
	 */
	memset((char *)all_closure_sets, 0,
	      n_blocks * nodewords * sizeof(*all_closure_sets));

	/* root->level is the highest level number found. */
	for (i = root->level; i >= 0; --i) {
		for (b = levels[i]; b; b = b->link) {
			SET_INSERT(b->closure, b->id);
			if (JT(b) == 0)
				continue;
			SET_UNION(JT(b)->closure, b->closure, nodewords);
			SET_UNION(JF(b)->closure, b->closure, nodewords);
		}
	}
}

/*
 * Return the register number that is used by s.  If A and X are both
 * used, return AX_ATOM.  If no register is used, return -1.
 *
 * The implementation should probably change to an array access.
 */
static int
atomuse(struct stmt *s)
{
	register int c = s->code;

	if (c == NOP)
		return -1;

	switch (BPF_CLASS(c)) {

	case BPF_RET:
		return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
			(BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;

	case BPF_LD:
	case BPF_LDX:
		return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
			(BPF_MODE(c) == BPF_MEM) ? s->k : -1;

	case BPF_ST:
		return A_ATOM;

	case BPF_STX:
		return X_ATOM;

	case BPF_JMP:
	case BPF_ALU:
		if (BPF_SRC(c) == BPF_X)
			return AX_ATOM;
		return A_ATOM;

	case BPF_MISC:
		return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
	}
	abort();
	/* NOTREACHED */
}

/*
 * Return the register number that is defined by 's'.  We assume that
 * a single stmt cannot define more than one register.  If no register
 * is defined, return -1.
 *
 * The implementation should probably change to an array access.
 */
static int
atomdef(struct stmt *s)
{
	if (s->code == NOP)
		return -1;

	switch (BPF_CLASS(s->code)) {

	case BPF_LD:
	case BPF_ALU:
		return A_ATOM;

	case BPF_LDX:
		return X_ATOM;

	case BPF_ST:
	case BPF_STX:
		return s->k;

	case BPF_MISC:
		return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
	}
	return -1;
}
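
/*
 * Example (hypothetical statements, not in the original source): for
 * "st M[3]", atomuse() returns A_ATOM (the store reads the accumulator)
 * and atomdef() returns 3 (it defines scratch memory cell 3).  For
 * "add x" (BPF_ALU|BPF_ADD|BPF_X), atomuse() returns AX_ATOM, since
 * both A and X are read, and atomdef() returns A_ATOM.
 */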

/*
 * Compute the sets of registers used, defined, and killed by 'b'.
 *
 * "Used" means that a statement in 'b' uses the register before any
 * statement in 'b' defines it, i.e. it uses the value left in
 * that register by a predecessor block of this block.
 * "Defined" means that a statement in 'b' defines it.
 * "Killed" means that a statement in 'b' defines it before any
 * statement in 'b' uses it, i.e. it kills the value left in that
 * register by a predecessor block of this block.
 */
static void
compute_local_ud(struct block *b)
{
	struct slist *s;
	atomset def = 0, use = 0, kill = 0;
	int atom;

	for (s = b->stmts; s; s = s->next) {
		if (s->s.code == NOP)
			continue;
		atom = atomuse(&s->s);
		if (atom >= 0) {
			if (atom == AX_ATOM) {
				if (!ATOMELEM(def, X_ATOM))
					use |= ATOMMASK(X_ATOM);
				if (!ATOMELEM(def, A_ATOM))
					use |= ATOMMASK(A_ATOM);
			}
			else if (atom < N_ATOMS) {
				if (!ATOMELEM(def, atom))
					use |= ATOMMASK(atom);
			}
			else
				abort();
		}
		atom = atomdef(&s->s);
		if (atom >= 0) {
			if (!ATOMELEM(use, atom))
				kill |= ATOMMASK(atom);
			def |= ATOMMASK(atom);
		}
	}
	if (BPF_CLASS(b->s.code) == BPF_JMP) {
		/*
		 * XXX - what about RET?
		 */
		atom = atomuse(&b->s);
		if (atom >= 0) {
			if (atom == AX_ATOM) {
				if (!ATOMELEM(def, X_ATOM))
					use |= ATOMMASK(X_ATOM);
				if (!ATOMELEM(def, A_ATOM))
					use |= ATOMMASK(A_ATOM);
			}
			else if (atom < N_ATOMS) {
				if (!ATOMELEM(def, atom))
					use |= ATOMMASK(atom);
			}
			else
				abort();
		}
	}

	b->def = def;
	b->kill = kill;
	b->in_use = use;
}

/*
 * Assume graph is already leveled.
 */
static void
find_ud(struct block *root)
{
	int i, maxlevel;
	struct block *p;

	/*
	 * root->level is the highest level number found;
	 * count down from there.
	 */
	maxlevel = root->level;
	for (i = maxlevel; i >= 0; --i)
		for (p = levels[i]; p; p = p->link) {
			compute_local_ud(p);
			p->out_use = 0;
		}

	for (i = 1; i <= maxlevel; ++i) {
		for (p = levels[i]; p; p = p->link) {
			p->out_use |= JT(p)->in_use | JF(p)->in_use;
			p->in_use |= p->out_use &~ p->kill;
		}
	}
}
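
/*
 * Example (hypothetical, not in the original source): if a leaf block
 * ends with "jeq x", X_ATOM is in its in_use set.  A parent that
 * executes "ldx #1" inherits X_ATOM into its out_use, but X_ATOM is in
 * that parent's kill set, so it does not leak into the parent's own
 * in_use: the parent supplies the value itself rather than relying on
 * its predecessors.
 */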

/*
 * These data structures are used in a Cocke and Schwartz style
 * value numbering scheme.  Since the flowgraph is acyclic,
 * exit values can be propagated from a node's predecessors
 * provided it is uniquely defined.
 */
struct valnode {
	int code;
	int v0, v1;
	int val;
	struct valnode *next;
};

#define MODULUS 213
static struct valnode *hashtbl[MODULUS];
static int curval;
static int maxval;

/* Integer constants mapped with the load immediate opcode. */
#define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L)

struct vmapinfo {
	int is_const;
	bpf_int32 const_val;
};

struct vmapinfo *vmap;
struct valnode *vnode_base;
struct valnode *next_vnode;

static void
init_val(void)
{
	curval = 0;
	next_vnode = vnode_base;
	memset((char *)vmap, 0, maxval * sizeof(*vmap));
	memset((char *)hashtbl, 0, sizeof hashtbl);
}

/* Because we really don't have an IR, this stuff is a little messy. */
static int
F(int code, int v0, int v1)
{
	u_int hash;
	int val;
	struct valnode *p;

	hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
	hash %= MODULUS;

	for (p = hashtbl[hash]; p; p = p->next)
		if (p->code == code && p->v0 == v0 && p->v1 == v1)
			return p->val;

	val = ++curval;
	if (BPF_MODE(code) == BPF_IMM &&
	    (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
		vmap[val].const_val = v0;
		vmap[val].is_const = 1;
	}
	p = next_vnode++;
	p->val = val;
	p->code = code;
	p->v0 = v0;
	p->v1 = v1;
	p->next = hashtbl[hash];
	hashtbl[hash] = p;

	return val;
}
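
/*
 * Example (hypothetical value numbers, not in the original source):
 * evaluating "ldx #1" calls K(1), i.e. F(BPF_LD|BPF_IMM|BPF_W, 1, 0),
 * which might allocate value number 5 and record vmap[5].is_const = 1,
 * vmap[5].const_val = 1.  A later "ld #1" hashes to the same
 * (code, v0, v1) triple and gets the same value 5 back; that sharing
 * is what lets vstore() below turn a redundant reload into a NOP.
 */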

static inline void
vstore(struct stmt *s, int *valp, int newval, int alter)
{
	if (alter && *valp == newval)
		s->code = NOP;
	else
		*valp = newval;
}

static void
fold_op(struct stmt *s, int v0, int v1)
{
	bpf_u_int32 a, b;

	a = vmap[v0].const_val;
	b = vmap[v1].const_val;

	switch (BPF_OP(s->code)) {
	case BPF_ADD:
		a += b;
		break;

	case BPF_SUB:
		a -= b;
		break;

	case BPF_MUL:
		a *= b;
		break;

	case BPF_DIV:
		if (b == 0)
			bpf_error("division by zero");
		a /= b;
		break;

	case BPF_AND:
		a &= b;
		break;

	case BPF_OR:
		a |= b;
		break;

	case BPF_LSH:
		a <<= b;
		break;

	case BPF_RSH:
		a >>= b;
		break;

	case BPF_NEG:
		a = -a;
		break;

	default:
		abort();
	}
	s->k = a;
	s->code = BPF_LD|BPF_IMM;
	done = 0;
}
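
/*
 * Example (hypothetical, not in the original source): if the accumulator
 * holds value number v0 with vmap[v0].const_val == 6 and s is "and #3",
 * the caller passes v1 == K(3) and fold_op() computes 6 & 3, rewriting
 * s in place as "ld #2" (BPF_LD|BPF_IMM with k == 2) and clearing
 * 'done' so a later pass can exploit the new constant.
 */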

static inline struct slist *
this_op(struct slist *s)
{
	while (s != 0 && s->s.code == NOP)
		s = s->next;
	return s;
}

static void
opt_not(struct block *b)
{
	struct block *tmp = JT(b);

	JT(b) = JF(b);
	JF(b) = tmp;
}

static void
opt_peep(struct block *b)
{
	struct slist *s;
	struct slist *next, *last;
	int val;

	s = b->stmts;
	if (s == 0)
		return;

	last = s;
	for (/*empty*/; /*empty*/; s = next) {
		/*
		 * Skip over nops.
		 */
		s = this_op(s);
		if (s == 0)
			break;	/* nothing left in the block */

		/*
		 * Find the next real instruction after that one
		 * (skipping nops).
		 */
		next = this_op(s->next);
		if (next == 0)
			break;	/* no next instruction */
		last = next;

		/*
		 * st  M[k]	-->	st  M[k]
		 * ldx M[k]		tax
		 */
		if (s->s.code == BPF_ST &&
		    next->s.code == (BPF_LDX|BPF_MEM) &&
		    s->s.k == next->s.k) {
			done = 0;
			next->s.code = BPF_MISC|BPF_TAX;
		}
		/*
		 * ld  #k	-->	ldx  #k
		 * tax			txa
		 */
		if (s->s.code == (BPF_LD|BPF_IMM) &&
		    next->s.code == (BPF_MISC|BPF_TAX)) {
			s->s.code = BPF_LDX|BPF_IMM;
			next->s.code = BPF_MISC|BPF_TXA;
			done = 0;
		}
		/*
		 * This is an ugly special case, but it happens
		 * when you say tcp[k] or udp[k] where k is a constant.
		 */
		if (s->s.code == (BPF_LD|BPF_IMM)) {
			struct slist *add, *tax, *ild;

			/*
			 * Check that X isn't used on exit from this
			 * block (which the optimizer might cause).
			 * We know the code generator won't generate
			 * any local dependencies.
			 */
			if (ATOMELEM(b->out_use, X_ATOM))
				continue;

			/*
			 * Check that the instruction following the ldi
			 * is an addx, or it's an ldxms with an addx
			 * following it (with 0 or more nops between the
			 * ldxms and addx).
			 */
			if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
				add = next;
			else
				add = this_op(next->next);
			if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
				continue;

			/*
			 * Check that a tax follows that (with 0 or more
			 * nops between them).
			 */
			tax = this_op(add->next);
			if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
				continue;

			/*
			 * Check that an ild follows that (with 0 or more
			 * nops between them).
			 */
			ild = this_op(tax->next);
			if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
			    BPF_MODE(ild->s.code) != BPF_IND)
				continue;
			/*
			 * We want to turn this sequence:
			 *
			 * (004) ldi     #0x2		{s}
			 * (005) ldxms   [14]		{next}  -- optional
			 * (006) addx			{add}
			 * (007) tax			{tax}
			 * (008) ild     [x+0]		{ild}
			 *
			 * into this sequence:
			 *
			 * (004) nop
			 * (005) ldxms   [14]
			 * (006) nop
			 * (007) nop
			 * (008) ild     [x+2]
			 *
			 * XXX We need to check that X is not
			 * subsequently used, because we want to change
			 * what'll be in it after this sequence.
			 *
			 * We know we can eliminate the accumulator
			 * modifications earlier in the sequence since
			 * it is defined by the last stmt of this sequence
			 * (i.e., the last statement of the sequence loads
			 * a value into the accumulator, so we can eliminate
			 * earlier operations on the accumulator).
			 */
			ild->s.k += s->s.k;
			s->s.code = NOP;
			add->s.code = NOP;
			tax->s.code = NOP;
			done = 0;
		}
	}
	/*
	 * If the comparison at the end of a block is an equality
	 * comparison against a constant, and nobody uses the value
	 * we leave in the A register at the end of a block, and
	 * the operation preceding the comparison is an arithmetic
	 * operation, we can sometimes optimize it away.
	 */
	if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
	    !ATOMELEM(b->out_use, A_ATOM)) {
		/*
		 * We can optimize away certain subtractions of the
		 * X register.
		 */
		if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
			val = b->val[X_ATOM];
			if (vmap[val].is_const) {
				/*
				 * If we have a subtract to do a comparison,
				 * and the X register is a known constant,
				 * we can merge this value into the
				 * comparison:
				 *
				 * sub x  ->	nop
				 * jeq #y	jeq #(x+y)
				 */
				b->s.k += vmap[val].const_val;
				last->s.code = NOP;
				done = 0;
			} else if (b->s.k == 0) {
				/*
				 * If the X register isn't a constant,
				 * and the comparison in the test is
				 * against 0, we can compare with the
				 * X register, instead:
				 *
				 * sub x  ->	nop
				 * jeq #0	jeq x
				 */
				last->s.code = NOP;
				b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
				done = 0;
			}
		}
		/*
		 * Likewise, a constant subtract can be simplified:
		 *
		 * sub #x ->	nop
		 * jeq #y ->	jeq #(x+y)
		 */
		else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
			last->s.code = NOP;
			b->s.k += last->s.k;
			done = 0;
		}
		/*
		 * And, similarly, a constant AND can be simplified
		 * if we're testing against 0, i.e.:
		 *
		 * and #k	nop
		 * jeq #0  ->	jset #k
		 */
		else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
		    b->s.k == 0) {
			b->s.k = last->s.k;
			b->s.code = BPF_JMP|BPF_K|BPF_JSET;
			last->s.code = NOP;
			done = 0;
			opt_not(b);
		}
	}
	/*
	 * jset #0        ->   never
	 * jset #ffffffff ->   always
	 */
	if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
		if (b->s.k == 0)
			JT(b) = JF(b);
		if (b->s.k == 0xffffffff)
			JF(b) = JT(b);
	}
	/*
	 * If we're comparing against the index register, and the index
	 * register is a known constant, we can just compare against that
	 * constant.
	 */
	val = b->val[X_ATOM];
	if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
		bpf_int32 v = vmap[val].const_val;
		b->s.code &= ~BPF_X;
		b->s.k = v;
	}
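	/*
	 * Example (hypothetical, not in the original source): if an
	 * earlier "ldx #4" made the X register's value a known constant,
	 * a block ending in "jeq x" is rewritten here as "jeq #4"
	 * (clearing BPF_X from the source field selects BPF_K), which in
	 * turn lets the constant-accumulator test below resolve the
	 * branch outright.
	 */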
	/*
	 * If the accumulator is a known constant, we can compute the
	 * comparison result.
	 */
	val = b->val[A_ATOM];
	if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
		bpf_int32 v = vmap[val].const_val;
		switch (BPF_OP(b->s.code)) {

		case BPF_JEQ:
			v = v == b->s.k;
			break;

		case BPF_JGT:
			v = (unsigned)v > b->s.k;
			break;

		case BPF_JGE:
			v = (unsigned)v >= b->s.k;
			break;

		case BPF_JSET:
			v &= b->s.k;
			break;

		default:
			abort();
		}
		if (JF(b) != JT(b))
			done = 0;
		if (v)
			JF(b) = JT(b);
		else
			JT(b) = JF(b);
	}
}

/*
 * Compute the symbolic value of the expression in 's', and update
 * anything it defines in the value table 'val'.  If 'alter' is true,
 * do various optimizations.  This code would be cleaner if symbolic
 * evaluation and code transformations weren't folded together.
 */
static void
opt_stmt(struct stmt *s, int val[], int alter)
{
	int op;
	int v;

	switch (s->code) {

	case BPF_LD|BPF_ABS|BPF_W:
	case BPF_LD|BPF_ABS|BPF_H:
	case BPF_LD|BPF_ABS|BPF_B:
		v = F(s->code, s->k, 0L);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_IND|BPF_W:
	case BPF_LD|BPF_IND|BPF_H:
	case BPF_LD|BPF_IND|BPF_B:
		v = val[X_ATOM];
		if (alter && vmap[v].is_const) {
			s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
			s->k += vmap[v].const_val;
			v = F(s->code, s->k, 0L);
			done = 0;
		}
		else
			v = F(s->code, s->k, v);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_LEN:
		v = F(s->code, 0L, 0L);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_IMM:
		v = K(s->k);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LDX|BPF_IMM:
		v = K(s->k);
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_LDX|BPF_MSH|BPF_B:
		v = F(s->code, s->k, 0L);
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_ALU|BPF_NEG:
		if (alter && vmap[val[A_ATOM]].is_const) {
			s->code = BPF_LD|BPF_IMM;
			s->k = -vmap[val[A_ATOM]].const_val;
			val[A_ATOM] = K(s->k);
		}
		else
			val[A_ATOM] = F(s->code, val[A_ATOM], 0L);
		break;

	case BPF_ALU|BPF_ADD|BPF_K:
	case BPF_ALU|BPF_SUB|BPF_K:
	case BPF_ALU|BPF_MUL|BPF_K:
	case BPF_ALU|BPF_DIV|BPF_K:
	case BPF_ALU|BPF_AND|BPF_K:
	case BPF_ALU|BPF_OR|BPF_K:
	case BPF_ALU|BPF_LSH|BPF_K:
	case BPF_ALU|BPF_RSH|BPF_K:
		op = BPF_OP(s->code);
		if (alter) {
			if (s->k == 0) {
				/* don't optimize away "sub #0"
				 * as it may be needed later to
				 * fixup the generated math code */
				if (op == BPF_ADD ||
				    op == BPF_LSH || op == BPF_RSH ||
				    op == BPF_OR) {
					s->code = NOP;
					break;
				}
				if (op == BPF_MUL || op == BPF_AND) {
					s->code = BPF_LD|BPF_IMM;
					val[A_ATOM] = K(s->k);
					break;
				}
			}
			if (vmap[val[A_ATOM]].is_const) {
				fold_op(s, val[A_ATOM], K(s->k));
				val[A_ATOM] = K(s->k);
				break;
			}
		}
		val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k));
		break;

	case BPF_ALU|BPF_ADD|BPF_X:
	case BPF_ALU|BPF_SUB|BPF_X:
	case BPF_ALU|BPF_MUL|BPF_X:
	case BPF_ALU|BPF_DIV|BPF_X:
	case BPF_ALU|BPF_AND|BPF_X:
	case BPF_ALU|BPF_OR|BPF_X:
	case BPF_ALU|BPF_LSH|BPF_X:
	case BPF_ALU|BPF_RSH|BPF_X:
		op = BPF_OP(s->code);
		if (alter && vmap[val[X_ATOM]].is_const) {
			if (vmap[val[A_ATOM]].is_const) {
				fold_op(s, val[A_ATOM], val[X_ATOM]);
				val[A_ATOM] = K(s->k);
			}
			else {
				s->code = BPF_ALU|BPF_K|op;
				s->k = vmap[val[X_ATOM]].const_val;
				done = 0;
				val[A_ATOM] =
					F(s->code, val[A_ATOM], K(s->k));
			}
			break;
		}
		/*
		 * Check if we're doing something to an accumulator
		 * that is 0, and simplify.  This may not seem like
		 * much of a simplification but it could open up further
		 * optimizations.
		 * XXX We could also check for mul by 1, etc.
		 */
		if (alter && vmap[val[A_ATOM]].is_const
		    && vmap[val[A_ATOM]].const_val == 0) {
			if (op == BPF_ADD || op == BPF_OR) {
				s->code = BPF_MISC|BPF_TXA;
				vstore(s, &val[A_ATOM], val[X_ATOM], alter);
				break;
			}
			else if (op == BPF_MUL || op == BPF_DIV ||
				 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
				s->code = BPF_LD|BPF_IMM;
				s->k = 0;
				vstore(s, &val[A_ATOM], K(s->k), alter);
				break;
			}
			else if (op == BPF_NEG) {
				s->code = NOP;
				break;
			}
		}
		val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]);
		break;

	case BPF_MISC|BPF_TXA:
		vstore(s, &val[A_ATOM], val[X_ATOM], alter);
		break;

	case BPF_LD|BPF_MEM:
		v = val[s->k];
		if (alter && vmap[v].is_const) {
			s->code = BPF_LD|BPF_IMM;
			s->k = vmap[v].const_val;
			done = 0;
		}
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_MISC|BPF_TAX:
		vstore(s, &val[X_ATOM], val[A_ATOM], alter);
		break;

	case BPF_LDX|BPF_MEM:
		v = val[s->k];
		if (alter && vmap[v].is_const) {
			s->code = BPF_LDX|BPF_IMM;
			s->k = vmap[v].const_val;
			done = 0;
		}
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_ST:
		vstore(s, &val[s->k], val[A_ATOM], alter);
		break;

	case BPF_STX:
		vstore(s, &val[s->k], val[X_ATOM], alter);
		break;
	}
}

static void
deadstmt(register struct stmt *s, register struct stmt *last[])
{
	register int atom;

	atom = atomuse(s);
	if (atom >= 0) {
		if (atom == AX_ATOM) {
			last[X_ATOM] = 0;
			last[A_ATOM] = 0;
		}
		else
			last[atom] = 0;
	}
	atom = atomdef(s);
	if (atom >= 0) {
		if (last[atom]) {
			done = 0;
			last[atom]->code = NOP;
		}
		last[atom] = s;
	}
}

static void
opt_deadstores(register struct block *b)
{
	register struct slist *s;
	register int atom;
	struct stmt *last[N_ATOMS];

	memset((char *)last, 0, sizeof last);

	for (s = b->stmts; s != 0; s = s->next)
		deadstmt(&s->s, last);
	deadstmt(&b->s, last);

	for (atom = 0; atom < N_ATOMS; ++atom)
		if (last[atom] && !ATOMELEM(b->out_use, atom)) {
			last[atom]->code = NOP;
			done = 0;
		}
}
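
/*
 * Example (hypothetical block, not in the original source): in the
 * sequence "st M[2]; ld #1; st M[2]", the second store redefines
 * scratch cell 2 before anything reads it, so deadstmt() turns the
 * first "st M[2]" into a NOP; if atom 2 is also absent from
 * b->out_use, the loop above NOPs the final store as well.
 */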

static void
opt_blk(struct block *b, int do_stmts)
{
	struct slist *s;
	struct edge *p;
	int i;
	bpf_int32 aval, xval;

#if 0
	for (s = b->stmts; s && s->next; s = s->next)
		if (BPF_CLASS(s->s.code) == BPF_JMP) {
			do_stmts = 0;
			break;
		}
#endif

	/*
	 * Initialize the atom values.
	 */
	p = b->in_edges;
	if (p == 0) {
		/*
		 * We have no predecessors, so everything is undefined
		 * upon entry to this block.
		 */
		memset((char *)b->val, 0, sizeof(b->val));
	} else {
		/*
		 * Inherit values from our predecessors.
		 *
		 * First, get the values from the predecessor along the
		 * first edge leading to this node.
		 */
		memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
		/*
		 * Now look at all the other nodes leading to this node.
		 * If, for the predecessor along that edge, a register
		 * has a different value from the one we have (i.e.,
		 * control paths are merging, and the merging paths
		 * assign different values to that register), give the
		 * register the undefined value of 0.
		 */
		while ((p = p->next) != NULL) {
			for (i = 0; i < N_ATOMS; ++i)
				if (b->val[i] != p->pred->val[i])
					b->val[i] = 0;
		}
	}
	aval = b->val[A_ATOM];
	xval = b->val[X_ATOM];
	for (s = b->stmts; s; s = s->next)
		opt_stmt(&s->s, b->val, do_stmts);

	/*
	 * This is a special case: if we don't use anything from this
	 * block, and we load the accumulator or index register with a
	 * value that is already there, or if this block is a return,
	 * eliminate all the statements.
	 *
	 * XXX - what if it does a store?
	 *
	 * XXX - why does it matter whether we use anything from this
	 * block?  If the accumulator or index register doesn't change
	 * its value, isn't that OK even if we use that value?
	 *
	 * XXX - if we load the accumulator with a different value,
	 * and the block ends with a conditional branch, we obviously
	 * can't eliminate it, as the branch depends on that value.
	 * For the index register, the conditional branch only depends
	 * on the index register value if the test is against the index
	 * register value rather than a constant; if nothing uses the
	 * value we put into the index register, and we're not testing
	 * against the index register's value, and there aren't any
	 * other problems that would keep us from eliminating this
	 * block, can we eliminate it?
	 */
	if (do_stmts &&
	    ((b->out_use == 0 && aval != 0 && b->val[A_ATOM] == aval &&
	      xval != 0 && b->val[X_ATOM] == xval) ||
	     BPF_CLASS(b->s.code) == BPF_RET)) {
		if (b->stmts != 0) {
			b->stmts = 0;
			done = 0;
		}
	} else {
		opt_peep(b);
		opt_deadstores(b);
	}
	/*
	 * Set up values for branch optimizer.
	 */
	if (BPF_SRC(b->s.code) == BPF_K)
		b->oval = K(b->s.k);
	else
		b->oval = b->val[X_ATOM];
	b->et.code = b->s.code;
	b->ef.code = -b->s.code;
}

/*
 * Return true if any register that is used on exit from 'succ' has
 * an exit value that is different from the corresponding exit value
 * from 'b'.
 */
static int
use_conflict(struct block *b, struct block *succ)
{
	int atom;
	atomset use = succ->out_use;

	if (use == 0)
		return 0;

	for (atom = 0; atom < N_ATOMS; ++atom)
		if (ATOMELEM(use, atom))
			if (b->val[atom] != succ->val[atom])
				return 1;
	return 0;
}

static struct block *
fold_edge(struct block *child, struct edge *ep)
{
	int sense;
	int aval0, aval1, oval0, oval1;
	int code = ep->code;

	if (code < 0) {
		code = -code;
		sense = 0;
	} else
		sense = 1;

	if (child->s.code != code)
		return 0;

	aval0 = child->val[A_ATOM];
	oval0 = child->oval;
	aval1 = ep->pred->val[A_ATOM];
	oval1 = ep->pred->oval;

	if (aval0 != aval1)
		return 0;

	if (oval0 == oval1)
		/*
		 * The operands of the branch instructions are
		 * identical, so the result is true if a true
		 * branch was taken to get here, otherwise false.
		 */
		return sense ? JT(child) : JF(child);

	if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
		/*
		 * At this point, we only know the comparison if we
		 * came down the true branch, and it was an equality
		 * comparison with a constant.
		 *
		 * I.e., if we came down the true branch, and the branch
		 * was an equality comparison with a constant, we know the
		 * accumulator contains that constant.  If we came down
		 * the false branch, or the comparison wasn't with a
		 * constant, we don't know what was in the accumulator.
		 *
		 * We rely on the fact that distinct constants have distinct
		 * value numbers.
		 */
		return JF(child);

	return 0;
}
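
/*
 * Example (hypothetical, not in the original source): suppose an edge's
 * predecessor ended with "jeq #0x800" and, coming down its true branch,
 * we reach a child block performing the same "jeq #0x800" on the same
 * A-register value number.  The operands match (oval0 == oval1), so
 * fold_edge() returns JT(child): the second test must also succeed, and
 * opt_j() can route the edge straight to that target.
 */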

static void
opt_j(struct edge *ep)
{
	register int i, k;
	register struct block *target;

	if (JT(ep->succ) == 0)
		return;

	if (JT(ep->succ) == JF(ep->succ)) {
		/*
		 * Common branch targets can be eliminated, provided
		 * there is no data dependency.
		 */
		if (!use_conflict(ep->pred, ep->succ->et.succ)) {
			done = 0;
			ep->succ = JT(ep->succ);
		}
	}
	/*
	 * For each edge dominator that matches the successor of this
	 * edge, promote the edge successor to its grandchild.
	 *
	 * XXX We violate the set abstraction here in favor of a
	 * reasonably efficient loop.
	 */
 top:
	for (i = 0; i < edgewords; ++i) {
		register bpf_u_int32 x = ep->edom[i];

		while (x != 0) {
			k = ffs(x) - 1;
			x &=~ (1 << k);
			k += i * BITS_PER_WORD;

			target = fold_edge(ep->succ, edges[k]);
			/*
			 * Check that there is no data dependency between
			 * nodes that will be violated if we move the edge.
			 */
			if (target != 0 && !use_conflict(ep->pred, target)) {
				done = 0;
				ep->succ = target;
				if (JT(target) != 0)
					/*
					 * Start over unless we hit a leaf.
					 */
					goto top;
				return;
			}
		}
	}
}


static void
or_pullup(struct block *b)
{
	int val, at_top;
	struct block *pull;
	struct block **diffp, **samep;
	struct edge *ep;

	ep = b->in_edges;
	if (ep == 0)
		return;

	/*
	 * Make sure each predecessor loads the same value.
	 * XXX why?
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])
			return;

	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

	at_top = 1;
	while (1) {
		if (*diffp == 0)
			return;

		if (JT(*diffp) != JT(b))
			return;

		if (!SET_MEMBER((*diffp)->dom, b->id))
			return;

		if ((*diffp)->val[A_ATOM] != val)
			break;

		diffp = &JF(*diffp);
		at_top = 0;
	}
	samep = &JF(*diffp);
	while (1) {
		if (*samep == 0)
			return;

		if (JT(*samep) != JT(b))
			return;

		if (!SET_MEMBER((*samep)->dom, b->id))
			return;

		if ((*samep)->val[A_ATOM] == val)
			break;

		/* XXX Need to check that there are no data dependencies
		   between dp0 and dp1.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JF(*samep);
	}
#ifdef notdef
	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])
			return;
#endif
	/* Pull up the node. */
	pull = *samep;
	*samep = JF(pull);
	JF(pull) = *diffp;

	/*
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
	 */
	if (at_top) {
		for (ep = b->in_edges; ep != 0; ep = ep->next) {
			if (JT(ep->pred) == b)
				JT(ep->pred) = pull;
			else
				JF(ep->pred) = pull;
		}
	}
	else
		*diffp = pull;

	done = 0;
}

static void
and_pullup(struct block *b)
{
	int val, at_top;
	struct block *pull;
	struct block **diffp, **samep;
	struct edge *ep;

	ep = b->in_edges;
	if (ep == 0)
		return;

	/*
	 * Make sure each predecessor loads the same value.
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])
			return;

	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

	at_top = 1;
	while (1) {
		if (*diffp == 0)
			return;

		if (JF(*diffp) != JF(b))
			return;

		if (!SET_MEMBER((*diffp)->dom, b->id))
			return;

		if ((*diffp)->val[A_ATOM] != val)
			break;

		diffp = &JT(*diffp);
		at_top = 0;
	}
	samep = &JT(*diffp);
	while (1) {
		if (*samep == 0)
			return;

		if (JF(*samep) != JF(b))
			return;

		if (!SET_MEMBER((*samep)->dom, b->id))
			return;

		if ((*samep)->val[A_ATOM] == val)
			break;

		/* XXX Need to check that there are no data dependencies
		   between diffp and samep.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JT(*samep);
	}
#ifdef notdef
	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])
			return;
#endif
	/* Pull up the node. */
	pull = *samep;
	*samep = JT(pull);
	JT(pull) = *diffp;

	/*
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
	 */
	if (at_top) {
		for (ep = b->in_edges; ep != 0; ep = ep->next) {
			if (JT(ep->pred) == b)
				JT(ep->pred) = pull;
			else
				JF(ep->pred) = pull;
		}
	}
	else
		*diffp = pull;

	done = 0;
}

static void
opt_blks(struct block *root, int do_stmts)
{
	int i, maxlevel;
	struct block *p;

	init_val();
	maxlevel = root->level;

	find_inedges(root);
	for (i = maxlevel; i >= 0; --i)
		for (p = levels[i]; p; p = p->link)
			opt_blk(p, do_stmts);

	if (do_stmts)
		/*
		 * No point trying to move branches; it can't possibly
		 * make a difference at this point.
		 */
		return;

	for (i = 1; i <= maxlevel; ++i) {
		for (p = levels[i]; p; p = p->link) {
			opt_j(&p->et);
			opt_j(&p->ef);
		}
	}

	find_inedges(root);
	for (i = 1; i <= maxlevel; ++i) {
		for (p = levels[i]; p; p = p->link) {
			or_pullup(p);
			and_pullup(p);
		}
	}
}

static inline void
link_inedge(struct edge *parent, struct block *child)
{
	parent->next = child->in_edges;
	child->in_edges = parent;
}

static void
find_inedges(struct block *root)
{
	int i;
	struct block *b;

	for (i = 0; i < n_blocks; ++i)
		blocks[i]->in_edges = 0;

	/*
	 * Traverse the graph, adding each edge to the predecessor
	 * list of its successors.  Skip the leaves (i.e. level 0).
	 */
	for (i = root->level; i > 0; --i) {
		for (b = levels[i]; b != 0; b = b->link) {
			link_inedge(&b->et, JT(b));
			link_inedge(&b->ef, JF(b));
		}
	}
}

static void
opt_root(struct block **b)
{
	struct slist *tmp, *s;

	s = (*b)->stmts;
	(*b)->stmts = 0;
	while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
		*b = JT(*b);

	tmp = (*b)->stmts;
	if (tmp != 0)
		sappend(s, tmp);
	(*b)->stmts = s;

	/*
	 * If the root node is a return, then there is no
	 * point executing any statements (since the bpf machine
	 * has no side effects).
	 */
	if (BPF_CLASS((*b)->s.code) == BPF_RET)
		(*b)->stmts = 0;
}
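
/*
 * Example (hypothetical, not in the original source): if optimization
 * collapsed the whole filter into a single "ret #68" block, any loads
 * still queued ahead of it are dropped here; a BPF return depends only
 * on its own operand, and the machine has no side effects to preserve.
 */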
164417683Spst
static void
opt_loop(struct block *root, int do_stmts)
{

#ifdef BDEBUG
	if (dflag > 1) {
		printf("opt_loop(root, %d) begin\n", do_stmts);
		opt_dump(root);
	}
#endif
	do {
		done = 1;
		find_levels(root);
		find_dom(root);
		find_closure(root);
		find_ud(root);
		find_edom(root);
		opt_blks(root, do_stmts);
#ifdef BDEBUG
		if (dflag > 1) {
			printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, done);
			opt_dump(root);
		}
#endif
	} while (!done);
}

/*
 * Optimize the filter code in its dag representation.
 */
void
bpf_optimize(struct block **rootp)
{
	struct block *root;

	root = *rootp;

	opt_init(root);
	opt_loop(root, 0);
	opt_loop(root, 1);
	intern_blocks(root);
#ifdef BDEBUG
	if (dflag > 1) {
		printf("after intern_blocks()\n");
		opt_dump(root);
	}
#endif
	opt_root(rootp);
#ifdef BDEBUG
	if (dflag > 1) {
		printf("after opt_root()\n");
		opt_dump(root);
	}
#endif
	opt_cleanup();
}

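/*
 * Recursively mark every block reachable from 'p'.  Return blocks
 * have no successors, so the recursion stops at them.
 */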
static void
make_marks(struct block *p)
{
	if (!isMarked(p)) {
		Mark(p);
		if (BPF_CLASS(p->s.code) != BPF_RET) {
			make_marks(JT(p));
			make_marks(JF(p));
		}
	}
}

/*
 * Mark the blocks reachable from 'p' so that isMarked() is true
 * only for nodes that are alive.
 */
static void
mark_code(struct block *p)
{
	cur_mark += 1;
	make_marks(p);
}

/*
 * True iff the two stmt lists are identical, ignoring NOPs; in
 * particular, they then load the same value from the packet into
 * the accumulator.
 */
static int
eq_slist(struct slist *x, struct slist *y)
{
	while (1) {
		while (x && x->s.code == NOP)
			x = x->next;
		while (y && y->s.code == NOP)
			y = y->next;
		if (x == 0)
			return y == 0;
		if (y == 0)
			return x == 0;
		if (x->s.code != y->s.code || x->s.k != y->s.k)
			return 0;
		x = x->next;
		y = y->next;
	}
}

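/*
 * True iff blocks 'b0' and 'b1' compute the same test on the same
 * operands and have the same successors and equivalent statement
 * lists, so that either block can substitute for the other.
 */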
static inline int
eq_blk(struct block *b0, struct block *b1)
{
	if (b0->s.code == b1->s.code &&
	    b0->s.k == b1->s.k &&
	    b0->et.succ == b1->et.succ &&
	    b0->ef.succ == b1->ef.succ)
		return eq_slist(b0->stmts, b1->stmts);
	return 0;
}

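/*
 * Merge equivalent blocks: whenever two live blocks satisfy eq_blk(),
 * point all branches into one of them at the other, then repeat until
 * a pass makes no change.  This removes duplicate subgraphs left
 * behind by the earlier optimizations.
 */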
static void
intern_blocks(struct block *root)
{
	struct block *p;
	int i, j;
	int done1; /* don't shadow global */
 top:
	done1 = 1;
	for (i = 0; i < n_blocks; ++i)
		blocks[i]->link = 0;

	mark_code(root);

	for (i = n_blocks - 1; --i >= 0; ) {
		if (!isMarked(blocks[i]))
			continue;
		for (j = i + 1; j < n_blocks; ++j) {
			if (!isMarked(blocks[j]))
				continue;
			if (eq_blk(blocks[i], blocks[j])) {
				blocks[i]->link = blocks[j]->link ?
					blocks[j]->link : blocks[j];
				break;
			}
		}
	}
	for (i = 0; i < n_blocks; ++i) {
		p = blocks[i];
		if (JT(p) == 0)
			continue;
		if (JT(p)->link) {
			done1 = 0;
			JT(p) = JT(p)->link;
		}
		if (JF(p)->link) {
			done1 = 0;
			JF(p) = JF(p)->link;
		}
	}
	if (!done1)
		goto top;
}

static void
opt_cleanup(void)
{
	free((void *)vnode_base);
	free((void *)vmap);
	free((void *)edges);
	free((void *)space);
	free((void *)levels);
	free((void *)blocks);
}

/*
 * Return the number of stmts in 's'.
 */
static u_int
slength(struct slist *s)
{
	u_int n = 0;

	for (; s; s = s->next)
		if (s->s.code != NOP)
			++n;
	return n;
}

/*
 * Return the number of nodes reachable from 'p'.
 * All nodes should be initially unmarked.
 */
static int
count_blocks(struct block *p)
{
	if (p == 0 || isMarked(p))
		return 0;
	Mark(p);
	return count_blocks(JT(p)) + count_blocks(JF(p)) + 1;
}

/*
 * Do a depth first search on the flow graph, numbering the
 * basic blocks, and entering them into the 'blocks' array.
 */
static void
number_blks_r(struct block *p)
{
	int n;

	if (p == 0 || isMarked(p))
		return;

	Mark(p);
	n = n_blocks++;
	p->id = n;
	blocks[n] = p;

	number_blks_r(JT(p));
	number_blks_r(JF(p));
}

/*
 * Return the number of stmts in the flowgraph reachable from 'p'.
 * The nodes should be unmarked before calling.
 *
 * Note that "stmts" means "instructions", and that this includes
 *
 *	side-effect statements in 'p' (slength(p->stmts));
 *
 *	statements in the true branch from 'p' (count_stmts(JT(p)));
 *
 *	statements in the false branch from 'p' (count_stmts(JF(p)));
 *
 *	the conditional jump itself (1);
 *
 *	an extra long jump if the true branch requires it (p->longjt);
 *
 *	an extra long jump if the false branch requires it (p->longjf).
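 *
 * For example (illustrative numbers only): a branch block with three
 * non-NOP statements and a one-instruction long jump on its true
 * branch contributes 3 + 1 + 1 = 5 instructions, plus whatever its
 * successors contribute.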
 */
static u_int
count_stmts(struct block *p)
{
	u_int n;

	if (p == 0 || isMarked(p))
		return 0;
	Mark(p);
	n = count_stmts(JT(p)) + count_stmts(JF(p));
	return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
}

/*
 * Allocate memory.  All allocation is done before optimization
 * is begun.  A linear bound on the size of all data structures is computed
 * from the total number of blocks and/or statements.
 */
static void
opt_init(struct block *root)
{
	bpf_u_int32 *p;
	int i, n, max_stmts;

	/*
	 * First, count the blocks, so we can malloc an array to map
	 * block number to block.  Then, put the blocks into the array.
	 */
	unMarkAll();
	n = count_blocks(root);
	blocks = (struct block **)calloc(n, sizeof(*blocks));
	if (blocks == NULL)
		bpf_error("malloc");
	unMarkAll();
	n_blocks = 0;
	number_blks_r(root);

	n_edges = 2 * n_blocks;
	edges = (struct edge **)calloc(n_edges, sizeof(*edges));
	if (edges == NULL)
		bpf_error("malloc");

	/*
	 * The number of levels is bounded by the number of nodes.
	 */
	levels = (struct block **)calloc(n_blocks, sizeof(*levels));
	if (levels == NULL)
		bpf_error("malloc");

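	/*
	 * Each dominator, closure, and edge-dominator set is a bit
	 * vector, one bit per node or edge, rounded up to a whole
	 * number of bpf_u_int32 words.
	 */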
	edgewords = n_edges / (8 * sizeof(bpf_u_int32)) + 1;
	nodewords = n_blocks / (8 * sizeof(bpf_u_int32)) + 1;

	/* XXX */
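	/*
	 * A single arena supplies every bit vector: a dominator set
	 * and a closure set for each block (2 * n_blocks sets of
	 * 'nodewords' words), followed by an edge-dominator set for
	 * each edge (n_edges sets of 'edgewords' words).  The loops
	 * below carve it up.
	 */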
	space = (bpf_u_int32 *)malloc(2 * n_blocks * nodewords * sizeof(*space)
				 + n_edges * edgewords * sizeof(*space));
	if (space == NULL)
		bpf_error("malloc");
	p = space;
	all_dom_sets = p;
	for (i = 0; i < n; ++i) {
		blocks[i]->dom = p;
		p += nodewords;
	}
	all_closure_sets = p;
	for (i = 0; i < n; ++i) {
		blocks[i]->closure = p;
		p += nodewords;
	}
	all_edge_sets = p;
	for (i = 0; i < n; ++i) {
		register struct block *b = blocks[i];

		b->et.edom = p;
		p += edgewords;
		b->ef.edom = p;
		p += edgewords;
		b->et.id = i;
		edges[i] = &b->et;
		b->ef.id = n_blocks + i;
		edges[n_blocks + i] = &b->ef;
		b->et.pred = b;
		b->ef.pred = b;
	}
	max_stmts = 0;
	for (i = 0; i < n; ++i)
		max_stmts += slength(blocks[i]->stmts) + 1;
	/*
	 * We allocate at most 3 value numbers per statement,
	 * so this is an upper bound on the number of valnodes
	 * we'll need.
	 */
	maxval = 3 * max_stmts;
	vmap = (struct vmapinfo *)calloc(maxval, sizeof(*vmap));
	vnode_base = (struct valnode *)calloc(maxval, sizeof(*vnode_base));
	if (vmap == NULL || vnode_base == NULL)
		bpf_error("malloc");
}

/*
 * Some pointers used to convert the basic block form of the code
 * into the array form that BPF requires.  'fstart' will point to
 * the malloc'd array while 'ftail' is used during the recursive traversal.
 */
static struct bpf_insn *fstart;
static struct bpf_insn *ftail;

#ifdef BDEBUG
int bids[1000];
#endif

/*
 * Returns true if successful.  Returns false if a branch has
 * an offset that is too large.  If so, we have marked that
 * branch so that on a subsequent iteration, it will be treated
 * properly.
 */
static int
convert_code_r(struct block *p)
{
	struct bpf_insn *dst;
	struct slist *src;
	int slen;
	u_int off;
	int extrajmps;		/* number of extra jumps inserted */
	struct slist **offset = NULL;

	if (p == 0 || isMarked(p))
		return (1);
	Mark(p);

	if (convert_code_r(JF(p)) == 0)
		return (0);
	if (convert_code_r(JT(p)) == 0)
		return (0);

	slen = slength(p->stmts);
	dst = ftail -= (slen + 1 + p->longjt + p->longjf);
		/* inflate length by any extra jumps */

	p->offset = dst - fstart;

	/* generate offset[] for convenience */
	if (slen) {
		offset = (struct slist **)calloc(slen, sizeof(struct slist *));
		if (!offset) {
			bpf_error("not enough core");
			/*NOTREACHED*/
		}
	}
	src = p->stmts;
	for (off = 0; off < slen && src; off++) {
#if 0
		printf("off=%u src=%p\n", off, (void *)src);
#endif
		offset[off] = src;
		src = src->next;
	}

	off = 0;
	for (src = p->stmts; src; src = src->next) {
		if (src->s.code == NOP)
			continue;
		dst->code = (u_short)src->s.code;
		dst->k = src->s.k;

		/* fill block-local relative jump */
		if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
#if 0
			if (src->s.jt || src->s.jf) {
				bpf_error("illegal jmp destination");
				/*NOTREACHED*/
			}
#endif
			goto filled;
		}
		if (off == slen - 2)	/*???*/
			goto filled;

	    {
		int i;
		int jt, jf;
		const char *ljerr = "%s for block-local relative jump: off=%d";

#if 0
		printf("code=%x off=%u %p %p\n", src->s.code,
			off, (void *)src->s.jt, (void *)src->s.jf);
#endif

		if (!src->s.jt || !src->s.jf) {
			bpf_error(ljerr, "no jmp destination", off);
			/*NOTREACHED*/
		}

		jt = jf = 0;
		for (i = 0; i < slen; i++) {
			if (offset[i] == src->s.jt) {
				if (jt) {
					bpf_error(ljerr, "multiple matches", off);
					/*NOTREACHED*/
				}

				dst->jt = i - off - 1;
				jt++;
			}
			if (offset[i] == src->s.jf) {
				if (jf) {
					bpf_error(ljerr, "multiple matches", off);
					/*NOTREACHED*/
				}
				dst->jf = i - off - 1;
				jf++;
			}
		}
		if (!jt || !jf) {
			bpf_error(ljerr, "no destination found", off);
			/*NOTREACHED*/
		}
	    }
filled:
		++dst;
		++off;
	}
	if (offset)
		free(offset);

#ifdef BDEBUG
	bids[dst - fstart] = p->id + 1;
#endif
	dst->code = (u_short)p->s.code;
	dst->k = p->s.k;
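	/*
	 * The jt/jf fields of a conditional jump are only 8 bits
	 * wide, so a branch can skip at most 255 instructions.  A
	 * larger displacement has to be routed through an
	 * unconditional BPF_JA, whose 'k' field is a full 32 bits.
	 */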
	if (JT(p)) {
		extrajmps = 0;
		off = JT(p)->offset - (p->offset + slen) - 1;
		if (off >= 256) {
		    /* offset too large for branch, must add a jump */
		    if (p->longjt == 0) {
		    	/* mark this instruction and retry */
			p->longjt++;
			return(0);
		    }
		    /* branch if T to following jump */
		    dst->jt = extrajmps;
		    extrajmps++;
		    dst[extrajmps].code = BPF_JMP|BPF_JA;
		    dst[extrajmps].k = off - extrajmps;
		}
		else
		    dst->jt = off;
		off = JF(p)->offset - (p->offset + slen) - 1;
		if (off >= 256) {
		    /* offset too large for branch, must add a jump */
		    if (p->longjf == 0) {
		    	/* mark this instruction and retry */
			p->longjf++;
			return(0);
		    }
		    /* branch if F to following jump */
		    /* if two jumps are inserted, F goes to second one */
		    dst->jf = extrajmps;
		    extrajmps++;
		    dst[extrajmps].code = BPF_JMP|BPF_JA;
		    dst[extrajmps].k = off - extrajmps;
		}
		else
		    dst->jf = off;
	}
	return (1);
}


/*
 * Convert flowgraph intermediate representation to the
 * BPF array representation.  Set *lenp to the number of instructions.
 *
 * This routine does *NOT* leak the memory pointed to by fp.  It *must
 * not* do free(fp) before returning fp; doing so would make no sense,
 * as the BPF array pointed to by the return value of icode_to_fcode()
 * must be valid - it's being returned for use in a bpf_program structure.
 *
 * If it appears that icode_to_fcode() is leaking, the problem is that
 * the program using pcap_compile() is failing to free the memory in
 * the BPF program when it's done - the leak is in the program, not in
 * the routine that happens to be allocating the memory.  (By analogy, if
 * a program calls fopen() without ever calling fclose() on the FILE *,
 * it will leak the FILE structure; the leak is not in fopen(), it's in
 * the program.)  Change the program to use pcap_freecode() when it's
 * done with the filter program.  See the pcap man page.
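 *
 * For example, a correct caller looks roughly like this (a sketch,
 * not part of this file; 'filter_expr' and 'netmask' are placeholder
 * names):
 *
 *	struct bpf_program fcode;
 *
 *	if (pcap_compile(p, &fcode, filter_expr, 1, netmask) == 0) {
 *		... use fcode.bf_insns and fcode.bf_len ...
 *		pcap_freecode(&fcode);
 *	}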
 */
struct bpf_insn *
icode_to_fcode(struct block *root, u_int *lenp)
{
	u_int n;
	struct bpf_insn *fp;

	/*
	 * Loop doing convert_code_r() until no branches remain
	 * with too-large offsets.
	 */
	while (1) {
	    unMarkAll();
	    n = *lenp = count_stmts(root);

	    fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
	    if (fp == NULL)
		    bpf_error("malloc");
	    memset((char *)fp, 0, sizeof(*fp) * n);
	    fstart = fp;
	    ftail = fp + n;

	    unMarkAll();
	    if (convert_code_r(root))
		break;
	    free(fp);
	}

	return fp;
}

/*
 * Make a copy of a BPF program and put it in the "fcode" member of
 * a "pcap_t".
 *
 * If we fail to allocate memory for the copy, fill in the "errbuf"
 * member of the "pcap_t" with an error message, and return -1;
 * otherwise, return 0.
 */
int
install_bpf_program(pcap_t *p, struct bpf_program *fp)
{
	size_t prog_size;

	/*
	 * Validate the program.
	 */
	if (!bpf_validate(fp->bf_insns, fp->bf_len)) {
		snprintf(p->errbuf, sizeof(p->errbuf),
			"BPF program is not valid");
		return (-1);
	}

	/*
	 * Free up any already installed program.
	 */
	pcap_freecode(&p->fcode);

	prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
	p->fcode.bf_len = fp->bf_len;
	p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
	if (p->fcode.bf_insns == NULL) {
		snprintf(p->errbuf, sizeof(p->errbuf),
			 "malloc: %s", pcap_strerror(errno));
		return (-1);
	}
	memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
	return (0);
}
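
/*
 * For illustration only (a sketch, not part of libpcap's API): a
 * device-specific set-filter routine typically validates and copies
 * the program with install_bpf_program() and then arranges for it to
 * be applied to each packet.  The function name below is hypothetical.
 *
 *	static int
 *	my_setfilter(pcap_t *p, struct bpf_program *fp)
 *	{
 *		if (install_bpf_program(p, fp) < 0)
 *			return (-1);
 *		... use p->fcode on each captured packet ...
 *		return (0);
 *	}
 */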

#ifdef BDEBUG
static void
opt_dump(struct block *root)
{
	struct bpf_program f;

	memset(bids, 0, sizeof bids);
	f.bf_insns = icode_to_fcode(root, &f.bf_len);
	bpf_dump(&f, 1);
	putchar('\n');
	free((char *)f.bf_insns);
}
#endif