optimize.c revision 127664
1/*
2 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that: (1) source code distributions
7 * retain the above copyright notice and this paragraph in its entirety, (2)
8 * distributions including binary code include the above copyright notice and
9 * this paragraph in its entirety in the documentation or other materials
10 * provided with the distribution, and (3) all advertising materials mentioning
11 * features or use of this software display the following acknowledgement:
12 * ``This product includes software developed by the University of California,
13 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
14 * the University nor the names of its contributors may be used to endorse
15 * or promote products derived from this software without specific prior
16 * written permission.
17 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
18 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
20 *
21 *  Optimization module for tcpdump intermediate representation.
22 */
23#ifndef lint
24static const char rcsid[] _U_ =
25    "@(#) $Header: /tcpdump/master/libpcap/optimize.c,v 1.76.2.3 2003/12/22 00:26:36 guy Exp $ (LBL)";
26#endif
27
28#ifdef HAVE_CONFIG_H
29#include "config.h"
30#endif
31
32#include <stdio.h>
33#include <stdlib.h>
34#include <memory.h>
35
36#include <errno.h>
37
38#include "pcap-int.h"
39
40#include "gencode.h"
41
42#ifdef HAVE_OS_PROTO_H
43#include "os-proto.h"
44#endif
45
46#ifdef BDEBUG
47extern int dflag;
48#endif
49
50#define A_ATOM BPF_MEMWORDS
51#define X_ATOM (BPF_MEMWORDS+1)
52
53#define NOP -1
54
55/*
56 * This define is used to represent *both* the accumulator and
57 * x register in use-def computations.
58 * Currently, the use-def code assumes only one definition per instruction.
59 */
60#define AX_ATOM N_ATOMS
61
62/*
63 * A flag to indicate that further optimization is needed.
64 * Iterative passes are continued until a given pass yields no
65 * branch movement.
66 */
67static int done;
68
69/*
70 * A block is marked if and only if its mark equals the current mark.
71 * Rather than traverse the code array, marking each item, 'cur_mark' is
72 * incremented.  This automatically makes each element unmarked.
73 */
74static int cur_mark;
75#define isMarked(p) ((p)->mark == cur_mark)
76#define unMarkAll() cur_mark += 1
77#define Mark(p) ((p)->mark = cur_mark)
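/*
 * Typical use in this file (e.g. find_levels_r() and mark_code() below):
 * call unMarkAll() once, then during a traversal test isMarked(b) and
 * return if the block has already been seen, otherwise Mark(b) and
 * recurse, so each block is visited exactly once per traversal.
 */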
78
79static void opt_init(struct block *);
80static void opt_cleanup(void);
81
82static void make_marks(struct block *);
83static void mark_code(struct block *);
84
85static void intern_blocks(struct block *);
86
87static int eq_slist(struct slist *, struct slist *);
88
89static void find_levels_r(struct block *);
90
91static void find_levels(struct block *);
92static void find_dom(struct block *);
93static void propedom(struct edge *);
94static void find_edom(struct block *);
95static void find_closure(struct block *);
96static int atomuse(struct stmt *);
97static int atomdef(struct stmt *);
98static void compute_local_ud(struct block *);
99static void find_ud(struct block *);
100static void init_val(void);
101static int F(int, int, int);
102static inline void vstore(struct stmt *, int *, int, int);
103static void opt_blk(struct block *, int);
104static int use_conflict(struct block *, struct block *);
105static void opt_j(struct edge *);
106static void or_pullup(struct block *);
107static void and_pullup(struct block *);
108static void opt_blks(struct block *, int);
109static inline void link_inedge(struct edge *, struct block *);
110static void find_inedges(struct block *);
111static void opt_root(struct block **);
112static void opt_loop(struct block *, int);
113static void fold_op(struct stmt *, int, int);
114static inline struct slist *this_op(struct slist *);
115static void opt_not(struct block *);
116static void opt_peep(struct block *);
117static void opt_stmt(struct stmt *, int[], int);
118static void deadstmt(struct stmt *, struct stmt *[]);
119static void opt_deadstores(struct block *);
120static struct block *fold_edge(struct block *, struct edge *);
121static inline int eq_blk(struct block *, struct block *);
122static int slength(struct slist *);
123static int count_blocks(struct block *);
124static void number_blks_r(struct block *);
125static int count_stmts(struct block *);
126static int convert_code_r(struct block *);
127#ifdef BDEBUG
128static void opt_dump(struct block *);
129#endif
130
131static int n_blocks;
132struct block **blocks;
133static int n_edges;
134struct edge **edges;
135
136/*
137 * A bit vector set representation of the dominators.
138 * We round the set size up to a whole number of bit-vector words.
139 */
140static int nodewords;
141static int edgewords;
142struct block **levels;
143bpf_u_int32 *space;
144#define BITS_PER_WORD (8*sizeof(bpf_u_int32))
145/*
146 * True if 'a' is in uset p.
147 */
148#define SET_MEMBER(p, a) \
149((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))
150
151/*
152 * Add 'a' to uset p.
153 */
154#define SET_INSERT(p, a) \
155(p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))
156
157/*
158 * Delete 'a' from uset p.
159 */
160#define SET_DELETE(p, a) \
161(p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))
162
163/*
164 * a := a intersect b
165 */
166#define SET_INTERSECT(a, b, n)\
167{\
168	register bpf_u_int32 *_x = a, *_y = b;\
169	register int _n = n;\
170	while (--_n >= 0) *_x++ &= *_y++;\
171}
172
173/*
174 * a := a - b
175 */
176#define SET_SUBTRACT(a, b, n)\
177{\
178	register bpf_u_int32 *_x = a, *_y = b;\
179	register int _n = n;\
180	while (--_n >= 0) *_x++ &=~ *_y++;\
181}
182
183/*
184 * a := a union b
185 */
186#define SET_UNION(a, b, n)\
187{\
188	register bpf_u_int32 *_x = a, *_y = b;\
189	register int _n = n;\
190	while (--_n >= 0) *_x++ |= *_y++;\
191}
192
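/*
 * Illustrative example: with 32-bit words (BITS_PER_WORD == 32), block
 * or edge id 37 corresponds to bit 37 % 32 == 5 of word 37 / 32 == 1,
 * so SET_INSERT(p, 37) is p[1] |= (1 << 5) and SET_MEMBER(p, 37) tests
 * that same bit.
 */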
193static uset all_dom_sets;
194static uset all_closure_sets;
195static uset all_edge_sets;
196
197#ifndef MAX
198#define MAX(a,b) ((a)>(b)?(a):(b))
199#endif
200
201static void
202find_levels_r(b)
203	struct block *b;
204{
205	int level;
206
207	if (isMarked(b))
208		return;
209
210	Mark(b);
211	b->link = 0;
212
213	if (JT(b)) {
214		find_levels_r(JT(b));
215		find_levels_r(JF(b));
216		level = MAX(JT(b)->level, JF(b)->level) + 1;
217	} else
218		level = 0;
219	b->level = level;
220	b->link = levels[level];
221	levels[level] = b;
222}
223
224/*
225 * Level graph.  The levels go from 0 at the leaves to
226 * N_LEVELS at the root.  The levels[] array points to the
227 * first node of the level list, whose elements are linked
228 * with the 'link' field of the struct block.
229 */
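/*
 * For example, in a flow graph whose root tests "jeq #0x800" and whose
 * two successors are "ret" leaves, both leaves get level 0 and the root
 * gets level MAX(0, 0) + 1 == 1; each block is then prepended to
 * levels[its level] via its 'link' field.
 */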
230static void
231find_levels(root)
232	struct block *root;
233{
234	memset((char *)levels, 0, n_blocks * sizeof(*levels));
235	unMarkAll();
236	find_levels_r(root);
237}
238
239/*
240 * Find dominator relationships.
241 * Assumes graph has been leveled.
242 */
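/*
 * This is the usual iterative dominator computation specialized to a
 * leveled, acyclic graph: every dom set starts out as "all blocks", the
 * root's set starts out empty, and blocks are visited from the root's
 * level down, each one adding itself to its own set and intersecting
 * its set into both successors.  So, roughly, a block reachable from
 * the root along two different paths ends up with only the blocks
 * common to both paths (plus itself) in its dom set.
 */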
243static void
244find_dom(root)
245	struct block *root;
246{
247	int i;
248	struct block *b;
249	bpf_u_int32 *x;
250
251	/*
252	 * Initialize sets to contain all nodes.
253	 */
254	x = all_dom_sets;
255	i = n_blocks * nodewords;
256	while (--i >= 0)
257		*x++ = ~0;
258	/* Root starts off empty. */
259	for (i = nodewords; --i >= 0;)
260		root->dom[i] = 0;
261
262	/* root->level is the highest level number found. */
263	for (i = root->level; i >= 0; --i) {
264		for (b = levels[i]; b; b = b->link) {
265			SET_INSERT(b->dom, b->id);
266			if (JT(b) == 0)
267				continue;
268			SET_INTERSECT(JT(b)->dom, b->dom, nodewords);
269			SET_INTERSECT(JF(b)->dom, b->dom, nodewords);
270		}
271	}
272}
273
274static void
275propedom(ep)
276	struct edge *ep;
277{
278	SET_INSERT(ep->edom, ep->id);
279	if (ep->succ) {
280		SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords);
281		SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords);
282	}
283}
284
285/*
286 * Compute edge dominators.
287 * Assumes graph has been leveled and predecessors established.
288 */
289static void
290find_edom(root)
291	struct block *root;
292{
293	int i;
294	uset x;
295	struct block *b;
296
297	x = all_edge_sets;
298	for (i = n_edges * edgewords; --i >= 0; )
299		x[i] = ~0;
300
301	/* root->level is the highest level number found. */
302	memset(root->et.edom, 0, edgewords * sizeof(*(uset)0));
303	memset(root->ef.edom, 0, edgewords * sizeof(*(uset)0));
304	for (i = root->level; i >= 0; --i) {
305		for (b = levels[i]; b != 0; b = b->link) {
306			propedom(&b->et);
307			propedom(&b->ef);
308		}
309	}
310}
311
312/*
313 * Find the backwards transitive closure of the flow graph.  These sets
314 * are backwards in the sense that we find the set of nodes that reach
315 * a given node, not the set of nodes that can be reached by a node.
316 *
317 * Assumes graph has been leveled.
318 */
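/*
 * For example, if the root r branches to b1 and b2, both of which
 * branch to the same leaf b3, then closure(b3) ends up as {r, b1, b2, b3},
 * i.e. every block from which b3 can be reached, while closure(r) is
 * just {r}.
 */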
319static void
320find_closure(root)
321	struct block *root;
322{
323	int i;
324	struct block *b;
325
326	/*
327	 * Initialize sets to contain no nodes.
328	 */
329	memset((char *)all_closure_sets, 0,
330	      n_blocks * nodewords * sizeof(*all_closure_sets));
331
332	/* root->level is the highest level number found. */
333	for (i = root->level; i >= 0; --i) {
334		for (b = levels[i]; b; b = b->link) {
335			SET_INSERT(b->closure, b->id);
336			if (JT(b) == 0)
337				continue;
338			SET_UNION(JT(b)->closure, b->closure, nodewords);
339			SET_UNION(JF(b)->closure, b->closure, nodewords);
340		}
341	}
342}
343
344/*
345 * Return the register number that is used by s.  If A and X are both
346 * used, return AX_ATOM.  If no register is used, return -1.
347 *
348 * The implementation should probably change to an array access.
349 */
350static int
351atomuse(s)
352	struct stmt *s;
353{
354	register int c = s->code;
355
356	if (c == NOP)
357		return -1;
358
359	switch (BPF_CLASS(c)) {
360
361	case BPF_RET:
362		return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
363			(BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
364
365	case BPF_LD:
366	case BPF_LDX:
367		return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
368			(BPF_MODE(c) == BPF_MEM) ? s->k : -1;
369
370	case BPF_ST:
371		return A_ATOM;
372
373	case BPF_STX:
374		return X_ATOM;
375
376	case BPF_JMP:
377	case BPF_ALU:
378		if (BPF_SRC(c) == BPF_X)
379			return AX_ATOM;
380		return A_ATOM;
381
382	case BPF_MISC:
383		return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
384	}
385	abort();
386	/* NOTREACHED */
387}
388
389/*
390 * Return the register number that is defined by 's'.  We assume that
391 * a single stmt cannot define more than one register.  If no register
392 * is defined, return -1.
393 *
394 * The implementation should probably change to an array access.
395 */
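/*
 * For example, for a "tax" statement (BPF_MISC|BPF_TAX), atomuse()
 * above returns A_ATOM and atomdef() returns X_ATOM; for "st M[3]"
 * (BPF_ST), atomuse() returns A_ATOM and atomdef() returns the scratch
 * memory slot 3.
 */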
396static int
397atomdef(s)
398	struct stmt *s;
399{
400	if (s->code == NOP)
401		return -1;
402
403	switch (BPF_CLASS(s->code)) {
404
405	case BPF_LD:
406	case BPF_ALU:
407		return A_ATOM;
408
409	case BPF_LDX:
410		return X_ATOM;
411
412	case BPF_ST:
413	case BPF_STX:
414		return s->k;
415
416	case BPF_MISC:
417		return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
418	}
419	return -1;
420}
421
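/*
 * Compute the def, use and kill sets local to block 'b': 'use' holds
 * the registers read before any definition in the block (i.e. values
 * inherited from predecessors), 'kill' the registers written before
 * any use, and 'def' everything the block writes.  For example, in a
 * block consisting of "ldh [12]" followed by a conditional branch on
 * the accumulator, the load defines (and kills) A before the branch
 * reads it, so A does not appear in the block's in_use set.
 */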
422static void
423compute_local_ud(b)
424	struct block *b;
425{
426	struct slist *s;
427	atomset def = 0, use = 0, kill = 0;
428	int atom;
429
430	for (s = b->stmts; s; s = s->next) {
431		if (s->s.code == NOP)
432			continue;
433		atom = atomuse(&s->s);
434		if (atom >= 0) {
435			if (atom == AX_ATOM) {
436				if (!ATOMELEM(def, X_ATOM))
437					use |= ATOMMASK(X_ATOM);
438				if (!ATOMELEM(def, A_ATOM))
439					use |= ATOMMASK(A_ATOM);
440			}
441			else if (atom < N_ATOMS) {
442				if (!ATOMELEM(def, atom))
443					use |= ATOMMASK(atom);
444			}
445			else
446				abort();
447		}
448		atom = atomdef(&s->s);
449		if (atom >= 0) {
450			if (!ATOMELEM(use, atom))
451				kill |= ATOMMASK(atom);
452			def |= ATOMMASK(atom);
453		}
454	}
455	if (!ATOMELEM(def, A_ATOM) && BPF_CLASS(b->s.code) == BPF_JMP)
456		use |= ATOMMASK(A_ATOM);
457
458	b->def = def;
459	b->kill = kill;
460	b->in_use = use;
461}
462
463/*
464 * Assume graph is already leveled.
465 */
466static void
467find_ud(root)
468	struct block *root;
469{
470	int i, maxlevel;
471	struct block *p;
472
473	/*
474	 * root->level is the highest level number found;
475	 * count down from there.
476	 */
477	maxlevel = root->level;
478	for (i = maxlevel; i >= 0; --i)
479		for (p = levels[i]; p; p = p->link) {
480			compute_local_ud(p);
481			p->out_use = 0;
482		}
483
484	for (i = 1; i <= maxlevel; ++i) {
485		for (p = levels[i]; p; p = p->link) {
486			p->out_use |= JT(p)->in_use | JF(p)->in_use;
487			p->in_use |= p->out_use &~ p->kill;
488		}
489	}
490}
491
492/*
493 * These data structures are used in a Cocke and Schwartz style
494 * value numbering scheme.  Since the flowgraph is acyclic,
495 * exit values can be propagated from a node's predecessors
496 * provided it is uniquely defined.
497 */
498struct valnode {
499	int code;
500	int v0, v1;
501	int val;
502	struct valnode *next;
503};
504
505#define MODULUS 213
506static struct valnode *hashtbl[MODULUS];
507static int curval;
508static int maxval;
509
510/* Integer constants mapped with the load immediate opcode. */
511#define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L)
512
513struct vmapinfo {
514	int is_const;
515	bpf_int32 const_val;
516};
517
518struct vmapinfo *vmap;
519struct valnode *vnode_base;
520struct valnode *next_vnode;
521
522static void
523init_val()
524{
525	curval = 0;
526	next_vnode = vnode_base;
527	memset((char *)vmap, 0, maxval * sizeof(*vmap));
528	memset((char *)hashtbl, 0, sizeof hashtbl);
529}
530
531/* Because we really don't have an IR, this stuff is a little messy. */
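/*
 * F() returns a stable value number for the triple (code, v0, v1): it
 * hashes into hashtbl[] and reuses an existing valnode if the triple
 * has been seen, otherwise it allocates a fresh number.  For example,
 * two "ldh [12]" statements map to the same value number, which is how
 * vstore() later recognizes that a register already holds the value
 * about to be stored into it.  Immediate loads also record their
 * constant in vmap[] for the constant-folding code below.
 */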
532static int
533F(code, v0, v1)
534	int code;
535	int v0, v1;
536{
537	u_int hash;
538	int val;
539	struct valnode *p;
540
541	hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
542	hash %= MODULUS;
543
544	for (p = hashtbl[hash]; p; p = p->next)
545		if (p->code == code && p->v0 == v0 && p->v1 == v1)
546			return p->val;
547
548	val = ++curval;
549	if (BPF_MODE(code) == BPF_IMM &&
550	    (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
551		vmap[val].const_val = v0;
552		vmap[val].is_const = 1;
553	}
554	p = next_vnode++;
555	p->val = val;
556	p->code = code;
557	p->v0 = v0;
558	p->v1 = v1;
559	p->next = hashtbl[hash];
560	hashtbl[hash] = p;
561
562	return val;
563}
564
565static inline void
566vstore(s, valp, newval, alter)
567	struct stmt *s;
568	int *valp;
569	int newval;
570	int alter;
571{
572	if (alter && *valp == newval)
573		s->code = NOP;
574	else
575		*valp = newval;
576}
577
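/*
 * Constant-fold an ALU statement whose operand value numbers v0 and v1
 * are both known constants, rewriting it as a load immediate of the
 * result.  For example, if the accumulator is known to hold 2 and the
 * statement is "add #3", the statement becomes "ld #5".
 */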
578static void
579fold_op(s, v0, v1)
580	struct stmt *s;
581	int v0, v1;
582{
583	bpf_int32 a, b;
584
585	a = vmap[v0].const_val;
586	b = vmap[v1].const_val;
587
588	switch (BPF_OP(s->code)) {
589	case BPF_ADD:
590		a += b;
591		break;
592
593	case BPF_SUB:
594		a -= b;
595		break;
596
597	case BPF_MUL:
598		a *= b;
599		break;
600
601	case BPF_DIV:
602		if (b == 0)
603			bpf_error("division by zero");
604		a /= b;
605		break;
606
607	case BPF_AND:
608		a &= b;
609		break;
610
611	case BPF_OR:
612		a |= b;
613		break;
614
615	case BPF_LSH:
616		a <<= b;
617		break;
618
619	case BPF_RSH:
620		a >>= b;
621		break;
622
623	case BPF_NEG:
624		a = -a;
625		break;
626
627	default:
628		abort();
629	}
630	s->k = a;
631	s->code = BPF_LD|BPF_IMM;
632	done = 0;
633}
634
635static inline struct slist *
636this_op(s)
637	struct slist *s;
638{
639	while (s != 0 && s->s.code == NOP)
640		s = s->next;
641	return s;
642}
643
644static void
645opt_not(b)
646	struct block *b;
647{
648	struct block *tmp = JT(b);
649
650	JT(b) = JF(b);
651	JF(b) = tmp;
652}
653
654static void
655opt_peep(b)
656	struct block *b;
657{
658	struct slist *s;
659	struct slist *next, *last;
660	int val;
661
662	s = b->stmts;
663	if (s == 0)
664		return;
665
666	last = s;
667	while (1) {
668		s = this_op(s);
669		if (s == 0)
670			break;
671		next = this_op(s->next);
672		if (next == 0)
673			break;
674		last = next;
675
676		/*
677		 * st  M[k]	-->	st  M[k]
678		 * ldx M[k]		tax
679		 */
680		if (s->s.code == BPF_ST &&
681		    next->s.code == (BPF_LDX|BPF_MEM) &&
682		    s->s.k == next->s.k) {
683			done = 0;
684			next->s.code = BPF_MISC|BPF_TAX;
685		}
686		/*
687		 * ld  #k	-->	ldx  #k
688		 * tax			txa
689		 */
690		if (s->s.code == (BPF_LD|BPF_IMM) &&
691		    next->s.code == (BPF_MISC|BPF_TAX)) {
692			s->s.code = BPF_LDX|BPF_IMM;
693			next->s.code = BPF_MISC|BPF_TXA;
694			done = 0;
695		}
696		/*
697		 * This is an ugly special case, but it happens
698		 * when you say tcp[k] or udp[k] where k is a constant.
699		 */
700		if (s->s.code == (BPF_LD|BPF_IMM)) {
701			struct slist *add, *tax, *ild;
702
703			/*
704			 * Check that X isn't used on exit from this
705			 * block (which the optimizer might cause).
706			 * We know the code generator won't generate
707			 * any local dependencies.
708			 */
709			if (ATOMELEM(b->out_use, X_ATOM))
710				break;
711
712			if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
713				add = next;
714			else
715				add = this_op(next->next);
716			if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
717				break;
718
719			tax = this_op(add->next);
720			if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
721				break;
722
723			ild = this_op(tax->next);
724			if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
725			    BPF_MODE(ild->s.code) != BPF_IND)
726				break;
727			/*
728			 * XXX We need to check that X is not
729			 * subsequently used.  We know we can eliminate the
730			 * accumulator modifications since it is defined
731			 * by the last stmt of this sequence.
732			 *
733			 * We want to turn this sequence:
734			 *
735			 * (004) ldi     #0x2		{s}
736			 * (005) ldxms   [14]		{next}  -- optional
737			 * (006) addx			{add}
738			 * (007) tax			{tax}
739			 * (008) ild     [x+0]		{ild}
740			 *
741			 * into this sequence:
742			 *
743			 * (004) nop
744			 * (005) ldxms   [14]
745			 * (006) nop
746			 * (007) nop
747			 * (008) ild     [x+2]
748			 *
749			 */
750			ild->s.k += s->s.k;
751			s->s.code = NOP;
752			add->s.code = NOP;
753			tax->s.code = NOP;
754			done = 0;
755		}
756		s = next;
757	}
758	/*
759	 * If we have a subtract to do a comparison, and the X register
760	 * is a known constant, we can merge this value into the
761	 * comparison.
762	 */
763	if (BPF_OP(b->s.code) == BPF_JEQ) {
764		if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X) &&
765		    !ATOMELEM(b->out_use, A_ATOM)) {
766			val = b->val[X_ATOM];
767			if (vmap[val].is_const) {
768				/*
769				 * sub x  ->	nop
770				 * jeq #y	jeq #(x+y)
771				 */
772				b->s.k += vmap[val].const_val;
773				last->s.code = NOP;
774				done = 0;
775			} else if (b->s.k == 0) {
776				/*
777				 * sub #x  ->	nop
778				 * jeq #0	jeq #x
779				 */
780				last->s.code = NOP;
781				b->s.code = BPF_CLASS(b->s.code) |
782					BPF_OP(b->s.code) | BPF_X;
783				done = 0;
784			}
785		}
786		/*
787		 * Likewise, a constant subtract can be simplified.
788		 */
789		else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K) &&
790			 !ATOMELEM(b->out_use, A_ATOM)) {
791
792			last->s.code = NOP;
793			b->s.k += last->s.k;
794			done = 0;
795		}
796	}
797	/*
798	 * and #k	nop
799	 * jeq #0  ->	jset #k
800	 */
801	if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
802	    !ATOMELEM(b->out_use, A_ATOM) && b->s.k == 0) {
803		b->s.k = last->s.k;
804		b->s.code = BPF_JMP|BPF_K|BPF_JSET;
805		last->s.code = NOP;
806		done = 0;
807		opt_not(b);
808	}
809	/*
810	 * jset #0        ->   never
811	 * jset #ffffffff ->   always
812	 */
813	if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
814		if (b->s.k == 0)
815			JT(b) = JF(b);
816		if (b->s.k == 0xffffffff)
817			JF(b) = JT(b);
818	}
819	/*
820	 * If the accumulator is a known constant, we can compute the
821	 * comparison result.
822	 */
823	val = b->val[A_ATOM];
824	if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
825		bpf_int32 v = vmap[val].const_val;
826		switch (BPF_OP(b->s.code)) {
827
828		case BPF_JEQ:
829			v = v == b->s.k;
830			break;
831
832		case BPF_JGT:
833			v = (unsigned)v > b->s.k;
834			break;
835
836		case BPF_JGE:
837			v = (unsigned)v >= b->s.k;
838			break;
839
840		case BPF_JSET:
841			v &= b->s.k;
842			break;
843
844		default:
845			abort();
846		}
847		if (JF(b) != JT(b))
848			done = 0;
849		if (v)
850			JF(b) = JT(b);
851		else
852			JT(b) = JF(b);
853	}
854}
855
856/*
857 * Compute the symbolic value of the expression in 's', and update
858 * anything it defines in the value table 'val'.  If 'alter' is true,
859 * do various optimizations.  This code would be cleaner if symbolic
860 * evaluation and code transformations weren't folded together.
861 */
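/*
 * For example, when 'alter' is set and the X register is known to hold
 * the constant 4, the indirect load "ld [x + 10]" is rewritten below as
 * the absolute load "ld [14]"; similarly "ldx M[2]" becomes a load
 * immediate when scratch slot 2 is known to hold a constant.
 */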
862static void
863opt_stmt(s, val, alter)
864	struct stmt *s;
865	int val[];
866	int alter;
867{
868	int op;
869	int v;
870
871	switch (s->code) {
872
873	case BPF_LD|BPF_ABS|BPF_W:
874	case BPF_LD|BPF_ABS|BPF_H:
875	case BPF_LD|BPF_ABS|BPF_B:
876		v = F(s->code, s->k, 0L);
877		vstore(s, &val[A_ATOM], v, alter);
878		break;
879
880	case BPF_LD|BPF_IND|BPF_W:
881	case BPF_LD|BPF_IND|BPF_H:
882	case BPF_LD|BPF_IND|BPF_B:
883		v = val[X_ATOM];
884		if (alter && vmap[v].is_const) {
885			s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
886			s->k += vmap[v].const_val;
887			v = F(s->code, s->k, 0L);
888			done = 0;
889		}
890		else
891			v = F(s->code, s->k, v);
892		vstore(s, &val[A_ATOM], v, alter);
893		break;
894
895	case BPF_LD|BPF_LEN:
896		v = F(s->code, 0L, 0L);
897		vstore(s, &val[A_ATOM], v, alter);
898		break;
899
900	case BPF_LD|BPF_IMM:
901		v = K(s->k);
902		vstore(s, &val[A_ATOM], v, alter);
903		break;
904
905	case BPF_LDX|BPF_IMM:
906		v = K(s->k);
907		vstore(s, &val[X_ATOM], v, alter);
908		break;
909
910	case BPF_LDX|BPF_MSH|BPF_B:
911		v = F(s->code, s->k, 0L);
912		vstore(s, &val[X_ATOM], v, alter);
913		break;
914
915	case BPF_ALU|BPF_NEG:
916		if (alter && vmap[val[A_ATOM]].is_const) {
917			s->code = BPF_LD|BPF_IMM;
918			s->k = -vmap[val[A_ATOM]].const_val;
919			val[A_ATOM] = K(s->k);
920		}
921		else
922			val[A_ATOM] = F(s->code, val[A_ATOM], 0L);
923		break;
924
925	case BPF_ALU|BPF_ADD|BPF_K:
926	case BPF_ALU|BPF_SUB|BPF_K:
927	case BPF_ALU|BPF_MUL|BPF_K:
928	case BPF_ALU|BPF_DIV|BPF_K:
929	case BPF_ALU|BPF_AND|BPF_K:
930	case BPF_ALU|BPF_OR|BPF_K:
931	case BPF_ALU|BPF_LSH|BPF_K:
932	case BPF_ALU|BPF_RSH|BPF_K:
933		op = BPF_OP(s->code);
934		if (alter) {
935			if (s->k == 0) {
936				/* don't optimize away "sub #0"
937				 * as it may be needed later to
938				 * fixup the generated math code */
939				if (op == BPF_ADD ||
940				    op == BPF_LSH || op == BPF_RSH ||
941				    op == BPF_OR) {
942					s->code = NOP;
943					break;
944				}
945				if (op == BPF_MUL || op == BPF_AND) {
946					s->code = BPF_LD|BPF_IMM;
947					val[A_ATOM] = K(s->k);
948					break;
949				}
950			}
951			if (vmap[val[A_ATOM]].is_const) {
952				fold_op(s, val[A_ATOM], K(s->k));
953				val[A_ATOM] = K(s->k);
954				break;
955			}
956		}
957		val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k));
958		break;
959
960	case BPF_ALU|BPF_ADD|BPF_X:
961	case BPF_ALU|BPF_SUB|BPF_X:
962	case BPF_ALU|BPF_MUL|BPF_X:
963	case BPF_ALU|BPF_DIV|BPF_X:
964	case BPF_ALU|BPF_AND|BPF_X:
965	case BPF_ALU|BPF_OR|BPF_X:
966	case BPF_ALU|BPF_LSH|BPF_X:
967	case BPF_ALU|BPF_RSH|BPF_X:
968		op = BPF_OP(s->code);
969		if (alter && vmap[val[X_ATOM]].is_const) {
970			if (vmap[val[A_ATOM]].is_const) {
971				fold_op(s, val[A_ATOM], val[X_ATOM]);
972				val[A_ATOM] = K(s->k);
973			}
974			else {
975				s->code = BPF_ALU|BPF_K|op;
976				s->k = vmap[val[X_ATOM]].const_val;
977				done = 0;
978				val[A_ATOM] =
979					F(s->code, val[A_ATOM], K(s->k));
980			}
981			break;
982		}
983		/*
984		 * Check if we're doing something to an accumulator
985		 * that is 0, and simplify.  This may not seem like
986		 * much of a simplification but it could open up further
987		 * optimizations.
988		 * XXX We could also check for mul by 1, etc.
989		 */
990		if (alter && vmap[val[A_ATOM]].is_const
991		    && vmap[val[A_ATOM]].const_val == 0) {
992			if (op == BPF_ADD || op == BPF_OR) {
993				s->code = BPF_MISC|BPF_TXA;
994				vstore(s, &val[A_ATOM], val[X_ATOM], alter);
995				break;
996			}
997			else if (op == BPF_MUL || op == BPF_DIV ||
998				 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
999				s->code = BPF_LD|BPF_IMM;
1000				s->k = 0;
1001				vstore(s, &val[A_ATOM], K(s->k), alter);
1002				break;
1003			}
1004			else if (op == BPF_NEG) {
1005				s->code = NOP;
1006				break;
1007			}
1008		}
1009		val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]);
1010		break;
1011
1012	case BPF_MISC|BPF_TXA:
1013		vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1014		break;
1015
1016	case BPF_LD|BPF_MEM:
1017		v = val[s->k];
1018		if (alter && vmap[v].is_const) {
1019			s->code = BPF_LD|BPF_IMM;
1020			s->k = vmap[v].const_val;
1021			done = 0;
1022		}
1023		vstore(s, &val[A_ATOM], v, alter);
1024		break;
1025
1026	case BPF_MISC|BPF_TAX:
1027		vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1028		break;
1029
1030	case BPF_LDX|BPF_MEM:
1031		v = val[s->k];
1032		if (alter && vmap[v].is_const) {
1033			s->code = BPF_LDX|BPF_IMM;
1034			s->k = vmap[v].const_val;
1035			done = 0;
1036		}
1037		vstore(s, &val[X_ATOM], v, alter);
1038		break;
1039
1040	case BPF_ST:
1041		vstore(s, &val[s->k], val[A_ATOM], alter);
1042		break;
1043
1044	case BPF_STX:
1045		vstore(s, &val[s->k], val[X_ATOM], alter);
1046		break;
1047	}
1048}
1049
1050static void
1051deadstmt(s, last)
1052	register struct stmt *s;
1053	register struct stmt *last[];
1054{
1055	register int atom;
1056
1057	atom = atomuse(s);
1058	if (atom >= 0) {
1059		if (atom == AX_ATOM) {
1060			last[X_ATOM] = 0;
1061			last[A_ATOM] = 0;
1062		}
1063		else
1064			last[atom] = 0;
1065	}
1066	atom = atomdef(s);
1067	if (atom >= 0) {
1068		if (last[atom]) {
1069			done = 0;
1070			last[atom]->code = NOP;
1071		}
1072		last[atom] = s;
1073	}
1074}
1075
1076static void
1077opt_deadstores(b)
1078	register struct block *b;
1079{
1080	register struct slist *s;
1081	register int atom;
1082	struct stmt *last[N_ATOMS];
1083
1084	memset((char *)last, 0, sizeof last);
1085
1086	for (s = b->stmts; s != 0; s = s->next)
1087		deadstmt(&s->s, last);
1088	deadstmt(&b->s, last);
1089
1090	for (atom = 0; atom < N_ATOMS; ++atom)
1091		if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1092			last[atom]->code = NOP;
1093			done = 0;
1094		}
1095}
1096
1097static void
1098opt_blk(b, do_stmts)
1099	struct block *b;
1100	int do_stmts;
1101{
1102	struct slist *s;
1103	struct edge *p;
1104	int i;
1105	bpf_int32 aval;
1106
1107#if 0
1108	for (s = b->stmts; s && s->next; s = s->next)
1109		if (BPF_CLASS(s->s.code) == BPF_JMP) {
1110			do_stmts = 0;
1111			break;
1112		}
1113#endif
1114
1115	/*
1116	 * Initialize the atom values.
1117	 * If we have no predecessors, everything is undefined.
1118	 * Otherwise, we inherit our values from our predecessors.
1119	 * If any register has an ambiguous value (i.e. control paths are
1120	 * merging) give it the undefined value of 0.
1121	 */
1122	p = b->in_edges;
1123	if (p == 0)
1124		memset((char *)b->val, 0, sizeof(b->val));
1125	else {
1126		memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1127		while ((p = p->next) != NULL) {
1128			for (i = 0; i < N_ATOMS; ++i)
1129				if (b->val[i] != p->pred->val[i])
1130					b->val[i] = 0;
1131		}
1132	}
1133	aval = b->val[A_ATOM];
1134	for (s = b->stmts; s; s = s->next)
1135		opt_stmt(&s->s, b->val, do_stmts);
1136
1137	/*
1138	 * This is a special case: if we don't use anything from this
1139	 * block, and we load the accumulator with a value that is
1140	 * already there, or if this block is a return,
1141	 * eliminate all the statements.
1142	 */
1143	if (do_stmts &&
1144	    ((b->out_use == 0 && aval != 0 && b->val[A_ATOM] == aval) ||
1145	     BPF_CLASS(b->s.code) == BPF_RET)) {
1146		if (b->stmts != 0) {
1147			b->stmts = 0;
1148			done = 0;
1149		}
1150	} else {
1151		opt_peep(b);
1152		opt_deadstores(b);
1153	}
1154	/*
1155	 * Set up values for branch optimizer.
1156	 */
1157	if (BPF_SRC(b->s.code) == BPF_K)
1158		b->oval = K(b->s.k);
1159	else
1160		b->oval = b->val[X_ATOM];
1161	b->et.code = b->s.code;
1162	b->ef.code = -b->s.code;
1163}
1164
1165/*
1166 * Return true if any register that is used on exit from 'succ' has
1167 * an exit value that is different from the corresponding exit value
1168 * from 'b'.
1169 */
1170static int
1171use_conflict(b, succ)
1172	struct block *b, *succ;
1173{
1174	int atom;
1175	atomset use = succ->out_use;
1176
1177	if (use == 0)
1178		return 0;
1179
1180	for (atom = 0; atom < N_ATOMS; ++atom)
1181		if (ATOMELEM(use, atom))
1182			if (b->val[atom] != succ->val[atom])
1183				return 1;
1184	return 0;
1185}
1186
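/*
 * Decide whether the branch of 'child' is already determined, given
 * that the test on edge 'ep' is known to have gone a particular way
 * (opt_j() below calls this with edges that dominate the edge into
 * 'child').  If 'child' performs the same test on the same accumulator
 * value and operand, the outcome must repeat: e.g. arriving via the
 * true branch of one "jeq #0x800" at another "jeq #0x800" on the same
 * value means the second test also succeeds, so JT(child) is returned.
 * Returns the successor that must be taken, or 0 if nothing is known.
 */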
1187static struct block *
1188fold_edge(child, ep)
1189	struct block *child;
1190	struct edge *ep;
1191{
1192	int sense;
1193	int aval0, aval1, oval0, oval1;
1194	int code = ep->code;
1195
1196	if (code < 0) {
1197		code = -code;
1198		sense = 0;
1199	} else
1200		sense = 1;
1201
1202	if (child->s.code != code)
1203		return 0;
1204
1205	aval0 = child->val[A_ATOM];
1206	oval0 = child->oval;
1207	aval1 = ep->pred->val[A_ATOM];
1208	oval1 = ep->pred->oval;
1209
1210	if (aval0 != aval1)
1211		return 0;
1212
1213	if (oval0 == oval1)
1214		/*
1215		 * The operands are identical, so the
1216		 * result is true if a true branch was
1217		 * taken to get here, otherwise false.
1218		 */
1219		return sense ? JT(child) : JF(child);
1220
1221	if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1222		/*
1223		 * At this point, we only know the comparison if we
1224		 * came down the true branch, and it was an equality
1225		 * comparison with a constant.  We rely on the fact that
1226		 * distinct constants have distinct value numbers.
1227		 */
1228		return JF(child);
1229
1230	return 0;
1231}
1232
1233static void
1234opt_j(ep)
1235	struct edge *ep;
1236{
1237	register int i, k;
1238	register struct block *target;
1239
1240	if (JT(ep->succ) == 0)
1241		return;
1242
1243	if (JT(ep->succ) == JF(ep->succ)) {
1244		/*
1245		 * Common branch targets can be eliminated, provided
1246		 * there is no data dependency.
1247		 */
1248		if (!use_conflict(ep->pred, ep->succ->et.succ)) {
1249			done = 0;
1250			ep->succ = JT(ep->succ);
1251		}
1252	}
1253	/*
1254	 * For each edge dominator that matches the successor of this
1255	 * edge, promote the edge successor to its grandchild.
1256	 *
1257	 * XXX We violate the set abstraction here in favor of a reasonably
1258	 * efficient loop.
1259	 */
1260 top:
1261	for (i = 0; i < edgewords; ++i) {
1262		register bpf_u_int32 x = ep->edom[i];
1263
1264		while (x != 0) {
1265			k = ffs(x) - 1;
1266			x &=~ (1 << k);
1267			k += i * BITS_PER_WORD;
1268
1269			target = fold_edge(ep->succ, edges[k]);
1270			/*
1271			 * Check that there is no data dependency between
1272			 * nodes that will be violated if we move the edge.
1273			 */
1274			if (target != 0 && !use_conflict(ep->pred, target)) {
1275				done = 0;
1276				ep->succ = target;
1277				if (JT(target) != 0)
1278					/*
1279					 * Start over unless we hit a leaf.
1280					 */
1281					goto top;
1282				return;
1283			}
1284		}
1285	}
1286}
1287
1288
1289static void
1290or_pullup(b)
1291	struct block *b;
1292{
1293	int val, at_top;
1294	struct block *pull;
1295	struct block **diffp, **samep;
1296	struct edge *ep;
1297
1298	ep = b->in_edges;
1299	if (ep == 0)
1300		return;
1301
1302	/*
1303	 * Make sure each predecessor loads the same value.
1304	 * XXX why?
1305	 */
1306	val = ep->pred->val[A_ATOM];
1307	for (ep = ep->next; ep != 0; ep = ep->next)
1308		if (val != ep->pred->val[A_ATOM])
1309			return;
1310
1311	if (JT(b->in_edges->pred) == b)
1312		diffp = &JT(b->in_edges->pred);
1313	else
1314		diffp = &JF(b->in_edges->pred);
1315
1316	at_top = 1;
1317	while (1) {
1318		if (*diffp == 0)
1319			return;
1320
1321		if (JT(*diffp) != JT(b))
1322			return;
1323
1324		if (!SET_MEMBER((*diffp)->dom, b->id))
1325			return;
1326
1327		if ((*diffp)->val[A_ATOM] != val)
1328			break;
1329
1330		diffp = &JF(*diffp);
1331		at_top = 0;
1332	}
1333	samep = &JF(*diffp);
1334	while (1) {
1335		if (*samep == 0)
1336			return;
1337
1338		if (JT(*samep) != JT(b))
1339			return;
1340
1341		if (!SET_MEMBER((*samep)->dom, b->id))
1342			return;
1343
1344		if ((*samep)->val[A_ATOM] == val)
1345			break;
1346
1347		/* XXX Need to check that there are no data dependencies
1348		   between *diffp and *samep.  Currently, the code generator
1349		   will not produce such dependencies. */
1350		samep = &JF(*samep);
1351	}
1352#ifdef notdef
1353	/* XXX This doesn't cover everything. */
1354	for (i = 0; i < N_ATOMS; ++i)
1355		if ((*samep)->val[i] != pred->val[i])
1356			return;
1357#endif
1358	/* Pull up the node. */
1359	pull = *samep;
1360	*samep = JF(pull);
1361	JF(pull) = *diffp;
1362
1363	/*
1364	 * At the top of the chain, each predecessor needs to point at the
1365	 * pulled up node.  Inside the chain, there is only one predecessor
1366	 * to worry about.
1367	 */
1368	if (at_top) {
1369		for (ep = b->in_edges; ep != 0; ep = ep->next) {
1370			if (JT(ep->pred) == b)
1371				JT(ep->pred) = pull;
1372			else
1373				JF(ep->pred) = pull;
1374		}
1375	}
1376	else
1377		*diffp = pull;
1378
1379	done = 0;
1380}
1381
1382static void
1383and_pullup(b)
1384	struct block *b;
1385{
1386	int val, at_top;
1387	struct block *pull;
1388	struct block **diffp, **samep;
1389	struct edge *ep;
1390
1391	ep = b->in_edges;
1392	if (ep == 0)
1393		return;
1394
1395	/*
1396	 * Make sure each predecessor loads the same value.
1397	 */
1398	val = ep->pred->val[A_ATOM];
1399	for (ep = ep->next; ep != 0; ep = ep->next)
1400		if (val != ep->pred->val[A_ATOM])
1401			return;
1402
1403	if (JT(b->in_edges->pred) == b)
1404		diffp = &JT(b->in_edges->pred);
1405	else
1406		diffp = &JF(b->in_edges->pred);
1407
1408	at_top = 1;
1409	while (1) {
1410		if (*diffp == 0)
1411			return;
1412
1413		if (JF(*diffp) != JF(b))
1414			return;
1415
1416		if (!SET_MEMBER((*diffp)->dom, b->id))
1417			return;
1418
1419		if ((*diffp)->val[A_ATOM] != val)
1420			break;
1421
1422		diffp = &JT(*diffp);
1423		at_top = 0;
1424	}
1425	samep = &JT(*diffp);
1426	while (1) {
1427		if (*samep == 0)
1428			return;
1429
1430		if (JF(*samep) != JF(b))
1431			return;
1432
1433		if (!SET_MEMBER((*samep)->dom, b->id))
1434			return;
1435
1436		if ((*samep)->val[A_ATOM] == val)
1437			break;
1438
1439		/* XXX Need to check that there are no data dependencies
1440		   between diffp and samep.  Currently, the code generator
1441		   will not produce such dependencies. */
1442		samep = &JT(*samep);
1443	}
1444#ifdef notdef
1445	/* XXX This doesn't cover everything. */
1446	for (i = 0; i < N_ATOMS; ++i)
1447		if ((*samep)->val[i] != pred->val[i])
1448			return;
1449#endif
1450	/* Pull up the node. */
1451	pull = *samep;
1452	*samep = JT(pull);
1453	JT(pull) = *diffp;
1454
1455	/*
1456	 * At the top of the chain, each predecessor needs to point at the
1457	 * pulled up node.  Inside the chain, there is only one predecessor
1458	 * to worry about.
1459	 */
1460	if (at_top) {
1461		for (ep = b->in_edges; ep != 0; ep = ep->next) {
1462			if (JT(ep->pred) == b)
1463				JT(ep->pred) = pull;
1464			else
1465				JF(ep->pred) = pull;
1466		}
1467	}
1468	else
1469		*diffp = pull;
1470
1471	done = 0;
1472}
1473
1474static void
1475opt_blks(root, do_stmts)
1476	struct block *root;
1477	int do_stmts;
1478{
1479	int i, maxlevel;
1480	struct block *p;
1481
1482	init_val();
1483	maxlevel = root->level;
1484
1485	find_inedges(root);
1486	for (i = maxlevel; i >= 0; --i)
1487		for (p = levels[i]; p; p = p->link)
1488			opt_blk(p, do_stmts);
1489
1490	if (do_stmts)
1491		/*
1492		 * No point trying to move branches; it can't possibly
1493		 * make a difference at this point.
1494		 */
1495		return;
1496
1497	for (i = 1; i <= maxlevel; ++i) {
1498		for (p = levels[i]; p; p = p->link) {
1499			opt_j(&p->et);
1500			opt_j(&p->ef);
1501		}
1502	}
1503
1504	find_inedges(root);
1505	for (i = 1; i <= maxlevel; ++i) {
1506		for (p = levels[i]; p; p = p->link) {
1507			or_pullup(p);
1508			and_pullup(p);
1509		}
1510	}
1511}
1512
1513static inline void
1514link_inedge(parent, child)
1515	struct edge *parent;
1516	struct block *child;
1517{
1518	parent->next = child->in_edges;
1519	child->in_edges = parent;
1520}
1521
1522static void
1523find_inedges(root)
1524	struct block *root;
1525{
1526	int i;
1527	struct block *b;
1528
1529	for (i = 0; i < n_blocks; ++i)
1530		blocks[i]->in_edges = 0;
1531
1532	/*
1533	 * Traverse the graph, adding each edge to the predecessor
1534	 * list of its successor.  Skip the leaves (i.e. level 0).
1535	 */
1536	for (i = root->level; i > 0; --i) {
1537		for (b = levels[i]; b != 0; b = b->link) {
1538			link_inedge(&b->et, JT(b));
1539			link_inedge(&b->ef, JF(b));
1540		}
1541	}
1542}
1543
1544static void
1545opt_root(b)
1546	struct block **b;
1547{
1548	struct slist *tmp, *s;
1549
1550	s = (*b)->stmts;
1551	(*b)->stmts = 0;
1552	while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
1553		*b = JT(*b);
1554
1555	tmp = (*b)->stmts;
1556	if (tmp != 0)
1557		sappend(s, tmp);
1558	(*b)->stmts = s;
1559
1560	/*
1561	 * If the root node is a return, then there is no
1562	 * point executing any statements (since the bpf machine
1563	 * has no side effects).
1564	 */
1565	if (BPF_CLASS((*b)->s.code) == BPF_RET)
1566		(*b)->stmts = 0;
1567}
1568
1569static void
1570opt_loop(root, do_stmts)
1571	struct block *root;
1572	int do_stmts;
1573{
1574
1575#ifdef BDEBUG
1576	if (dflag > 1) {
1577		printf("opt_loop(root, %d) begin\n", do_stmts);
1578		opt_dump(root);
1579	}
1580#endif
1581	do {
1582		done = 1;
1583		find_levels(root);
1584		find_dom(root);
1585		find_closure(root);
1586		find_ud(root);
1587		find_edom(root);
1588		opt_blks(root, do_stmts);
1589#ifdef BDEBUG
1590		if (dflag > 1) {
1591			printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, done);
1592			opt_dump(root);
1593		}
1594#endif
1595	} while (!done);
1596}
1597
1598/*
1599 * Optimize the filter code in its dag representation.
1600 */
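/*
 * bpf_optimize() runs opt_loop() twice: first with do_stmts == 0, in
 * which case opt_blks() also moves branches (opt_j(), or_pullup() and
 * and_pullup()), and then with do_stmts == 1, which additionally lets
 * opt_stmt() rewrite statements and lets whole statement lists be
 * discarded.  Each opt_loop() call recomputes levels, dominators, the
 * backwards closure, use-def and edge-dominator information and then
 * re-optimizes, repeating until a pass changes nothing.  Finally
 * intern_blocks() merges equivalent blocks and opt_root() tidies the
 * entry node.
 */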
1601void
1602bpf_optimize(rootp)
1603	struct block **rootp;
1604{
1605	struct block *root;
1606
1607	root = *rootp;
1608
1609	opt_init(root);
1610	opt_loop(root, 0);
1611	opt_loop(root, 1);
1612	intern_blocks(root);
1613#ifdef BDEBUG
1614	if (dflag > 1) {
1615		printf("after intern_blocks()\n");
1616		opt_dump(root);
1617	}
1618#endif
1619	opt_root(rootp);
1620#ifdef BDEBUG
1621	if (dflag > 1) {
1622		printf("after opt_root()\n");
1623		opt_dump(root);
1624	}
1625#endif
1626	opt_cleanup();
1627}
1628
1629static void
1630make_marks(p)
1631	struct block *p;
1632{
1633	if (!isMarked(p)) {
1634		Mark(p);
1635		if (BPF_CLASS(p->s.code) != BPF_RET) {
1636			make_marks(JT(p));
1637			make_marks(JF(p));
1638		}
1639	}
1640}
1641
1642/*
1643 * Mark code array such that isMarked(i) is true
1644 * only for nodes that are alive.
1645 */
1646static void
1647mark_code(p)
1648	struct block *p;
1649{
1650	cur_mark += 1;
1651	make_marks(p);
1652}
1653
1654/*
1655 * True iff the two stmt lists load the same value from the packet into
1656 * the accumulator.
1657 */
1658static int
1659eq_slist(x, y)
1660	struct slist *x, *y;
1661{
1662	while (1) {
1663		while (x && x->s.code == NOP)
1664			x = x->next;
1665		while (y && y->s.code == NOP)
1666			y = y->next;
1667		if (x == 0)
1668			return y == 0;
1669		if (y == 0)
1670			return x == 0;
1671		if (x->s.code != y->s.code || x->s.k != y->s.k)
1672			return 0;
1673		x = x->next;
1674		y = y->next;
1675	}
1676}
1677
1678static inline int
1679eq_blk(b0, b1)
1680	struct block *b0, *b1;
1681{
1682	if (b0->s.code == b1->s.code &&
1683	    b0->s.k == b1->s.k &&
1684	    b0->et.succ == b1->et.succ &&
1685	    b0->ef.succ == b1->ef.succ)
1686		return eq_slist(b0->stmts, b1->stmts);
1687	return 0;
1688}
1689
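/*
 * Merge equivalent blocks: eq_blk() above treats two blocks as equal
 * when they end in the same test on the same operand, have the same
 * successors and have equivalent statement lists.  Each such block is
 * pointed (via 'link') at a surviving representative and branches into
 * it are redirected there, repeating until no more edges move; so, for
 * instance, two identical "ret #0" blocks would collapse into one.
 */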
1690static void
1691intern_blocks(root)
1692	struct block *root;
1693{
1694	struct block *p;
1695	int i, j;
1696	int done;
1697 top:
1698	done = 1;
1699	for (i = 0; i < n_blocks; ++i)
1700		blocks[i]->link = 0;
1701
1702	mark_code(root);
1703
1704	for (i = n_blocks - 1; --i >= 0; ) {
1705		if (!isMarked(blocks[i]))
1706			continue;
1707		for (j = i + 1; j < n_blocks; ++j) {
1708			if (!isMarked(blocks[j]))
1709				continue;
1710			if (eq_blk(blocks[i], blocks[j])) {
1711				blocks[i]->link = blocks[j]->link ?
1712					blocks[j]->link : blocks[j];
1713				break;
1714			}
1715		}
1716	}
1717	for (i = 0; i < n_blocks; ++i) {
1718		p = blocks[i];
1719		if (JT(p) == 0)
1720			continue;
1721		if (JT(p)->link) {
1722			done = 0;
1723			JT(p) = JT(p)->link;
1724		}
1725		if (JF(p)->link) {
1726			done = 0;
1727			JF(p) = JF(p)->link;
1728		}
1729	}
1730	if (!done)
1731		goto top;
1732}
1733
1734static void
1735opt_cleanup()
1736{
1737	free((void *)vnode_base);
1738	free((void *)vmap);
1739	free((void *)edges);
1740	free((void *)space);
1741	free((void *)levels);
1742	free((void *)blocks);
1743}
1744
1745/*
1746 * Return the number of stmts in 's'.
1747 */
1748static int
1749slength(s)
1750	struct slist *s;
1751{
1752	int n = 0;
1753
1754	for (; s; s = s->next)
1755		if (s->s.code != NOP)
1756			++n;
1757	return n;
1758}
1759
1760/*
1761 * Return the number of nodes reachable by 'p'.
1762 * All nodes should be initially unmarked.
1763 */
1764static int
1765count_blocks(p)
1766	struct block *p;
1767{
1768	if (p == 0 || isMarked(p))
1769		return 0;
1770	Mark(p);
1771	return count_blocks(JT(p)) + count_blocks(JF(p)) + 1;
1772}
1773
1774/*
1775 * Do a depth first search on the flow graph, numbering the
1776 * basic blocks, and entering them into the 'blocks' array.
1777 */
1778static void
1779number_blks_r(p)
1780	struct block *p;
1781{
1782	int n;
1783
1784	if (p == 0 || isMarked(p))
1785		return;
1786
1787	Mark(p);
1788	n = n_blocks++;
1789	p->id = n;
1790	blocks[n] = p;
1791
1792	number_blks_r(JT(p));
1793	number_blks_r(JF(p));
1794}
1795
1796/*
1797 * Return the number of stmts in the flowgraph reachable by 'p'.
1798 * The nodes should be unmarked before calling.
1799 *
1800 * Note that "stmts" means "instructions", and that this includes
1801 *
1802 *	side-effect statements in 'p' (slength(p->stmts));
1803 *
1804 *	statements in the true branch from 'p' (count_stmts(JT(p)));
1805 *
1806 *	statements in the false branch from 'p' (count_stmts(JF(p)));
1807 *
1808 *	the conditional jump itself (1);
1809 *
1810 *	an extra long jump if the true branch requires it (p->longjt);
1811 *
1812 *	an extra long jump if the false branch requires it (p->longjf).
1813 */
1814static int
1815count_stmts(p)
1816	struct block *p;
1817{
1818	int n;
1819
1820	if (p == 0 || isMarked(p))
1821		return 0;
1822	Mark(p);
1823	n = count_stmts(JT(p)) + count_stmts(JF(p));
1824	return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
1825}
1826
1827/*
1828 * Allocate memory.  All allocation is done before optimization
1829 * is begun.  A linear bound on the size of all data structures is computed
1830 * from the total number of blocks and/or statements.
1831 */
1832static void
1833opt_init(root)
1834	struct block *root;
1835{
1836	bpf_u_int32 *p;
1837	int i, n, max_stmts;
1838
1839	/*
1840	 * First, count the blocks, so we can malloc an array to map
1841	 * block number to block.  Then, put the blocks into the array.
1842	 */
1843	unMarkAll();
1844	n = count_blocks(root);
1845	blocks = (struct block **)malloc(n * sizeof(*blocks));
1846	if (blocks == NULL)
1847		bpf_error("malloc");
1848	unMarkAll();
1849	n_blocks = 0;
1850	number_blks_r(root);
1851
1852	n_edges = 2 * n_blocks;
1853	edges = (struct edge **)malloc(n_edges * sizeof(*edges));
1854	if (edges == NULL)
1855		bpf_error("malloc");
1856
1857	/*
1858	 * The number of levels is bounded by the number of nodes.
1859	 */
1860	levels = (struct block **)malloc(n_blocks * sizeof(*levels));
1861	if (levels == NULL)
1862		bpf_error("malloc");
1863
1864	edgewords = n_edges / (8 * sizeof(bpf_u_int32)) + 1;
1865	nodewords = n_blocks / (8 * sizeof(bpf_u_int32)) + 1;
1866
1867	/* XXX */
1868	space = (bpf_u_int32 *)malloc(2 * n_blocks * nodewords * sizeof(*space)
1869				 + n_edges * edgewords * sizeof(*space));
1870	if (space == NULL)
1871		bpf_error("malloc");
1872	p = space;
1873	all_dom_sets = p;
1874	for (i = 0; i < n; ++i) {
1875		blocks[i]->dom = p;
1876		p += nodewords;
1877	}
1878	all_closure_sets = p;
1879	for (i = 0; i < n; ++i) {
1880		blocks[i]->closure = p;
1881		p += nodewords;
1882	}
1883	all_edge_sets = p;
1884	for (i = 0; i < n; ++i) {
1885		register struct block *b = blocks[i];
1886
1887		b->et.edom = p;
1888		p += edgewords;
1889		b->ef.edom = p;
1890		p += edgewords;
1891		b->et.id = i;
1892		edges[i] = &b->et;
1893		b->ef.id = n_blocks + i;
1894		edges[n_blocks + i] = &b->ef;
1895		b->et.pred = b;
1896		b->ef.pred = b;
1897	}
1898	max_stmts = 0;
1899	for (i = 0; i < n; ++i)
1900		max_stmts += slength(blocks[i]->stmts) + 1;
1901	/*
1902	 * We allocate at most 3 value numbers per statement,
1903	 * so this is an upper bound on the number of valnodes
1904	 * we'll need.
1905	 */
1906	maxval = 3 * max_stmts;
1907	vmap = (struct vmapinfo *)malloc(maxval * sizeof(*vmap));
1908	vnode_base = (struct valnode *)malloc(maxval * sizeof(*vnode_base));
1909	if (vmap == NULL || vnode_base == NULL)
1910		bpf_error("malloc");
1911}
1912
1913/*
1914 * Some pointers used to convert the basic block form of the code
1915 * into the array form that BPF requires.  'fstart' will point to
1916 * the malloc'd array while 'ftail' is used during the recursive traversal.
1917 */
1918static struct bpf_insn *fstart;
1919static struct bpf_insn *ftail;
1920
1921#ifdef BDEBUG
1922int bids[1000];
1923#endif
1924
1925/*
1926 * Returns true if successful.  Returns false if a branch has
1927 * an offset that is too large.  If so, we have marked that
1928 * branch so that on a subsequent iteration, it will be treated
1929 * properly.
1930 */
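/*
 * For example, if a conditional branch would need a jump offset of 300,
 * which does not fit in the 8-bit jt/jf fields of a bpf_insn, the first
 * attempt bumps p->longjt (or p->longjf) and returns 0; the caller then
 * redoes the conversion with room reserved for an extra unconditional
 * "jmp" (BPF_JMP|BPF_JA) that the short conditional offset can reach.
 */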
1931static int
1932convert_code_r(p)
1933	struct block *p;
1934{
1935	struct bpf_insn *dst;
1936	struct slist *src;
1937	int slen;
1938	u_int off;
1939	int extrajmps;		/* number of extra jumps inserted */
1940	struct slist **offset = NULL;
1941
1942	if (p == 0 || isMarked(p))
1943		return (1);
1944	Mark(p);
1945
1946	if (convert_code_r(JF(p)) == 0)
1947		return (0);
1948	if (convert_code_r(JT(p)) == 0)
1949		return (0);
1950
1951	slen = slength(p->stmts);
1952	dst = ftail -= (slen + 1 + p->longjt + p->longjf);
1953		/* inflate length by any extra jumps */
1954
1955	p->offset = dst - fstart;
1956
1957	/* generate offset[] for convenience  */
1958	if (slen) {
1959		offset = (struct slist **)calloc(slen, sizeof(struct slist *));
1960		if (!offset) {
1961			bpf_error("not enough core");
1962			/*NOTREACHED*/
1963		}
1964	}
1965	src = p->stmts;
1966	for (off = 0; off < slen && src; off++) {
1967#if 0
1968		printf("off=%d src=%x\n", off, src);
1969#endif
1970		offset[off] = src;
1971		src = src->next;
1972	}
1973
1974	off = 0;
1975	for (src = p->stmts; src; src = src->next) {
1976		if (src->s.code == NOP)
1977			continue;
1978		dst->code = (u_short)src->s.code;
1979		dst->k = src->s.k;
1980
1981		/* fill block-local relative jump */
1982		if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
1983#if 0
1984			if (src->s.jt || src->s.jf) {
1985				bpf_error("illegal jmp destination");
1986				/*NOTREACHED*/
1987			}
1988#endif
1989			goto filled;
1990		}
1991		if (off == slen - 2)	/*???*/
1992			goto filled;
1993
1994	    {
1995		int i;
1996		int jt, jf;
1997		char *ljerr = "%s for block-local relative jump: off=%d";
1998
1999#if 0
2000		printf("code=%x off=%d %x %x\n", src->s.code,
2001			off, src->s.jt, src->s.jf);
2002#endif
2003
2004		if (!src->s.jt || !src->s.jf) {
2005			bpf_error(ljerr, "no jmp destination", off);
2006			/*NOTREACHED*/
2007		}
2008
2009		jt = jf = 0;
2010		for (i = 0; i < slen; i++) {
2011			if (offset[i] == src->s.jt) {
2012				if (jt) {
2013					bpf_error(ljerr, "multiple matches", off);
2014					/*NOTREACHED*/
2015				}
2016
2017				dst->jt = i - off - 1;
2018				jt++;
2019			}
2020			if (offset[i] == src->s.jf) {
2021				if (jf) {
2022					bpf_error(ljerr, "multiple matches", off);
2023					/*NOTREACHED*/
2024				}
2025				dst->jf = i - off - 1;
2026				jf++;
2027			}
2028		}
2029		if (!jt || !jf) {
2030			bpf_error(ljerr, "no destination found", off);
2031			/*NOTREACHED*/
2032		}
2033	    }
2034filled:
2035		++dst;
2036		++off;
2037	}
2038	if (offset)
2039		free(offset);
2040
2041#ifdef BDEBUG
2042	bids[dst - fstart] = p->id + 1;
2043#endif
2044	dst->code = (u_short)p->s.code;
2045	dst->k = p->s.k;
2046	if (JT(p)) {
2047		extrajmps = 0;
2048		off = JT(p)->offset - (p->offset + slen) - 1;
2049		if (off >= 256) {
2050		    /* offset too large for branch, must add a jump */
2051		    if (p->longjt == 0) {
2052		    	/* mark this instruction and retry */
2053			p->longjt++;
2054			return(0);
2055		    }
2056		    /* branch if T to following jump */
2057		    dst->jt = extrajmps;
2058		    extrajmps++;
2059		    dst[extrajmps].code = BPF_JMP|BPF_JA;
2060		    dst[extrajmps].k = off - extrajmps;
2061		}
2062		else
2063		    dst->jt = off;
2064		off = JF(p)->offset - (p->offset + slen) - 1;
2065		if (off >= 256) {
2066		    /* offset too large for branch, must add a jump */
2067		    if (p->longjf == 0) {
2068		    	/* mark this instruction and retry */
2069			p->longjf++;
2070			return(0);
2071		    }
2072		    /* branch if F to following jump */
2073		    /* if two jumps are inserted, F goes to second one */
2074		    dst->jf = extrajmps;
2075		    extrajmps++;
2076		    dst[extrajmps].code = BPF_JMP|BPF_JA;
2077		    dst[extrajmps].k = off - extrajmps;
2078		}
2079		else
2080		    dst->jf = off;
2081	}
2082	return (1);
2083}
2084
2085
2086/*
2087 * Convert flowgraph intermediate representation to the
2088 * BPF array representation.  Set *lenp to the number of instructions.
2089 */
2090struct bpf_insn *
2091icode_to_fcode(root, lenp)
2092	struct block *root;
2093	int *lenp;
2094{
2095	int n;
2096	struct bpf_insn *fp;
2097
2098	/*
2099	 * Loop doing convert_code_r() until no branches remain
2100	 * with too-large offsets.
2101	 */
2102	while (1) {
2103	    unMarkAll();
2104	    n = *lenp = count_stmts(root);
2105
2106	    fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
2107	    if (fp == NULL)
2108		    bpf_error("malloc");
2109	    memset((char *)fp, 0, sizeof(*fp) * n);
2110	    fstart = fp;
2111	    ftail = fp + n;
2112
2113	    unMarkAll();
2114	    if (convert_code_r(root))
2115		break;
2116	    free(fp);
2117	}
2118
2119	return fp;
2120}
2121
2122/*
2123 * Make a copy of a BPF program and put it in the "fcode" member of
2124 * a "pcap_t".
2125 *
2126 * If we fail to allocate memory for the copy, fill in the "errbuf"
2127 * member of the "pcap_t" with an error message, and return -1;
2128 * otherwise, return 0.
2129 */
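/*
 * This is typically called from a module's pcap_setfilter()
 * implementation, with 'fp' pointing at a program produced by
 * pcap_compile(), when the filter is to be run by libpcap itself on
 * captured packets rather than by the kernel.
 */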
2130int
2131install_bpf_program(pcap_t *p, struct bpf_program *fp)
2132{
2133	size_t prog_size;
2134
2135	/*
2136	 * Free up any already installed program.
2137	 */
2138	pcap_freecode(&p->fcode);
2139
2140	prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
2141	p->fcode.bf_len = fp->bf_len;
2142	p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
2143	if (p->fcode.bf_insns == NULL) {
2144		snprintf(p->errbuf, sizeof(p->errbuf),
2145			 "malloc: %s", pcap_strerror(errno));
2146		return (-1);
2147	}
2148	memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
2149	return (0);
2150}
2151
2152#ifdef BDEBUG
2153static void
2154opt_dump(root)
2155	struct block *root;
2156{
2157	struct bpf_program f;
2158
2159	memset(bids, 0, sizeof bids);
2160	f.bf_insns = icode_to_fcode(root, &f.bf_len);
2161	bpf_dump(&f, 1);
2162	putchar('\n');
2163	free((char *)f.bf_insns);
2164}
2165#endif
2166